From 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 27 Apr 2024 20:24:20 +0200
Subject: Adding upstream version 14.2.21.

Signed-off-by: Daniel Baumann
---
 src/spdk/lib/Makefile | 58 +
 src/spdk/lib/bdev/Makefile | 60 +
 src/spdk/lib/bdev/aio/Makefile | 41 +
 src/spdk/lib/bdev/aio/bdev_aio.c | 751 +++
 src/spdk/lib/bdev/aio/bdev_aio.h | 80 +
 src/spdk/lib/bdev/aio/bdev_aio_rpc.c | 160 +
 src/spdk/lib/bdev/bdev.c | 3950 ++++++++++++++
 src/spdk/lib/bdev/crypto/Makefile | 42 +
 src/spdk/lib/bdev/crypto/vbdev_crypto.c | 1506 ++++++
 src/spdk/lib/bdev/crypto/vbdev_crypto.h | 66 +
 src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c | 163 +
 src/spdk/lib/bdev/error/Makefile | 40 +
 src/spdk/lib/bdev/error/vbdev_error.c | 513 ++
 src/spdk/lib/bdev/error/vbdev_error.h | 76 +
 src/spdk/lib/bdev/error/vbdev_error_rpc.c | 258 +
 src/spdk/lib/bdev/gpt/Makefile | 40 +
 src/spdk/lib/bdev/gpt/gpt.c | 239 +
 src/spdk/lib/bdev/gpt/gpt.h | 62 +
 src/spdk/lib/bdev/gpt/vbdev_gpt.c | 463 ++
 src/spdk/lib/bdev/iscsi/Makefile | 46 +
 src/spdk/lib/bdev/iscsi/bdev_iscsi.c | 875 +++
 src/spdk/lib/bdev/iscsi/bdev_iscsi.h | 75 +
 src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c | 173 +
 src/spdk/lib/bdev/lvol/Makefile | 41 +
 src/spdk/lib/bdev/lvol/vbdev_lvol.c | 1321 +++++
 src/spdk/lib/bdev/lvol/vbdev_lvol.h | 120 +
 src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c | 1089 ++++
 src/spdk/lib/bdev/malloc/Makefile | 41 +
 src/spdk/lib/bdev/malloc/bdev_malloc.c | 524 ++
 src/spdk/lib/bdev/malloc/bdev_malloc.h | 48 +
 src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c | 170 +
 src/spdk/lib/bdev/null/Makefile | 40 +
 src/spdk/lib/bdev/null/bdev_null.c | 384 ++
 src/spdk/lib/bdev/null/bdev_null.h | 57 +
 src/spdk/lib/bdev/null/bdev_null_rpc.c | 169 +
 src/spdk/lib/bdev/nvme/Makefile | 40 +
 src/spdk/lib/bdev/nvme/bdev_nvme.c | 1856 +++++++
 src/spdk/lib/bdev/nvme/bdev_nvme.h | 112 +
 src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c | 740 +++
 src/spdk/lib/bdev/nvme/nvme_rpc.c | 487 ++
 src/spdk/lib/bdev/part.c | 373 ++
 src/spdk/lib/bdev/passthru/Makefile | 42 +
 src/spdk/lib/bdev/passthru/vbdev_passthru.c | 671 +++
 src/spdk/lib/bdev/passthru/vbdev_passthru.h | 62 +
 src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c | 160 +
 src/spdk/lib/bdev/pmem/Makefile | 40 +
 src/spdk/lib/bdev/pmem/bdev_pmem.c | 465 ++
 src/spdk/lib/bdev/pmem/bdev_pmem.h | 64 +
 src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c | 350 ++
 src/spdk/lib/bdev/raid/Makefile | 41 +
 src/spdk/lib/bdev/raid/bdev_raid.c | 1624 ++++++
 src/spdk/lib/bdev/raid/bdev_raid.h | 225 +
 src/spdk/lib/bdev/raid/bdev_raid_rpc.c | 408 ++
 src/spdk/lib/bdev/rbd/Makefile | 40 +
 src/spdk/lib/bdev/rbd/bdev_rbd.c | 740 +++
 src/spdk/lib/bdev/rbd/bdev_rbd.h | 55 +
 src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c | 157 +
 src/spdk/lib/bdev/rpc/Makefile | 40 +
 src/spdk/lib/bdev/rpc/bdev_rpc.c | 587 ++
 src/spdk/lib/bdev/scsi_nvme.c | 261 +
 src/spdk/lib/bdev/split/Makefile | 40 +
 src/spdk/lib/bdev/split/vbdev_split.c | 565 ++
 src/spdk/lib/bdev/split/vbdev_split.h | 68 +
 src/spdk/lib/bdev/split/vbdev_split_rpc.c | 151 +
 src/spdk/lib/bdev/virtio/Makefile | 40 +
 src/spdk/lib/bdev/virtio/bdev_virtio.h | 164 +
 src/spdk/lib/bdev/virtio/bdev_virtio_blk.c | 707 +++
 src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c | 613 +++
 src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c | 2017 +++++++
 src/spdk/lib/bdev/vtune.c | 49 +
 src/spdk/lib/blob/Makefile | 42 +
 src/spdk/lib/blob/bdev/Makefile | 40 +
 src/spdk/lib/blob/bdev/blob_bdev.c | 357 ++
 src/spdk/lib/blob/blob_bs_dev.c | 150 +
 src/spdk/lib/blob/blobstore.c | 5720 ++++++++++++++++++++
 src/spdk/lib/blob/blobstore.h | 572 ++
src/spdk/lib/blob/request.c | 558 ++ src/spdk/lib/blob/request.h | 223 + src/spdk/lib/blob/zeroes.c | 122 + src/spdk/lib/blobfs/Makefile | 40 + src/spdk/lib/blobfs/blobfs.c | 2617 +++++++++ src/spdk/lib/blobfs/blobfs_internal.h | 69 + src/spdk/lib/blobfs/tree.c | 181 + src/spdk/lib/blobfs/tree.h | 77 + src/spdk/lib/conf/Makefile | 40 + src/spdk/lib/conf/conf.c | 684 +++ src/spdk/lib/copy/Makefile | 42 + src/spdk/lib/copy/copy_engine.c | 318 ++ src/spdk/lib/copy/ioat/Makefile | 40 + src/spdk/lib/copy/ioat/copy_engine_ioat.c | 421 ++ src/spdk/lib/copy/ioat/copy_engine_ioat.h | 44 + src/spdk/lib/copy/ioat/copy_engine_ioat_rpc.c | 118 + src/spdk/lib/env_dpdk/Makefile | 42 + src/spdk/lib/env_dpdk/env.c | 419 ++ src/spdk/lib/env_dpdk/env.mk | 112 + src/spdk/lib/env_dpdk/env_internal.h | 104 + src/spdk/lib/env_dpdk/init.c | 401 ++ src/spdk/lib/env_dpdk/memory.c | 712 +++ src/spdk/lib/env_dpdk/pci.c | 551 ++ src/spdk/lib/env_dpdk/pci_ioat.c | 123 + src/spdk/lib/env_dpdk/pci_nvme.c | 89 + src/spdk/lib/env_dpdk/pci_virtio.c | 80 + src/spdk/lib/env_dpdk/threads.c | 108 + src/spdk/lib/env_dpdk/vtophys.c | 691 +++ src/spdk/lib/event/Makefile | 42 + src/spdk/lib/event/app.c | 998 ++++ src/spdk/lib/event/reactor.c | 804 +++ src/spdk/lib/event/rpc.c | 82 + src/spdk/lib/event/rpc/Makefile | 40 + src/spdk/lib/event/rpc/app_rpc.c | 155 + src/spdk/lib/event/rpc/subsystem_rpc.c | 129 + src/spdk/lib/event/subsystem.c | 256 + src/spdk/lib/event/subsystems/Makefile | 44 + src/spdk/lib/event/subsystems/bdev/Makefile | 40 + src/spdk/lib/event/subsystems/bdev/bdev.c | 83 + src/spdk/lib/event/subsystems/bdev/bdev_rpc.c | 97 + src/spdk/lib/event/subsystems/copy/Makefile | 40 + src/spdk/lib/event/subsystems/copy/copy.c | 70 + src/spdk/lib/event/subsystems/iscsi/Makefile | 41 + src/spdk/lib/event/subsystems/iscsi/iscsi.c | 81 + src/spdk/lib/event/subsystems/iscsi/iscsi_rpc.c | 119 + src/spdk/lib/event/subsystems/nbd/Makefile | 40 + src/spdk/lib/event/subsystems/nbd/nbd.c | 74 + src/spdk/lib/event/subsystems/net/Makefile | 40 + src/spdk/lib/event/subsystems/net/net.c | 91 + src/spdk/lib/event/subsystems/nvmf/Makefile | 40 + src/spdk/lib/event/subsystems/nvmf/conf.c | 587 ++ src/spdk/lib/event/subsystems/nvmf/event_nvmf.h | 67 + src/spdk/lib/event/subsystems/nvmf/nvmf_rpc.c | 1562 ++++++ .../event/subsystems/nvmf/nvmf_rpc_deprecated.c | 620 +++ src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c | 503 ++ src/spdk/lib/event/subsystems/scsi/Makefile | 40 + src/spdk/lib/event/subsystems/scsi/scsi.c | 65 + src/spdk/lib/event/subsystems/vhost/Makefile | 40 + src/spdk/lib/event/subsystems/vhost/vhost.c | 71 + src/spdk/lib/ioat/Makefile | 40 + src/spdk/lib/ioat/ioat.c | 733 +++ src/spdk/lib/ioat/ioat_internal.h | 100 + src/spdk/lib/iscsi/Makefile | 45 + src/spdk/lib/iscsi/acceptor.c | 91 + src/spdk/lib/iscsi/acceptor.h | 43 + src/spdk/lib/iscsi/conn.c | 1470 +++++ src/spdk/lib/iscsi/conn.h | 193 + src/spdk/lib/iscsi/init_grp.c | 786 +++ src/spdk/lib/iscsi/init_grp.h | 79 + src/spdk/lib/iscsi/iscsi.c | 4583 ++++++++++++++++ src/spdk/lib/iscsi/iscsi.h | 467 ++ src/spdk/lib/iscsi/iscsi_rpc.c | 1542 ++++++ src/spdk/lib/iscsi/iscsi_subsystem.c | 1523 ++++++ src/spdk/lib/iscsi/md5.c | 75 + src/spdk/lib/iscsi/md5.h | 52 + src/spdk/lib/iscsi/param.c | 1182 ++++ src/spdk/lib/iscsi/param.h | 84 + src/spdk/lib/iscsi/portal_grp.c | 707 +++ src/spdk/lib/iscsi/portal_grp.h | 83 + src/spdk/lib/iscsi/task.c | 88 + src/spdk/lib/iscsi/task.h | 187 + src/spdk/lib/iscsi/tgt_node.c | 1538 ++++++ src/spdk/lib/iscsi/tgt_node.h | 146 + 
src/spdk/lib/json/Makefile | 40 + src/spdk/lib/json/json_parse.c | 668 +++ src/spdk/lib/json/json_util.c | 650 +++ src/spdk/lib/json/json_write.c | 687 +++ src/spdk/lib/jsonrpc/Makefile | 41 + src/spdk/lib/jsonrpc/jsonrpc_client.c | 213 + src/spdk/lib/jsonrpc/jsonrpc_client_tcp.c | 284 + src/spdk/lib/jsonrpc/jsonrpc_internal.h | 149 + src/spdk/lib/jsonrpc/jsonrpc_server.c | 360 ++ src/spdk/lib/jsonrpc/jsonrpc_server_tcp.c | 394 ++ src/spdk/lib/log/Makefile | 45 + src/spdk/lib/log/log.c | 189 + src/spdk/lib/log/log_flags.c | 196 + src/spdk/lib/log/rpc/Makefile | 40 + src/spdk/lib/log/rpc/log_rpc.c | 336 ++ src/spdk/lib/lvol/Makefile | 40 + src/spdk/lib/lvol/lvol.c | 1494 +++++ src/spdk/lib/nbd/Makefile | 40 + src/spdk/lib/nbd/nbd.c | 969 ++++ src/spdk/lib/nbd/nbd_internal.h | 52 + src/spdk/lib/nbd/nbd_rpc.c | 304 ++ src/spdk/lib/net/Makefile | 41 + src/spdk/lib/net/interface.c | 505 ++ src/spdk/lib/net/net_internal.h | 79 + src/spdk/lib/net/net_rpc.c | 180 + src/spdk/lib/nvme/Makefile | 61 + src/spdk/lib/nvme/nvme.c | 862 +++ src/spdk/lib/nvme/nvme_ctrlr.c | 2678 +++++++++ src/spdk/lib/nvme/nvme_ctrlr_cmd.c | 694 +++ src/spdk/lib/nvme/nvme_ctrlr_ocssd_cmd.c | 83 + src/spdk/lib/nvme/nvme_fabric.c | 340 ++ src/spdk/lib/nvme/nvme_internal.h | 1003 ++++ src/spdk/lib/nvme/nvme_ns.c | 360 ++ src/spdk/lib/nvme/nvme_ns_cmd.c | 1026 ++++ src/spdk/lib/nvme/nvme_ns_ocssd_cmd.c | 232 + src/spdk/lib/nvme/nvme_pcie.c | 2142 ++++++++ src/spdk/lib/nvme/nvme_qpair.c | 663 +++ src/spdk/lib/nvme/nvme_quirks.c | 141 + src/spdk/lib/nvme/nvme_rdma.c | 1634 ++++++ src/spdk/lib/nvme/nvme_transport.c | 219 + src/spdk/lib/nvme/nvme_uevent.c | 214 + src/spdk/lib/nvme/nvme_uevent.h | 61 + src/spdk/lib/nvmf/Makefile | 63 + src/spdk/lib/nvmf/ctrlr.c | 1773 ++++++ src/spdk/lib/nvmf/ctrlr_bdev.c | 531 ++ src/spdk/lib/nvmf/ctrlr_discovery.c | 144 + src/spdk/lib/nvmf/nvmf.c | 1173 ++++ src/spdk/lib/nvmf/nvmf_fc.h | 871 +++ src/spdk/lib/nvmf/nvmf_internal.h | 333 ++ src/spdk/lib/nvmf/rdma.c | 2930 ++++++++++ src/spdk/lib/nvmf/request.c | 190 + src/spdk/lib/nvmf/subsystem.c | 1269 +++++ src/spdk/lib/nvmf/transport.c | 236 + src/spdk/lib/nvmf/transport.h | 200 + src/spdk/lib/rocksdb/env_spdk.cc | 764 +++ src/spdk/lib/rocksdb/spdk.rocksdb.mk | 70 + src/spdk/lib/rpc/Makefile | 40 + src/spdk/lib/rpc/rpc.c | 285 + src/spdk/lib/scsi/Makefile | 40 + src/spdk/lib/scsi/dev.c | 415 ++ src/spdk/lib/scsi/lun.c | 452 ++ src/spdk/lib/scsi/port.c | 96 + src/spdk/lib/scsi/scsi.c | 69 + src/spdk/lib/scsi/scsi_bdev.c | 2116 ++++++++ src/spdk/lib/scsi/scsi_internal.h | 160 + src/spdk/lib/scsi/scsi_rpc.c | 82 + src/spdk/lib/scsi/task.c | 256 + src/spdk/lib/sock/Makefile | 44 + src/spdk/lib/sock/net_framework.c | 70 + src/spdk/lib/sock/posix/Makefile | 40 + src/spdk/lib/sock/posix/posix.c | 604 +++ src/spdk/lib/sock/sock.c | 373 ++ src/spdk/lib/sock/vpp/Makefile | 41 + src/spdk/lib/sock/vpp/vpp.c | 663 +++ src/spdk/lib/thread/Makefile | 40 + src/spdk/lib/thread/thread.c | 768 +++ src/spdk/lib/trace/Makefile | 40 + src/spdk/lib/trace/trace.c | 168 + src/spdk/lib/trace/trace_flags.c | 179 + src/spdk/lib/ut_mock/Makefile | 40 + src/spdk/lib/ut_mock/mock.c | 45 + src/spdk/lib/util/Makefile | 41 + src/spdk/lib/util/base64.c | 228 + src/spdk/lib/util/bit_array.c | 313 ++ src/spdk/lib/util/cpuset.c | 320 ++ src/spdk/lib/util/crc16.c | 53 + src/spdk/lib/util/crc32.c | 66 + src/spdk/lib/util/crc32_ieee.c | 48 + src/spdk/lib/util/crc32c.c | 89 + src/spdk/lib/util/fd.c | 103 + src/spdk/lib/util/strerror_tls.c | 43 + src/spdk/lib/util/string.c | 405 ++ 
src/spdk/lib/util/uuid.c | 67 + src/spdk/lib/vhost/Makefile | 47 + src/spdk/lib/vhost/rte_vhost/Makefile | 46 + src/spdk/lib/vhost/rte_vhost/fd_man.c | 300 + src/spdk/lib/vhost/rte_vhost/fd_man.h | 69 + src/spdk/lib/vhost/rte_vhost/rte_vhost.h | 474 ++ src/spdk/lib/vhost/rte_vhost/socket.c | 819 +++ src/spdk/lib/vhost/rte_vhost/vhost.c | 482 ++ src/spdk/lib/vhost/rte_vhost/vhost.h | 321 ++ src/spdk/lib/vhost/rte_vhost/vhost_user.c | 1360 +++++ src/spdk/lib/vhost/rte_vhost/vhost_user.h | 182 + src/spdk/lib/vhost/vhost.c | 1503 +++++ src/spdk/lib/vhost/vhost_blk.c | 901 +++ src/spdk/lib/vhost/vhost_internal.h | 277 + src/spdk/lib/vhost/vhost_nvme.c | 1465 +++++ src/spdk/lib/vhost/vhost_rpc.c | 814 +++ src/spdk/lib/vhost/vhost_scsi.c | 1271 +++++ src/spdk/lib/virtio/Makefile | 42 + src/spdk/lib/virtio/virtio.c | 738 +++ src/spdk/lib/virtio/virtio_pci.c | 590 ++ src/spdk/lib/virtio/virtio_user.c | 621 +++ src/spdk/lib/virtio/virtio_user/vhost.h | 113 + src/spdk/lib/virtio/virtio_user/vhost_user.c | 518 ++ 274 files changed, 119311 insertions(+) create mode 100644 src/spdk/lib/Makefile create mode 100644 src/spdk/lib/bdev/Makefile create mode 100644 src/spdk/lib/bdev/aio/Makefile create mode 100644 src/spdk/lib/bdev/aio/bdev_aio.c create mode 100644 src/spdk/lib/bdev/aio/bdev_aio.h create mode 100644 src/spdk/lib/bdev/aio/bdev_aio_rpc.c create mode 100644 src/spdk/lib/bdev/bdev.c create mode 100644 src/spdk/lib/bdev/crypto/Makefile create mode 100644 src/spdk/lib/bdev/crypto/vbdev_crypto.c create mode 100644 src/spdk/lib/bdev/crypto/vbdev_crypto.h create mode 100644 src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c create mode 100644 src/spdk/lib/bdev/error/Makefile create mode 100644 src/spdk/lib/bdev/error/vbdev_error.c create mode 100644 src/spdk/lib/bdev/error/vbdev_error.h create mode 100644 src/spdk/lib/bdev/error/vbdev_error_rpc.c create mode 100644 src/spdk/lib/bdev/gpt/Makefile create mode 100644 src/spdk/lib/bdev/gpt/gpt.c create mode 100644 src/spdk/lib/bdev/gpt/gpt.h create mode 100644 src/spdk/lib/bdev/gpt/vbdev_gpt.c create mode 100644 src/spdk/lib/bdev/iscsi/Makefile create mode 100644 src/spdk/lib/bdev/iscsi/bdev_iscsi.c create mode 100644 src/spdk/lib/bdev/iscsi/bdev_iscsi.h create mode 100644 src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c create mode 100644 src/spdk/lib/bdev/lvol/Makefile create mode 100644 src/spdk/lib/bdev/lvol/vbdev_lvol.c create mode 100644 src/spdk/lib/bdev/lvol/vbdev_lvol.h create mode 100644 src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c create mode 100644 src/spdk/lib/bdev/malloc/Makefile create mode 100644 src/spdk/lib/bdev/malloc/bdev_malloc.c create mode 100644 src/spdk/lib/bdev/malloc/bdev_malloc.h create mode 100644 src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c create mode 100644 src/spdk/lib/bdev/null/Makefile create mode 100644 src/spdk/lib/bdev/null/bdev_null.c create mode 100644 src/spdk/lib/bdev/null/bdev_null.h create mode 100644 src/spdk/lib/bdev/null/bdev_null_rpc.c create mode 100644 src/spdk/lib/bdev/nvme/Makefile create mode 100644 src/spdk/lib/bdev/nvme/bdev_nvme.c create mode 100644 src/spdk/lib/bdev/nvme/bdev_nvme.h create mode 100644 src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c create mode 100644 src/spdk/lib/bdev/nvme/nvme_rpc.c create mode 100644 src/spdk/lib/bdev/part.c create mode 100644 src/spdk/lib/bdev/passthru/Makefile create mode 100644 src/spdk/lib/bdev/passthru/vbdev_passthru.c create mode 100644 src/spdk/lib/bdev/passthru/vbdev_passthru.h create mode 100644 src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c create mode 100644 
src/spdk/lib/bdev/pmem/Makefile create mode 100644 src/spdk/lib/bdev/pmem/bdev_pmem.c create mode 100644 src/spdk/lib/bdev/pmem/bdev_pmem.h create mode 100644 src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c create mode 100644 src/spdk/lib/bdev/raid/Makefile create mode 100644 src/spdk/lib/bdev/raid/bdev_raid.c create mode 100644 src/spdk/lib/bdev/raid/bdev_raid.h create mode 100644 src/spdk/lib/bdev/raid/bdev_raid_rpc.c create mode 100644 src/spdk/lib/bdev/rbd/Makefile create mode 100644 src/spdk/lib/bdev/rbd/bdev_rbd.c create mode 100644 src/spdk/lib/bdev/rbd/bdev_rbd.h create mode 100644 src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c create mode 100644 src/spdk/lib/bdev/rpc/Makefile create mode 100644 src/spdk/lib/bdev/rpc/bdev_rpc.c create mode 100644 src/spdk/lib/bdev/scsi_nvme.c create mode 100644 src/spdk/lib/bdev/split/Makefile create mode 100644 src/spdk/lib/bdev/split/vbdev_split.c create mode 100644 src/spdk/lib/bdev/split/vbdev_split.h create mode 100644 src/spdk/lib/bdev/split/vbdev_split_rpc.c create mode 100644 src/spdk/lib/bdev/virtio/Makefile create mode 100644 src/spdk/lib/bdev/virtio/bdev_virtio.h create mode 100644 src/spdk/lib/bdev/virtio/bdev_virtio_blk.c create mode 100644 src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c create mode 100644 src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c create mode 100644 src/spdk/lib/bdev/vtune.c create mode 100644 src/spdk/lib/blob/Makefile create mode 100644 src/spdk/lib/blob/bdev/Makefile create mode 100644 src/spdk/lib/blob/bdev/blob_bdev.c create mode 100644 src/spdk/lib/blob/blob_bs_dev.c create mode 100644 src/spdk/lib/blob/blobstore.c create mode 100644 src/spdk/lib/blob/blobstore.h create mode 100644 src/spdk/lib/blob/request.c create mode 100644 src/spdk/lib/blob/request.h create mode 100644 src/spdk/lib/blob/zeroes.c create mode 100644 src/spdk/lib/blobfs/Makefile create mode 100644 src/spdk/lib/blobfs/blobfs.c create mode 100644 src/spdk/lib/blobfs/blobfs_internal.h create mode 100644 src/spdk/lib/blobfs/tree.c create mode 100644 src/spdk/lib/blobfs/tree.h create mode 100644 src/spdk/lib/conf/Makefile create mode 100644 src/spdk/lib/conf/conf.c create mode 100644 src/spdk/lib/copy/Makefile create mode 100644 src/spdk/lib/copy/copy_engine.c create mode 100644 src/spdk/lib/copy/ioat/Makefile create mode 100644 src/spdk/lib/copy/ioat/copy_engine_ioat.c create mode 100644 src/spdk/lib/copy/ioat/copy_engine_ioat.h create mode 100644 src/spdk/lib/copy/ioat/copy_engine_ioat_rpc.c create mode 100644 src/spdk/lib/env_dpdk/Makefile create mode 100644 src/spdk/lib/env_dpdk/env.c create mode 100644 src/spdk/lib/env_dpdk/env.mk create mode 100644 src/spdk/lib/env_dpdk/env_internal.h create mode 100644 src/spdk/lib/env_dpdk/init.c create mode 100644 src/spdk/lib/env_dpdk/memory.c create mode 100644 src/spdk/lib/env_dpdk/pci.c create mode 100644 src/spdk/lib/env_dpdk/pci_ioat.c create mode 100644 src/spdk/lib/env_dpdk/pci_nvme.c create mode 100644 src/spdk/lib/env_dpdk/pci_virtio.c create mode 100644 src/spdk/lib/env_dpdk/threads.c create mode 100644 src/spdk/lib/env_dpdk/vtophys.c create mode 100644 src/spdk/lib/event/Makefile create mode 100644 src/spdk/lib/event/app.c create mode 100644 src/spdk/lib/event/reactor.c create mode 100644 src/spdk/lib/event/rpc.c create mode 100644 src/spdk/lib/event/rpc/Makefile create mode 100644 src/spdk/lib/event/rpc/app_rpc.c create mode 100644 src/spdk/lib/event/rpc/subsystem_rpc.c create mode 100644 src/spdk/lib/event/subsystem.c create mode 100644 src/spdk/lib/event/subsystems/Makefile create mode 100644 
src/spdk/lib/event/subsystems/bdev/Makefile create mode 100644 src/spdk/lib/event/subsystems/bdev/bdev.c create mode 100644 src/spdk/lib/event/subsystems/bdev/bdev_rpc.c create mode 100644 src/spdk/lib/event/subsystems/copy/Makefile create mode 100644 src/spdk/lib/event/subsystems/copy/copy.c create mode 100644 src/spdk/lib/event/subsystems/iscsi/Makefile create mode 100644 src/spdk/lib/event/subsystems/iscsi/iscsi.c create mode 100644 src/spdk/lib/event/subsystems/iscsi/iscsi_rpc.c create mode 100644 src/spdk/lib/event/subsystems/nbd/Makefile create mode 100644 src/spdk/lib/event/subsystems/nbd/nbd.c create mode 100644 src/spdk/lib/event/subsystems/net/Makefile create mode 100644 src/spdk/lib/event/subsystems/net/net.c create mode 100644 src/spdk/lib/event/subsystems/nvmf/Makefile create mode 100644 src/spdk/lib/event/subsystems/nvmf/conf.c create mode 100644 src/spdk/lib/event/subsystems/nvmf/event_nvmf.h create mode 100644 src/spdk/lib/event/subsystems/nvmf/nvmf_rpc.c create mode 100644 src/spdk/lib/event/subsystems/nvmf/nvmf_rpc_deprecated.c create mode 100644 src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c create mode 100644 src/spdk/lib/event/subsystems/scsi/Makefile create mode 100644 src/spdk/lib/event/subsystems/scsi/scsi.c create mode 100644 src/spdk/lib/event/subsystems/vhost/Makefile create mode 100644 src/spdk/lib/event/subsystems/vhost/vhost.c create mode 100644 src/spdk/lib/ioat/Makefile create mode 100644 src/spdk/lib/ioat/ioat.c create mode 100644 src/spdk/lib/ioat/ioat_internal.h create mode 100644 src/spdk/lib/iscsi/Makefile create mode 100644 src/spdk/lib/iscsi/acceptor.c create mode 100644 src/spdk/lib/iscsi/acceptor.h create mode 100644 src/spdk/lib/iscsi/conn.c create mode 100644 src/spdk/lib/iscsi/conn.h create mode 100644 src/spdk/lib/iscsi/init_grp.c create mode 100644 src/spdk/lib/iscsi/init_grp.h create mode 100644 src/spdk/lib/iscsi/iscsi.c create mode 100644 src/spdk/lib/iscsi/iscsi.h create mode 100644 src/spdk/lib/iscsi/iscsi_rpc.c create mode 100644 src/spdk/lib/iscsi/iscsi_subsystem.c create mode 100644 src/spdk/lib/iscsi/md5.c create mode 100644 src/spdk/lib/iscsi/md5.h create mode 100644 src/spdk/lib/iscsi/param.c create mode 100644 src/spdk/lib/iscsi/param.h create mode 100644 src/spdk/lib/iscsi/portal_grp.c create mode 100644 src/spdk/lib/iscsi/portal_grp.h create mode 100644 src/spdk/lib/iscsi/task.c create mode 100644 src/spdk/lib/iscsi/task.h create mode 100644 src/spdk/lib/iscsi/tgt_node.c create mode 100644 src/spdk/lib/iscsi/tgt_node.h create mode 100644 src/spdk/lib/json/Makefile create mode 100644 src/spdk/lib/json/json_parse.c create mode 100644 src/spdk/lib/json/json_util.c create mode 100644 src/spdk/lib/json/json_write.c create mode 100644 src/spdk/lib/jsonrpc/Makefile create mode 100644 src/spdk/lib/jsonrpc/jsonrpc_client.c create mode 100644 src/spdk/lib/jsonrpc/jsonrpc_client_tcp.c create mode 100644 src/spdk/lib/jsonrpc/jsonrpc_internal.h create mode 100644 src/spdk/lib/jsonrpc/jsonrpc_server.c create mode 100644 src/spdk/lib/jsonrpc/jsonrpc_server_tcp.c create mode 100644 src/spdk/lib/log/Makefile create mode 100644 src/spdk/lib/log/log.c create mode 100644 src/spdk/lib/log/log_flags.c create mode 100644 src/spdk/lib/log/rpc/Makefile create mode 100644 src/spdk/lib/log/rpc/log_rpc.c create mode 100644 src/spdk/lib/lvol/Makefile create mode 100644 src/spdk/lib/lvol/lvol.c create mode 100644 src/spdk/lib/nbd/Makefile create mode 100644 src/spdk/lib/nbd/nbd.c create mode 100644 src/spdk/lib/nbd/nbd_internal.h create mode 100644 
src/spdk/lib/nbd/nbd_rpc.c create mode 100644 src/spdk/lib/net/Makefile create mode 100644 src/spdk/lib/net/interface.c create mode 100644 src/spdk/lib/net/net_internal.h create mode 100644 src/spdk/lib/net/net_rpc.c create mode 100644 src/spdk/lib/nvme/Makefile create mode 100644 src/spdk/lib/nvme/nvme.c create mode 100644 src/spdk/lib/nvme/nvme_ctrlr.c create mode 100644 src/spdk/lib/nvme/nvme_ctrlr_cmd.c create mode 100644 src/spdk/lib/nvme/nvme_ctrlr_ocssd_cmd.c create mode 100644 src/spdk/lib/nvme/nvme_fabric.c create mode 100644 src/spdk/lib/nvme/nvme_internal.h create mode 100644 src/spdk/lib/nvme/nvme_ns.c create mode 100644 src/spdk/lib/nvme/nvme_ns_cmd.c create mode 100644 src/spdk/lib/nvme/nvme_ns_ocssd_cmd.c create mode 100644 src/spdk/lib/nvme/nvme_pcie.c create mode 100644 src/spdk/lib/nvme/nvme_qpair.c create mode 100644 src/spdk/lib/nvme/nvme_quirks.c create mode 100644 src/spdk/lib/nvme/nvme_rdma.c create mode 100644 src/spdk/lib/nvme/nvme_transport.c create mode 100644 src/spdk/lib/nvme/nvme_uevent.c create mode 100644 src/spdk/lib/nvme/nvme_uevent.h create mode 100644 src/spdk/lib/nvmf/Makefile create mode 100644 src/spdk/lib/nvmf/ctrlr.c create mode 100644 src/spdk/lib/nvmf/ctrlr_bdev.c create mode 100644 src/spdk/lib/nvmf/ctrlr_discovery.c create mode 100644 src/spdk/lib/nvmf/nvmf.c create mode 100644 src/spdk/lib/nvmf/nvmf_fc.h create mode 100644 src/spdk/lib/nvmf/nvmf_internal.h create mode 100644 src/spdk/lib/nvmf/rdma.c create mode 100644 src/spdk/lib/nvmf/request.c create mode 100644 src/spdk/lib/nvmf/subsystem.c create mode 100644 src/spdk/lib/nvmf/transport.c create mode 100644 src/spdk/lib/nvmf/transport.h create mode 100644 src/spdk/lib/rocksdb/env_spdk.cc create mode 100644 src/spdk/lib/rocksdb/spdk.rocksdb.mk create mode 100644 src/spdk/lib/rpc/Makefile create mode 100644 src/spdk/lib/rpc/rpc.c create mode 100644 src/spdk/lib/scsi/Makefile create mode 100644 src/spdk/lib/scsi/dev.c create mode 100644 src/spdk/lib/scsi/lun.c create mode 100644 src/spdk/lib/scsi/port.c create mode 100644 src/spdk/lib/scsi/scsi.c create mode 100644 src/spdk/lib/scsi/scsi_bdev.c create mode 100644 src/spdk/lib/scsi/scsi_internal.h create mode 100644 src/spdk/lib/scsi/scsi_rpc.c create mode 100644 src/spdk/lib/scsi/task.c create mode 100644 src/spdk/lib/sock/Makefile create mode 100644 src/spdk/lib/sock/net_framework.c create mode 100644 src/spdk/lib/sock/posix/Makefile create mode 100644 src/spdk/lib/sock/posix/posix.c create mode 100644 src/spdk/lib/sock/sock.c create mode 100644 src/spdk/lib/sock/vpp/Makefile create mode 100644 src/spdk/lib/sock/vpp/vpp.c create mode 100644 src/spdk/lib/thread/Makefile create mode 100644 src/spdk/lib/thread/thread.c create mode 100644 src/spdk/lib/trace/Makefile create mode 100644 src/spdk/lib/trace/trace.c create mode 100644 src/spdk/lib/trace/trace_flags.c create mode 100644 src/spdk/lib/ut_mock/Makefile create mode 100644 src/spdk/lib/ut_mock/mock.c create mode 100644 src/spdk/lib/util/Makefile create mode 100644 src/spdk/lib/util/base64.c create mode 100644 src/spdk/lib/util/bit_array.c create mode 100644 src/spdk/lib/util/cpuset.c create mode 100644 src/spdk/lib/util/crc16.c create mode 100644 src/spdk/lib/util/crc32.c create mode 100644 src/spdk/lib/util/crc32_ieee.c create mode 100644 src/spdk/lib/util/crc32c.c create mode 100644 src/spdk/lib/util/fd.c create mode 100644 src/spdk/lib/util/strerror_tls.c create mode 100644 src/spdk/lib/util/string.c create mode 100644 src/spdk/lib/util/uuid.c create mode 100644 
src/spdk/lib/vhost/Makefile create mode 100644 src/spdk/lib/vhost/rte_vhost/Makefile create mode 100644 src/spdk/lib/vhost/rte_vhost/fd_man.c create mode 100644 src/spdk/lib/vhost/rte_vhost/fd_man.h create mode 100644 src/spdk/lib/vhost/rte_vhost/rte_vhost.h create mode 100644 src/spdk/lib/vhost/rte_vhost/socket.c create mode 100644 src/spdk/lib/vhost/rte_vhost/vhost.c create mode 100644 src/spdk/lib/vhost/rte_vhost/vhost.h create mode 100644 src/spdk/lib/vhost/rte_vhost/vhost_user.c create mode 100644 src/spdk/lib/vhost/rte_vhost/vhost_user.h create mode 100644 src/spdk/lib/vhost/vhost.c create mode 100644 src/spdk/lib/vhost/vhost_blk.c create mode 100644 src/spdk/lib/vhost/vhost_internal.h create mode 100644 src/spdk/lib/vhost/vhost_nvme.c create mode 100644 src/spdk/lib/vhost/vhost_rpc.c create mode 100644 src/spdk/lib/vhost/vhost_scsi.c create mode 100644 src/spdk/lib/virtio/Makefile create mode 100644 src/spdk/lib/virtio/virtio.c create mode 100644 src/spdk/lib/virtio/virtio_pci.c create mode 100644 src/spdk/lib/virtio/virtio_user.c create mode 100644 src/spdk/lib/virtio/virtio_user/vhost.h create mode 100644 src/spdk/lib/virtio/virtio_user/vhost_user.c (limited to 'src/spdk/lib') diff --git a/src/spdk/lib/Makefile b/src/spdk/lib/Makefile new file mode 100644 index 00000000..8de59e3a --- /dev/null +++ b/src/spdk/lib/Makefile @@ -0,0 +1,58 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += bdev blob blobfs conf copy event json jsonrpc \ + log lvol net rpc sock thread trace util nvme nvmf scsi ioat \ + ut_mock iscsi +ifeq ($(OS),Linux) +DIRS-y += nbd +DIRS-$(CONFIG_VHOST) += vhost +DIRS-$(CONFIG_VIRTIO) += virtio +endif + +# If CONFIG_ENV is pointing at a directory in lib, build it. +# Out-of-tree env implementations must be built separately by the user. 
+ENV_NAME := $(notdir $(CONFIG_ENV)) +ifeq ($(abspath $(CONFIG_ENV)),$(SPDK_ROOT_DIR)/lib/$(ENV_NAME)) +DIRS-y += $(ENV_NAME) +endif + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/lib/bdev/Makefile b/src/spdk/lib/bdev/Makefile new file mode 100644 index 00000000..a5d30a9c --- /dev/null +++ b/src/spdk/lib/bdev/Makefile @@ -0,0 +1,60 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +ifeq ($(CONFIG_VTUNE),y) +CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify +endif + +C_SRCS = bdev.c part.c scsi_nvme.c +C_SRCS-$(CONFIG_VTUNE) += vtune.c +LIBNAME = bdev + +DIRS-y += error gpt lvol malloc null nvme passthru raid rpc split + +ifeq ($(CONFIG_CRYPTO),y) +DIRS-y += crypto +endif + +ifeq ($(OS),Linux) +DIRS-y += aio +DIRS-$(CONFIG_ISCSI_INITIATOR) += iscsi +DIRS-$(CONFIG_VIRTIO) += virtio +DIRS-$(CONFIG_PMDK) += pmem +endif + +DIRS-$(CONFIG_RBD) += rbd + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/aio/Makefile b/src/spdk/lib/bdev/aio/Makefile new file mode 100644 index 00000000..7a39e3d2 --- /dev/null +++ b/src/spdk/lib/bdev/aio/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_aio.c bdev_aio_rpc.c +LIBNAME = bdev_aio +LOCAL_SYS_LIBS = -laio + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/aio/bdev_aio.c b/src/spdk/lib/bdev/aio/bdev_aio.c new file mode 100644 index 00000000..bb0289ed --- /dev/null +++ b/src/spdk/lib/bdev/aio/bdev_aio.c @@ -0,0 +1,751 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "bdev_aio.h" + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/fd.h" +#include "spdk/thread.h" +#include "spdk/json.h" +#include "spdk/util.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +static int bdev_aio_initialize(void); +static void bdev_aio_fini(void); +static void aio_free_disk(struct file_disk *fdisk); +static void bdev_aio_get_spdk_running_config(FILE *fp); +static TAILQ_HEAD(, file_disk) g_aio_disk_head; + +#define SPDK_AIO_QUEUE_DEPTH 128 +#define MAX_EVENTS_PER_POLL 32 + +static int +bdev_aio_get_ctx_size(void) +{ + return sizeof(struct bdev_aio_task); +} + +static struct spdk_bdev_module aio_if = { + .name = "aio", + .module_init = bdev_aio_initialize, + .module_fini = bdev_aio_fini, + .config_text = bdev_aio_get_spdk_running_config, + .get_ctx_size = bdev_aio_get_ctx_size, +}; + +struct bdev_aio_group_channel { + struct spdk_poller *poller; + int epfd; +}; + +SPDK_BDEV_MODULE_REGISTER(&aio_if) + +static int +bdev_aio_open(struct file_disk *disk) +{ + int fd; + + fd = open(disk->filename, O_RDWR | O_DIRECT); + if (fd < 0) { + /* Try without O_DIRECT for non-disk files */ + fd = open(disk->filename, O_RDWR); + if (fd < 0) { + SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n", + disk->filename, errno, spdk_strerror(errno)); + disk->fd = -1; + return -1; + } + } + + disk->fd = fd; + + return 0; +} + +static int +bdev_aio_close(struct file_disk *disk) +{ + int rc; + + if (disk->fd == -1) { + return 0; + } + + rc = close(disk->fd); + if (rc < 0) { + SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n", + disk->fd, errno, spdk_strerror(errno)); + return -1; + } + + disk->fd = -1; + + return 0; +} + +static int64_t +bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch, + struct bdev_aio_task *aio_task, + struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) +{ + struct iocb *iocb = &aio_task->iocb; + struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); + int rc; + + io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset); + iocb->data = aio_task; + aio_task->len = nbytes; + io_set_eventfd(iocb, aio_ch->efd); + + SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n", + iovcnt, nbytes, offset); + + rc = io_submit(aio_ch->io_ctx, 1, &iocb); + if (rc < 0) { + if (rc == -EAGAIN) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); + } else { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); + SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc); + } + return -1; + } + aio_ch->io_inflight++; + return nbytes; +} + +static int64_t +bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch, + struct bdev_aio_task *aio_task, + struct iovec *iov, int iovcnt, size_t len, uint64_t offset) +{ + struct iocb *iocb = &aio_task->iocb; + struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); + int rc; + + io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset); + iocb->data = aio_task; + aio_task->len = len; + io_set_eventfd(iocb, aio_ch->efd); + + SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n", + iovcnt, len, offset); + + rc = io_submit(aio_ch->io_ctx, 1, &iocb); + if (rc < 0) { + if (rc == -EAGAIN) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); + } else { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED); + SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, 
rc); + } + return -1; + } + aio_ch->io_inflight++; + return len; +} + +static void +bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task) +{ + int rc = fsync(fdisk->fd); + + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), + rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED); +} + +static int +bdev_aio_destruct(void *ctx) +{ + struct file_disk *fdisk = ctx; + int rc = 0; + + TAILQ_REMOVE(&g_aio_disk_head, fdisk, link); + rc = bdev_aio_close(fdisk); + if (rc < 0) { + SPDK_ERRLOG("bdev_aio_close() failed\n"); + } + return rc; +} + +static int +bdev_aio_initialize_io_channel(struct bdev_aio_io_channel *ch) +{ + ch->efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (ch->efd == -1) { + SPDK_ERRLOG("Cannot create efd\n"); + return -1; + } + + if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { + close(ch->efd); + SPDK_ERRLOG("async I/O context setup failure\n"); + return -1; + } + + return 0; +} + +static int +bdev_aio_group_poll(void *arg) +{ + struct bdev_aio_group_channel *group_ch = arg; + struct bdev_aio_io_channel *ch; + int nr, i, j, rc, total_nr = 0; + enum spdk_bdev_io_status status; + struct bdev_aio_task *aio_task; + struct timespec timeout; + struct io_event events[SPDK_AIO_QUEUE_DEPTH]; + struct epoll_event epevents[MAX_EVENTS_PER_POLL]; + + timeout.tv_sec = 0; + timeout.tv_nsec = 0; + rc = epoll_wait(group_ch->epfd, epevents, MAX_EVENTS_PER_POLL, 0); + if (rc == -1) { + SPDK_ERRLOG("epoll_wait error(%d): %s on ch=%p\n", errno, spdk_strerror(errno), group_ch); + return -1; + } + + for (j = 0; j < rc; j++) { + ch = epevents[j].data.ptr; + nr = io_getevents(ch->io_ctx, 1, SPDK_AIO_QUEUE_DEPTH, + events, &timeout); + + if (nr < 0) { + SPDK_ERRLOG("Returned %d on bdev_aio_io_channel %p\n", nr, ch); + continue; + } + + total_nr += nr; + for (i = 0; i < nr; i++) { + aio_task = events[i].data; + if (events[i].res != aio_task->len) { + status = SPDK_BDEV_IO_STATUS_FAILED; + } else { + status = SPDK_BDEV_IO_STATUS_SUCCESS; + } + + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status); + ch->io_inflight--; + } + } + + return total_nr; +} + +static void +_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch); + + if (aio_ch->io_inflight) { + spdk_for_each_channel_continue(i, -1); + return; + } + + spdk_for_each_channel_continue(i, 0); +} + +static int bdev_aio_reset_retry_timer(void *arg); + +static void +_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) +{ + struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i); + + if (status == -1) { + fdisk->reset_retry_timer = spdk_poller_register(bdev_aio_reset_retry_timer, fdisk, 500); + return; + } + + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); +} + +static int +bdev_aio_reset_retry_timer(void *arg) +{ + struct file_disk *fdisk = arg; + + if (fdisk->reset_retry_timer) { + spdk_poller_unregister(&fdisk->reset_retry_timer); + } + + spdk_for_each_channel(fdisk, + _bdev_aio_get_io_inflight, + fdisk, + _bdev_aio_get_io_inflight_done); + + return -1; +} + +static void +bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task) +{ + fdisk->reset_task = aio_task; + + bdev_aio_reset_retry_timer(fdisk); +} + +static void bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt, + ch, + (struct 
bdev_aio_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); +} + +static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt, + ch, + (struct bdev_aio_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); + return 0; + case SPDK_BDEV_IO_TYPE_FLUSH: + bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt, + (struct bdev_aio_task *)bdev_io->driver_ctx); + return 0; + + case SPDK_BDEV_IO_TYPE_RESET: + bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt, + (struct bdev_aio_task *)bdev_io->driver_ctx); + return 0; + default: + return -1; + } +} + +static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_aio_submit_request(ch, bdev_io) < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + + default: + return false; + } +} + +static int +bdev_aio_create_cb(void *io_device, void *ctx_buf) +{ + struct bdev_aio_io_channel *ch = ctx_buf; + struct bdev_aio_group_channel *group_ch_ctx; + struct epoll_event epevent; + + if (bdev_aio_initialize_io_channel(ch) != 0) { + return -1; + } + + ch->group_ch = spdk_get_io_channel(&aio_if); + group_ch_ctx = spdk_io_channel_get_ctx(ch->group_ch); + + epevent.events = EPOLLIN | EPOLLET; + epevent.data.ptr = ch; + if (epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_ADD, ch->efd, &epevent)) { + close(ch->efd); + io_destroy(ch->io_ctx); + spdk_put_io_channel(ch->group_ch); + SPDK_ERRLOG("epoll_ctl error\n"); + return -1; + } + return 0; +} + +static void +bdev_aio_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_aio_io_channel *io_channel = ctx_buf; + struct bdev_aio_group_channel *group_ch_ctx; + struct epoll_event event; + + group_ch_ctx = spdk_io_channel_get_ctx(io_channel->group_ch); + epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_DEL, io_channel->efd, &event); + spdk_put_io_channel(io_channel->group_ch); + close(io_channel->efd); + io_destroy(io_channel->io_ctx); + +} + +static struct spdk_io_channel * +bdev_aio_get_io_channel(void *ctx) +{ + struct file_disk *fdisk = ctx; + + return spdk_get_io_channel(fdisk); +} + + +static int +bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct file_disk *fdisk = ctx; + + spdk_json_write_name(w, "aio"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "filename"); + spdk_json_write_string(w, fdisk->filename); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct file_disk *fdisk = bdev->ctxt; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_aio_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + 
spdk_json_write_named_string(w, "name", bdev->name); + if (fdisk->block_size_override) { + spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); + } + spdk_json_write_named_string(w, "filename", fdisk->filename); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table aio_fn_table = { + .destruct = bdev_aio_destruct, + .submit_request = bdev_aio_submit_request, + .io_type_supported = bdev_aio_io_type_supported, + .get_io_channel = bdev_aio_get_io_channel, + .dump_info_json = bdev_aio_dump_info_json, + .write_config_json = bdev_aio_write_json_config, +}; + +static void aio_free_disk(struct file_disk *fdisk) +{ + if (fdisk == NULL) { + return; + } + free(fdisk->filename); + free(fdisk->disk.name); + free(fdisk); +} + +static int +bdev_aio_group_create_cb(void *io_device, void *ctx_buf) +{ + struct bdev_aio_group_channel *ch = ctx_buf; + + ch->epfd = epoll_create1(0); + if (ch->epfd == -1) { + SPDK_ERRLOG("cannot create epoll fd\n"); + return -1; + } + + ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0); + return 0; +} + +static void +bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_aio_group_channel *ch = ctx_buf; + + close(ch->epfd); + spdk_poller_unregister(&ch->poller); +} + +struct spdk_bdev * +create_aio_disk(const char *name, const char *filename, uint32_t block_size) +{ + struct file_disk *fdisk; + uint32_t detected_block_size; + uint64_t disk_size; + int rc; + + fdisk = calloc(1, sizeof(*fdisk)); + if (!fdisk) { + SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n"); + return NULL; + } + + fdisk->filename = strdup(filename); + if (!fdisk->filename) { + goto error_return; + } + + if (bdev_aio_open(fdisk)) { + SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno); + goto error_return; + } + + disk_size = spdk_fd_get_size(fdisk->fd); + + fdisk->disk.name = strdup(name); + if (!fdisk->disk.name) { + goto error_return; + } + fdisk->disk.product_name = "AIO disk"; + fdisk->disk.module = &aio_if; + + fdisk->disk.need_aligned_buffer = 1; + fdisk->disk.write_cache = 1; + + detected_block_size = spdk_fd_get_blocklen(fdisk->fd); + if (block_size == 0) { + /* User did not specify block size - use autodetected block size. 
*/ + if (detected_block_size == 0) { + SPDK_ERRLOG("Block size could not be auto-detected\n"); + goto error_return; + } + fdisk->block_size_override = false; + block_size = detected_block_size; + } else { + if (block_size < detected_block_size) { + SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than " + "auto-detected block size %" PRIu32 "\n", + block_size, detected_block_size); + goto error_return; + } else if (detected_block_size != 0 && block_size != detected_block_size) { + SPDK_WARNLOG("Specified block size %" PRIu32 " does not match " + "auto-detected block size %" PRIu32 "\n", + block_size, detected_block_size); + } + fdisk->block_size_override = true; + } + + if (block_size < 512) { + SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size); + goto error_return; + } + + if (!spdk_u32_is_pow2(block_size)) { + SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size); + goto error_return; + } + + fdisk->disk.blocklen = block_size; + + if (disk_size % fdisk->disk.blocklen != 0) { + SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n", + disk_size, fdisk->disk.blocklen); + goto error_return; + } + + fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen; + fdisk->disk.ctxt = fdisk; + + fdisk->disk.fn_table = &aio_fn_table; + + spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb, + sizeof(struct bdev_aio_io_channel), + fdisk->disk.name); + rc = spdk_bdev_register(&fdisk->disk); + if (rc) { + spdk_io_device_unregister(fdisk, NULL); + goto error_return; + } + + TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link); + return &fdisk->disk; + +error_return: + bdev_aio_close(fdisk); + aio_free_disk(fdisk); + return NULL; +} + +static void +aio_io_device_unregister_cb(void *io_device) +{ + struct file_disk *fdisk = io_device; + spdk_delete_aio_complete cb_fn = fdisk->delete_cb_fn; + void *cb_arg = fdisk->delete_cb_arg; + + aio_free_disk(fdisk); + cb_fn(cb_arg, 0); +} + +static void +aio_bdev_unregister_cb(void *arg, int bdeverrno) +{ + struct file_disk *fdisk = arg; + + if (bdeverrno != 0) { + fdisk->delete_cb_fn(fdisk->delete_cb_arg, bdeverrno); + return; + } + + spdk_io_device_unregister(fdisk, aio_io_device_unregister_cb); +} + +void +delete_aio_disk(struct spdk_bdev *bdev, spdk_delete_aio_complete cb_fn, void *cb_arg) +{ + struct file_disk *fdisk; + + if (!bdev || bdev->module != &aio_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + fdisk = bdev->ctxt; + fdisk->delete_cb_fn = cb_fn; + fdisk->delete_cb_arg = cb_arg; + spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, fdisk); +} + +static int +bdev_aio_initialize(void) +{ + size_t i; + struct spdk_conf_section *sp; + struct spdk_bdev *bdev; + + TAILQ_INIT(&g_aio_disk_head); + spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb, + sizeof(struct bdev_aio_group_channel), + "aio_module"); + + sp = spdk_conf_find_section(NULL, "AIO"); + if (!sp) { + return 0; + } + + i = 0; + while (true) { + const char *file; + const char *name; + const char *block_size_str; + uint32_t block_size = 0; + + file = spdk_conf_section_get_nmval(sp, "AIO", i, 0); + if (!file) { + break; + } + + name = spdk_conf_section_get_nmval(sp, "AIO", i, 1); + if (!name) { + SPDK_ERRLOG("No name provided for AIO disk with file %s\n", file); + i++; + continue; + } + + block_size_str = spdk_conf_section_get_nmval(sp, "AIO", i, 2); + if (block_size_str) { + block_size = atoi(block_size_str); + } + + bdev = create_aio_disk(name, file, 
block_size); + if (!bdev) { + SPDK_ERRLOG("Unable to create AIO bdev from file %s\n", file); + i++; + continue; + } + + i++; + } + + return 0; +} + +static void +bdev_aio_fini(void) +{ + spdk_io_device_unregister(&aio_if, NULL); +} + +static void +bdev_aio_get_spdk_running_config(FILE *fp) +{ + char *file; + char *name; + uint32_t block_size; + struct file_disk *fdisk; + + fprintf(fp, + "\n" + "# Users must change this section to match the /dev/sdX devices to be\n" + "# exported as iSCSI LUNs. The devices are accessed using Linux AIO.\n" + "# The format is:\n" + "# AIO []\n" + "# The file name is the backing device\n" + "# The bdev name can be referenced from elsewhere in the configuration file.\n" + "# Block size may be omitted to automatically detect the block size of a disk.\n" + "[AIO]\n"); + + TAILQ_FOREACH(fdisk, &g_aio_disk_head, link) { + file = fdisk->filename; + name = fdisk->disk.name; + block_size = fdisk->disk.blocklen; + fprintf(fp, " AIO %s %s ", file, name); + if (fdisk->block_size_override) { + fprintf(fp, "%d", block_size); + } + fprintf(fp, "\n"); + } + fprintf(fp, "\n"); +} + +SPDK_LOG_REGISTER_COMPONENT("aio", SPDK_LOG_AIO) diff --git a/src/spdk/lib/bdev/aio/bdev_aio.h b/src/spdk/lib/bdev/aio/bdev_aio.h new file mode 100644 index 00000000..f58e9822 --- /dev/null +++ b/src/spdk/lib/bdev/aio/bdev_aio.h @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BDEV_AIO_H +#define SPDK_BDEV_AIO_H + +#include "spdk/stdinc.h" + +#include +#include +#include + +#include "spdk/queue.h" +#include "spdk/bdev.h" + +#include "spdk/bdev_module.h" + +struct bdev_aio_task { + struct iocb iocb; + uint64_t len; + TAILQ_ENTRY(bdev_aio_task) link; +}; + +struct bdev_aio_io_channel { + io_context_t io_ctx; + uint64_t io_inflight; + struct spdk_io_channel *group_ch; + TAILQ_ENTRY(bdev_aio_io_channel) link; + int efd; +}; + +typedef void (*spdk_delete_aio_complete)(void *cb_arg, int bdeverrno); + +struct file_disk { + struct bdev_aio_task *reset_task; + struct spdk_poller *reset_retry_timer; + struct spdk_bdev disk; + char *filename; + int fd; + TAILQ_ENTRY(file_disk) link; + bool block_size_override; + spdk_delete_aio_complete delete_cb_fn; + void *delete_cb_arg; +}; + +struct spdk_bdev *create_aio_disk(const char *name, const char *filename, uint32_t block_size); + +void delete_aio_disk(struct spdk_bdev *bdev, spdk_delete_aio_complete cb_fn, void *cb_arg); + +#endif // SPDK_BDEV_AIO_H diff --git a/src/spdk/lib/bdev/aio/bdev_aio_rpc.c b/src/spdk/lib/bdev/aio/bdev_aio_rpc.c new file mode 100644 index 00000000..10dd237a --- /dev/null +++ b/src/spdk/lib/bdev/aio/bdev_aio_rpc.c @@ -0,0 +1,160 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "bdev_aio.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +struct rpc_construct_aio { + char *name; + char *filename; + uint32_t block_size; +}; + +static void +free_rpc_construct_aio(struct rpc_construct_aio *req) +{ + free(req->name); + free(req->filename); +} + +static const struct spdk_json_object_decoder rpc_construct_aio_decoders[] = { + {"name", offsetof(struct rpc_construct_aio, name), spdk_json_decode_string}, + {"filename", offsetof(struct rpc_construct_aio, filename), spdk_json_decode_string, true}, + {"block_size", offsetof(struct rpc_construct_aio, block_size), spdk_json_decode_uint32, true}, +}; + +static void +spdk_rpc_construct_aio_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_aio req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_construct_aio_decoders, + SPDK_COUNTOF(rpc_construct_aio_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.filename == NULL) { + goto invalid; + } + + bdev = create_aio_disk(req.name, req.filename, req.block_size); + if (bdev == NULL) { + goto invalid; + } + + free_rpc_construct_aio(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_aio(&req); +} +SPDK_RPC_REGISTER("construct_aio_bdev", spdk_rpc_construct_aio_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_aio { + char *name; +}; + +static void +free_rpc_delete_aio(struct rpc_delete_aio *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_aio_decoders[] = { + {"name", offsetof(struct rpc_delete_aio, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_aio_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_aio_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_aio req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_aio_decoders, + SPDK_COUNTOF(rpc_delete_aio_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + delete_aio_disk(bdev, _spdk_rpc_delete_aio_bdev_cb, request); + + free_rpc_delete_aio(&req); + + return; + +invalid: + free_rpc_delete_aio(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_aio_bdev", spdk_rpc_delete_aio_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/bdev.c b/src/spdk/lib/bdev/bdev.c new file mode 100644 index 00000000..ab82fffd --- /dev/null +++ b/src/spdk/lib/bdev/bdev.c @@ -0,0 +1,3950 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" + +#include "spdk/config.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/thread.h" +#include "spdk/likely.h" +#include "spdk/queue.h" +#include "spdk/nvme_spec.h" +#include "spdk/scsi_spec.h" +#include "spdk/util.h" +#include "spdk/trace.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk/string.h" + +#ifdef SPDK_CONFIG_VTUNE +#include "ittnotify.h" +#include "ittnotify_types.h" +int __itt_init_ittlib(const char *, __itt_group_id); +#endif + +#define SPDK_BDEV_IO_POOL_SIZE (64 * 1024) +#define SPDK_BDEV_IO_CACHE_SIZE 256 +#define BUF_SMALL_POOL_SIZE 8192 +#define BUF_LARGE_POOL_SIZE 1024 +#define NOMEM_THRESHOLD_COUNT 8 +#define ZERO_BUFFER_SIZE 0x100000 + +#define OWNER_BDEV 0x2 + +#define OBJECT_BDEV_IO 0x2 + +#define TRACE_GROUP_BDEV 0x3 +#define TRACE_BDEV_IO_START SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x0) +#define TRACE_BDEV_IO_DONE SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x1) + +#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC 1000 +#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1 +#define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512 +#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 10000 +#define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC (10 * 1024 * 1024) +#define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED UINT64_MAX + +static const char *qos_conf_type[] = {"Limit_IOPS", "Limit_BPS"}; +static const char *qos_rpc_type[] = {"rw_ios_per_sec", "rw_mbytes_per_sec"}; + +TAILQ_HEAD(spdk_bdev_list, spdk_bdev); + +struct spdk_bdev_mgr { + struct spdk_mempool *bdev_io_pool; + + struct spdk_mempool *buf_small_pool; + struct spdk_mempool *buf_large_pool; + + void *zero_buffer; + + TAILQ_HEAD(bdev_module_list, spdk_bdev_module) bdev_modules; + + struct spdk_bdev_list bdevs; + + bool init_complete; + bool module_init_complete; + +#ifdef SPDK_CONFIG_VTUNE + __itt_domain *domain; +#endif +}; + +static struct spdk_bdev_mgr g_bdev_mgr = { + .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules), + .bdevs = 
TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs), + .init_complete = false, + .module_init_complete = false, +}; + +static struct spdk_bdev_opts g_bdev_opts = { + .bdev_io_pool_size = SPDK_BDEV_IO_POOL_SIZE, + .bdev_io_cache_size = SPDK_BDEV_IO_CACHE_SIZE, +}; + +static spdk_bdev_init_cb g_init_cb_fn = NULL; +static void *g_init_cb_arg = NULL; + +static spdk_bdev_fini_cb g_fini_cb_fn = NULL; +static void *g_fini_cb_arg = NULL; +static struct spdk_thread *g_fini_thread = NULL; + +struct spdk_bdev_qos_limit { + /** IOs or bytes allowed per second (i.e., 1s). */ + uint64_t limit; + + /** Remaining IOs or bytes allowed in current timeslice (e.g., 1ms). + * For remaining bytes, allowed to run negative if an I/O is submitted when + * some bytes are remaining, but the I/O is bigger than that amount. The + * excess will be deducted from the next timeslice. + */ + int64_t remaining_this_timeslice; + + /** Minimum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */ + uint32_t min_per_timeslice; + + /** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */ + uint32_t max_per_timeslice; +}; + +struct spdk_bdev_qos { + /** Types of structure of rate limits. */ + struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES]; + + /** The channel that all I/O are funneled through. */ + struct spdk_bdev_channel *ch; + + /** The thread on which the poller is running. */ + struct spdk_thread *thread; + + /** Queue of I/O waiting to be issued. */ + bdev_io_tailq_t queued; + + /** Size of a timeslice in tsc ticks. */ + uint64_t timeslice_size; + + /** Timestamp of start of last timeslice. */ + uint64_t last_timeslice; + + /** Poller that processes queued I/O commands each time slice. */ + struct spdk_poller *poller; +}; + +struct spdk_bdev_mgmt_channel { + bdev_io_stailq_t need_buf_small; + bdev_io_stailq_t need_buf_large; + + /* + * Each thread keeps a cache of bdev_io - this allows + * bdev threads which are *not* DPDK threads to still + * benefit from a per-thread bdev_io cache. Without + * this, non-DPDK threads fetching from the mempool + * incur a cmpxchg on get and put. + */ + bdev_io_stailq_t per_thread_cache; + uint32_t per_thread_cache_count; + uint32_t bdev_io_cache_size; + + TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources; + TAILQ_HEAD(, spdk_bdev_io_wait_entry) io_wait_queue; +}; + +/* + * Per-module (or per-io_device) data. Multiple bdevs built on the same io_device + * will queue here their IO that awaits retry. It makes it possible to retry sending + * IO to one bdev after IO from other bdev completes. + */ +struct spdk_bdev_shared_resource { + /* The bdev management channel */ + struct spdk_bdev_mgmt_channel *mgmt_ch; + + /* + * Count of I/O submitted to bdev module and waiting for completion. + * Incremented before submit_request() is called on an spdk_bdev_io. + */ + uint64_t io_outstanding; + + /* + * Queue of IO awaiting retry because of a previous NOMEM status returned + * on this channel. + */ + bdev_io_tailq_t nomem_io; + + /* + * Threshold which io_outstanding must drop to before retrying nomem_io. 
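+ * Retries are thus deferred until the bdev module has completed enough of its outstanding I/O that resubmission is likely to make forward progress.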
+ */ + uint64_t nomem_threshold; + + /* I/O channel allocated by a bdev module */ + struct spdk_io_channel *shared_ch; + + /* Refcount of bdev channels using this resource */ + uint32_t ref; + + TAILQ_ENTRY(spdk_bdev_shared_resource) link; +}; + +#define BDEV_CH_RESET_IN_PROGRESS (1 << 0) +#define BDEV_CH_QOS_ENABLED (1 << 1) + +struct spdk_bdev_channel { + struct spdk_bdev *bdev; + + /* The channel for the underlying device */ + struct spdk_io_channel *channel; + + /* Per io_device per thread data */ + struct spdk_bdev_shared_resource *shared_resource; + + struct spdk_bdev_io_stat stat; + + /* + * Count of I/O submitted through this channel and waiting for completion. + * Incremented before submit_request() is called on an spdk_bdev_io. + */ + uint64_t io_outstanding; + + bdev_io_tailq_t queued_resets; + + uint32_t flags; + +#ifdef SPDK_CONFIG_VTUNE + uint64_t start_tsc; + uint64_t interval_tsc; + __itt_string_handle *handle; + struct spdk_bdev_io_stat prev_stat; +#endif + +}; + +struct spdk_bdev_desc { + struct spdk_bdev *bdev; + struct spdk_thread *thread; + spdk_bdev_remove_cb_t remove_cb; + void *remove_ctx; + bool remove_scheduled; + bool closed; + bool write; + TAILQ_ENTRY(spdk_bdev_desc) link; +}; + +struct spdk_bdev_iostat_ctx { + struct spdk_bdev_io_stat *stat; + spdk_bdev_get_device_stat_cb cb; + void *cb_arg; +}; + +#define __bdev_to_io_dev(bdev) (((char *)bdev) + 1) +#define __bdev_from_io_dev(io_dev) ((struct spdk_bdev *)(((char *)io_dev) - 1)) + +static void _spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg); +static void _spdk_bdev_write_zero_buffer_next(void *_bdev_io); + +void +spdk_bdev_get_opts(struct spdk_bdev_opts *opts) +{ + *opts = g_bdev_opts; +} + +int +spdk_bdev_set_opts(struct spdk_bdev_opts *opts) +{ + uint32_t min_pool_size; + + /* + * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem + * initialization. A second mgmt_ch will be created on the same thread when the application starts + * but before the deferred put_io_channel event is executed for the first mgmt_ch. 
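+ * For example, with a bdev_io_cache_size of 256 and 4 threads, bdev_io_pool_size must be at least 256 * (4 + 1) = 1280.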
+ */ + min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1); + if (opts->bdev_io_pool_size < min_pool_size) { + SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32 + " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size, + spdk_thread_get_count()); + SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size); + return -1; + } + + g_bdev_opts = *opts; + return 0; +} + +struct spdk_bdev * +spdk_bdev_first(void) +{ + struct spdk_bdev *bdev; + + bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs); + if (bdev) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name); + } + + return bdev; +} + +struct spdk_bdev * +spdk_bdev_next(struct spdk_bdev *prev) +{ + struct spdk_bdev *bdev; + + bdev = TAILQ_NEXT(prev, internal.link); + if (bdev) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name); + } + + return bdev; +} + +static struct spdk_bdev * +_bdev_next_leaf(struct spdk_bdev *bdev) +{ + while (bdev != NULL) { + if (bdev->internal.claim_module == NULL) { + return bdev; + } else { + bdev = TAILQ_NEXT(bdev, internal.link); + } + } + + return bdev; +} + +struct spdk_bdev * +spdk_bdev_first_leaf(void) +{ + struct spdk_bdev *bdev; + + bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs)); + + if (bdev) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name); + } + + return bdev; +} + +struct spdk_bdev * +spdk_bdev_next_leaf(struct spdk_bdev *prev) +{ + struct spdk_bdev *bdev; + + bdev = _bdev_next_leaf(TAILQ_NEXT(prev, internal.link)); + + if (bdev) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name); + } + + return bdev; +} + +struct spdk_bdev * +spdk_bdev_get_by_name(const char *bdev_name) +{ + struct spdk_bdev_alias *tmp; + struct spdk_bdev *bdev = spdk_bdev_first(); + + while (bdev != NULL) { + if (strcmp(bdev_name, bdev->name) == 0) { + return bdev; + } + + TAILQ_FOREACH(tmp, &bdev->aliases, tailq) { + if (strcmp(bdev_name, tmp->alias) == 0) { + return bdev; + } + } + + bdev = spdk_bdev_next(bdev); + } + + return NULL; +} + +void +spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len) +{ + struct iovec *iovs; + + iovs = bdev_io->u.bdev.iovs; + + assert(iovs != NULL); + assert(bdev_io->u.bdev.iovcnt >= 1); + + iovs[0].iov_base = buf; + iovs[0].iov_len = len; +} + +static void +spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) +{ + struct spdk_mempool *pool; + struct spdk_bdev_io *tmp; + void *buf, *aligned_buf; + bdev_io_stailq_t *stailq; + struct spdk_bdev_mgmt_channel *ch; + + assert(bdev_io->u.bdev.iovcnt == 1); + + buf = bdev_io->internal.buf; + ch = bdev_io->internal.ch->shared_resource->mgmt_ch; + + bdev_io->internal.buf = NULL; + + if (bdev_io->internal.buf_len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) { + pool = g_bdev_mgr.buf_small_pool; + stailq = &ch->need_buf_small; + } else { + pool = g_bdev_mgr.buf_large_pool; + stailq = &ch->need_buf_large; + } + + if (STAILQ_EMPTY(stailq)) { + spdk_mempool_put(pool, buf); + } else { + tmp = STAILQ_FIRST(stailq); + + aligned_buf = (void *)(((uintptr_t)buf + 511) & ~511UL); + spdk_bdev_io_set_buf(tmp, aligned_buf, tmp->internal.buf_len); + + STAILQ_REMOVE_HEAD(stailq, internal.buf_link); + tmp->internal.buf = buf; + tmp->internal.get_buf_cb(tmp->internal.ch->channel, tmp); + } +} + +void +spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len) +{ + struct spdk_mempool *pool; + bdev_io_stailq_t *stailq; + 
void *buf, *aligned_buf; + struct spdk_bdev_mgmt_channel *mgmt_ch; + + assert(cb != NULL); + assert(bdev_io->u.bdev.iovs != NULL); + + if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) { + /* Buffer already present */ + cb(bdev_io->internal.ch->channel, bdev_io); + return; + } + + assert(len <= SPDK_BDEV_LARGE_BUF_MAX_SIZE); + mgmt_ch = bdev_io->internal.ch->shared_resource->mgmt_ch; + + bdev_io->internal.buf_len = len; + bdev_io->internal.get_buf_cb = cb; + if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) { + pool = g_bdev_mgr.buf_small_pool; + stailq = &mgmt_ch->need_buf_small; + } else { + pool = g_bdev_mgr.buf_large_pool; + stailq = &mgmt_ch->need_buf_large; + } + + buf = spdk_mempool_get(pool); + + if (!buf) { + STAILQ_INSERT_TAIL(stailq, bdev_io, internal.buf_link); + } else { + aligned_buf = (void *)(((uintptr_t)buf + 511) & ~511UL); + spdk_bdev_io_set_buf(bdev_io, aligned_buf, len); + + bdev_io->internal.buf = buf; + bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io); + } +} + +static int +spdk_bdev_module_get_max_ctx_size(void) +{ + struct spdk_bdev_module *bdev_module; + int max_bdev_module_size = 0; + + TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { + max_bdev_module_size = bdev_module->get_ctx_size(); + } + } + + return max_bdev_module_size; +} + +void +spdk_bdev_config_text(FILE *fp) +{ + struct spdk_bdev_module *bdev_module; + + TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (bdev_module->config_text) { + bdev_module->config_text(fp); + } + } +} + +static void +spdk_bdev_qos_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + int i; + struct spdk_bdev_qos *qos = bdev->internal.qos; + uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES]; + + if (!qos) { + return; + } + + spdk_bdev_get_qos_rate_limits(bdev, limits); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "set_bdev_qos_limit"); + spdk_json_write_name(w, "params"); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "name", bdev->name); + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] > 0) { + spdk_json_write_named_uint64(w, qos_rpc_type[i], limits[i]); + } + } + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +void +spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_bdev_module *bdev_module; + struct spdk_bdev *bdev; + + assert(w != NULL); + + spdk_json_write_array_begin(w); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "set_bdev_options"); + spdk_json_write_name(w, "params"); + spdk_json_write_object_begin(w); + spdk_json_write_named_uint32(w, "bdev_io_pool_size", g_bdev_opts.bdev_io_pool_size); + spdk_json_write_named_uint32(w, "bdev_io_cache_size", g_bdev_opts.bdev_io_cache_size); + spdk_json_write_object_end(w); + spdk_json_write_object_end(w); + + TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (bdev_module->config_json) { + bdev_module->config_json(w); + } + } + + TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, internal.link) { + spdk_bdev_qos_config_json(bdev, w); + + if (bdev->fn_table->write_config_json) { + bdev->fn_table->write_config_json(bdev, w); + } + } + + spdk_json_write_array_end(w); +} + +static int +spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_mgmt_channel *ch = ctx_buf; + struct spdk_bdev_io 
*bdev_io; + uint32_t i; + + STAILQ_INIT(&ch->need_buf_small); + STAILQ_INIT(&ch->need_buf_large); + + STAILQ_INIT(&ch->per_thread_cache); + ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size; + + /* Pre-populate bdev_io cache to ensure this thread cannot be starved. */ + ch->per_thread_cache_count = 0; + for (i = 0; i < ch->bdev_io_cache_size; i++) { + bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool); + assert(bdev_io != NULL); + ch->per_thread_cache_count++; + STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link); + } + + TAILQ_INIT(&ch->shared_resources); + TAILQ_INIT(&ch->io_wait_queue); + + return 0; +} + +static void +spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_mgmt_channel *ch = ctx_buf; + struct spdk_bdev_io *bdev_io; + + if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) { + SPDK_ERRLOG("Pending I/O list wasn't empty on mgmt channel free\n"); + } + + if (!TAILQ_EMPTY(&ch->shared_resources)) { + SPDK_ERRLOG("Module channel list wasn't empty on mgmt channel free\n"); + } + + while (!STAILQ_EMPTY(&ch->per_thread_cache)) { + bdev_io = STAILQ_FIRST(&ch->per_thread_cache); + STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link); + ch->per_thread_cache_count--; + spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io); + } + + assert(ch->per_thread_cache_count == 0); +} + +static void +spdk_bdev_init_complete(int rc) +{ + spdk_bdev_init_cb cb_fn = g_init_cb_fn; + void *cb_arg = g_init_cb_arg; + struct spdk_bdev_module *m; + + g_bdev_mgr.init_complete = true; + g_init_cb_fn = NULL; + g_init_cb_arg = NULL; + + /* + * For modules that need to know when subsystem init is complete, + * inform them now. + */ + if (rc == 0) { + TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (m->init_complete) { + m->init_complete(); + } + } + } + + cb_fn(cb_arg, rc); +} + +static void +spdk_bdev_module_action_complete(void) +{ + struct spdk_bdev_module *m; + + /* + * Don't finish bdev subsystem initialization if + * module pre-initialization is still in progress, or + * the subsystem been already initialized. + */ + if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) { + return; + } + + /* + * Check all bdev modules for inits/examinations in progress. If any + * exist, return immediately since we cannot finish bdev subsystem + * initialization until all are completed. + */ + TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (m->internal.action_in_progress > 0) { + return; + } + } + + /* + * Modules already finished initialization - now that all + * the bdev modules have finished their asynchronous I/O + * processing, the entire bdev layer can be marked as complete. 
+ */ + spdk_bdev_init_complete(0); +} + +static void +spdk_bdev_module_action_done(struct spdk_bdev_module *module) +{ + assert(module->internal.action_in_progress > 0); + module->internal.action_in_progress--; + spdk_bdev_module_action_complete(); +} + +void +spdk_bdev_module_init_done(struct spdk_bdev_module *module) +{ + spdk_bdev_module_action_done(module); +} + +void +spdk_bdev_module_examine_done(struct spdk_bdev_module *module) +{ + spdk_bdev_module_action_done(module); +} + +/** The last initialized bdev module */ +static struct spdk_bdev_module *g_resume_bdev_module = NULL; + +static int +spdk_bdev_modules_init(void) +{ + struct spdk_bdev_module *module; + int rc = 0; + + TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { + g_resume_bdev_module = module; + rc = module->module_init(); + if (rc != 0) { + return rc; + } + } + + g_resume_bdev_module = NULL; + return 0; +} + + +static void +spdk_bdev_init_failed_complete(void *cb_arg) +{ + spdk_bdev_init_complete(-1); +} + +static void +spdk_bdev_init_failed(void *cb_arg) +{ + spdk_bdev_finish(spdk_bdev_init_failed_complete, NULL); +} + +void +spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg) +{ + struct spdk_conf_section *sp; + struct spdk_bdev_opts bdev_opts; + int32_t bdev_io_pool_size, bdev_io_cache_size; + int cache_size; + int rc = 0; + char mempool_name[32]; + + assert(cb_fn != NULL); + + sp = spdk_conf_find_section(NULL, "Bdev"); + if (sp != NULL) { + spdk_bdev_get_opts(&bdev_opts); + + bdev_io_pool_size = spdk_conf_section_get_intval(sp, "BdevIoPoolSize"); + if (bdev_io_pool_size >= 0) { + bdev_opts.bdev_io_pool_size = bdev_io_pool_size; + } + + bdev_io_cache_size = spdk_conf_section_get_intval(sp, "BdevIoCacheSize"); + if (bdev_io_cache_size >= 0) { + bdev_opts.bdev_io_cache_size = bdev_io_cache_size; + } + + if (spdk_bdev_set_opts(&bdev_opts)) { + spdk_bdev_init_complete(-1); + return; + } + + assert(memcmp(&bdev_opts, &g_bdev_opts, sizeof(bdev_opts)) == 0); + } + + g_init_cb_fn = cb_fn; + g_init_cb_arg = cb_arg; + + snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid()); + + g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name, + g_bdev_opts.bdev_io_pool_size, + sizeof(struct spdk_bdev_io) + + spdk_bdev_module_get_max_ctx_size(), + 0, + SPDK_ENV_SOCKET_ID_ANY); + + if (g_bdev_mgr.bdev_io_pool == NULL) { + SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n"); + spdk_bdev_init_complete(-1); + return; + } + + /** + * Ensure no more than half of the total buffers end up local caches, by + * using spdk_thread_get_count() to determine how many local caches we need + * to account for. 
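+ * For example, with BUF_SMALL_POOL_SIZE of 8192 and 4 threads, each per-thread cache is capped at 8192 / (2 * 4) = 1024 small buffers.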
+ */ + cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_thread_get_count()); + snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid()); + + g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name, + BUF_SMALL_POOL_SIZE, + SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512, + cache_size, + SPDK_ENV_SOCKET_ID_ANY); + if (!g_bdev_mgr.buf_small_pool) { + SPDK_ERRLOG("create rbuf small pool failed\n"); + spdk_bdev_init_complete(-1); + return; + } + + cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_thread_get_count()); + snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid()); + + g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name, + BUF_LARGE_POOL_SIZE, + SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512, + cache_size, + SPDK_ENV_SOCKET_ID_ANY); + if (!g_bdev_mgr.buf_large_pool) { + SPDK_ERRLOG("create rbuf large pool failed\n"); + spdk_bdev_init_complete(-1); + return; + } + + g_bdev_mgr.zero_buffer = spdk_dma_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE, + NULL); + if (!g_bdev_mgr.zero_buffer) { + SPDK_ERRLOG("create bdev zero buffer failed\n"); + spdk_bdev_init_complete(-1); + return; + } + +#ifdef SPDK_CONFIG_VTUNE + g_bdev_mgr.domain = __itt_domain_create("spdk_bdev"); +#endif + + spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create, + spdk_bdev_mgmt_channel_destroy, + sizeof(struct spdk_bdev_mgmt_channel), + "bdev_mgr"); + + rc = spdk_bdev_modules_init(); + g_bdev_mgr.module_init_complete = true; + if (rc != 0) { + SPDK_ERRLOG("bdev modules init failed\n"); + spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_init_failed, NULL); + return; + } + + spdk_bdev_module_action_complete(); +} + +static void +spdk_bdev_mgr_unregister_cb(void *io_device) +{ + spdk_bdev_fini_cb cb_fn = g_fini_cb_fn; + + if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != g_bdev_opts.bdev_io_pool_size) { + SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n", + spdk_mempool_count(g_bdev_mgr.bdev_io_pool), + g_bdev_opts.bdev_io_pool_size); + } + + if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) { + SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n", + spdk_mempool_count(g_bdev_mgr.buf_small_pool), + BUF_SMALL_POOL_SIZE); + assert(false); + } + + if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) { + SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n", + spdk_mempool_count(g_bdev_mgr.buf_large_pool), + BUF_LARGE_POOL_SIZE); + assert(false); + } + + spdk_mempool_free(g_bdev_mgr.bdev_io_pool); + spdk_mempool_free(g_bdev_mgr.buf_small_pool); + spdk_mempool_free(g_bdev_mgr.buf_large_pool); + spdk_dma_free(g_bdev_mgr.zero_buffer); + + cb_fn(g_fini_cb_arg); + g_fini_cb_fn = NULL; + g_fini_cb_arg = NULL; + g_bdev_mgr.init_complete = false; + g_bdev_mgr.module_init_complete = false; +} + +static void +spdk_bdev_module_finish_iter(void *arg) +{ + struct spdk_bdev_module *bdev_module; + + /* Start iterating from the last touched module */ + if (!g_resume_bdev_module) { + bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list); + } else { + bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list, + internal.tailq); + } + + while (bdev_module) { + if (bdev_module->async_fini) { + /* Save our place so we can resume later. We must + * save the variable here, before calling module_fini() + * below, because in some cases the module may immediately + * call spdk_bdev_module_finish_done() and re-enter + * this function to continue iterating. 
*/ + g_resume_bdev_module = bdev_module; + } + + if (bdev_module->module_fini) { + bdev_module->module_fini(); + } + + if (bdev_module->async_fini) { + return; + } + + bdev_module = TAILQ_PREV(bdev_module, bdev_module_list, + internal.tailq); + } + + g_resume_bdev_module = NULL; + spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_mgr_unregister_cb); +} + +void +spdk_bdev_module_finish_done(void) +{ + if (spdk_get_thread() != g_fini_thread) { + spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL); + } else { + spdk_bdev_module_finish_iter(NULL); + } +} + +static void +_spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno) +{ + struct spdk_bdev *bdev = cb_arg; + + if (bdeverrno && bdev) { + SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n", + bdev->name); + + /* + * Since the call to spdk_bdev_unregister() failed, we have no way to free this + * bdev; try to continue by manually removing this bdev from the list and continue + * with the next bdev in the list. + */ + TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link); + } + + if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Done unregistering bdevs\n"); + /* + * Bdev module finish need to be deffered as we might be in the middle of some context + * (like bdev part free) that will use this bdev (or private bdev driver ctx data) + * after returning. + */ + spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_module_finish_iter, NULL); + return; + } + + /* + * Unregister the last bdev in the list. The last bdev in the list should be a bdev + * that has no bdevs that depend on it. + */ + bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list); + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Unregistering bdev '%s'\n", bdev->name); + spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev); +} + +void +spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg) +{ + struct spdk_bdev_module *m; + + assert(cb_fn != NULL); + + g_fini_thread = spdk_get_thread(); + + g_fini_cb_fn = cb_fn; + g_fini_cb_arg = cb_arg; + + TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (m->fini_start) { + m->fini_start(); + } + } + + _spdk_bdev_finish_unregister_bdevs_iter(NULL, 0); +} + +static struct spdk_bdev_io * +spdk_bdev_get_io(struct spdk_bdev_channel *channel) +{ + struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch; + struct spdk_bdev_io *bdev_io; + + if (ch->per_thread_cache_count > 0) { + bdev_io = STAILQ_FIRST(&ch->per_thread_cache); + STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link); + ch->per_thread_cache_count--; + } else if (spdk_unlikely(!TAILQ_EMPTY(&ch->io_wait_queue))) { + /* + * Don't try to look for bdev_ios in the global pool if there are + * waiters on bdev_ios - we don't want this caller to jump the line. 
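+ * Returning NULL here surfaces -ENOMEM to the submitter, which can then register with spdk_bdev_queue_io_wait() and take its turn behind the existing waiters.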
+ */ + bdev_io = NULL; + } else { + bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool); + } + + return bdev_io; +} + +void +spdk_bdev_free_io(struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev_mgmt_channel *ch = bdev_io->internal.ch->shared_resource->mgmt_ch; + + assert(bdev_io != NULL); + assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING); + + if (bdev_io->internal.buf != NULL) { + spdk_bdev_io_put_buf(bdev_io); + } + + if (ch->per_thread_cache_count < ch->bdev_io_cache_size) { + ch->per_thread_cache_count++; + STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link); + while (ch->per_thread_cache_count > 0 && !TAILQ_EMPTY(&ch->io_wait_queue)) { + struct spdk_bdev_io_wait_entry *entry; + + entry = TAILQ_FIRST(&ch->io_wait_queue); + TAILQ_REMOVE(&ch->io_wait_queue, entry, link); + entry->cb_fn(entry->cb_arg); + } + } else { + /* We should never have a full cache with entries on the io wait queue. */ + assert(TAILQ_EMPTY(&ch->io_wait_queue)); + spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io); + } +} + +static bool +_spdk_bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit) +{ + assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES); + + switch (limit) { + case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: + return true; + case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT: + return false; + case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES: + default: + return false; + } +} + +static bool +_spdk_bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return true; + default: + return false; + } +} + +static uint64_t +_spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return bdev_io->u.nvme_passthru.nbytes; + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return bdev_io->u.bdev.num_blocks * bdev->blocklen; + default: + return 0; + } +} + +static void +_spdk_bdev_qos_update_per_io(struct spdk_bdev_qos *qos, uint64_t io_size_in_byte) +{ + int i; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + switch (i) { + case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT: + qos->rate_limits[i].remaining_this_timeslice--; + break; + case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT: + qos->rate_limits[i].remaining_this_timeslice -= io_size_in_byte; + break; + case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES: + default: + break; + } + } +} + +static void +_spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos) +{ + struct spdk_bdev_io *bdev_io = NULL; + struct spdk_bdev *bdev = ch->bdev; + struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + int i; + bool to_limit_io; + uint64_t io_size_in_byte; + + while (!TAILQ_EMPTY(&qos->queued)) { + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].max_per_timeslice > 0 && + (qos->rate_limits[i].remaining_this_timeslice <= 0)) { + return; + } + } + + bdev_io = TAILQ_FIRST(&qos->queued); + TAILQ_REMOVE(&qos->queued, bdev_io, internal.link); + ch->io_outstanding++; + shared_resource->io_outstanding++; + to_limit_io = 
_spdk_bdev_qos_io_to_limit(bdev_io); + if (to_limit_io == true) { + io_size_in_byte = _spdk_bdev_get_io_size_in_byte(bdev_io); + _spdk_bdev_qos_update_per_io(qos, io_size_in_byte); + } + bdev->fn_table->submit_request(ch->channel, bdev_io); + } +} + +static void +_spdk_bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb_fn) +{ + int rc; + + bdev_io->internal.waitq_entry.bdev = bdev_io->bdev; + bdev_io->internal.waitq_entry.cb_fn = cb_fn; + bdev_io->internal.waitq_entry.cb_arg = bdev_io; + rc = spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch), + &bdev_io->internal.waitq_entry); + if (rc != 0) { + SPDK_ERRLOG("Queue IO failed, rc=%d\n", rc); + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx); + } +} + +static bool +_spdk_bdev_io_type_can_split(uint8_t type) +{ + assert(type != SPDK_BDEV_IO_TYPE_INVALID); + assert(type < SPDK_BDEV_NUM_IO_TYPES); + + /* Only split READ and WRITE I/O. Theoretically other types of I/O like + * UNMAP could be split, but these types of I/O are typically much larger + * in size (sometimes the size of the entire block device), and the bdev + * module can more efficiently split these types of I/O. Plus those types + * of I/O do not have a payload, which makes the splitting process simpler. + */ + if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) { + return true; + } else { + return false; + } +} + +static bool +_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io) +{ + uint64_t start_stripe, end_stripe; + uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary; + + if (io_boundary == 0) { + return false; + } + + if (!_spdk_bdev_io_type_can_split(bdev_io->type)) { + return false; + } + + start_stripe = bdev_io->u.bdev.offset_blocks; + end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1; + /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. 
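+ * For example, with an optimal_io_boundary of 8 blocks, an I/O starting at block 6 and spanning 4 blocks yields start_stripe 0 and end_stripe 1, so it will be split.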
*/ + if (spdk_likely(spdk_u32_is_pow2(io_boundary))) { + start_stripe >>= spdk_u32log2(io_boundary); + end_stripe >>= spdk_u32log2(io_boundary); + } else { + start_stripe /= io_boundary; + end_stripe /= io_boundary; + } + return (start_stripe != end_stripe); +} + +static uint32_t +_to_next_boundary(uint64_t offset, uint32_t boundary) +{ + return (boundary - (offset % boundary)); +} + +static void +_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); + +static void +_spdk_bdev_io_split_with_payload(void *_bdev_io) +{ + struct spdk_bdev_io *bdev_io = _bdev_io; + uint64_t current_offset, remaining; + uint32_t blocklen, to_next_boundary, to_next_boundary_bytes; + struct iovec *parent_iov, *iov; + uint64_t parent_iov_offset, iov_len; + uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt; + int rc; + + remaining = bdev_io->u.bdev.split_remaining_num_blocks; + current_offset = bdev_io->u.bdev.split_current_offset_blocks; + blocklen = bdev_io->bdev->blocklen; + parent_iov_offset = (current_offset - bdev_io->u.bdev.offset_blocks) * blocklen; + parent_iovcnt = bdev_io->u.bdev.iovcnt; + + for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) { + parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos]; + if (parent_iov_offset < parent_iov->iov_len) { + break; + } + parent_iov_offset -= parent_iov->iov_len; + } + + child_iovcnt = 0; + while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { + to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary); + to_next_boundary = spdk_min(remaining, to_next_boundary); + to_next_boundary_bytes = to_next_boundary * blocklen; + iov = &bdev_io->child_iov[child_iovcnt]; + iovcnt = 0; + while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt && + child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { + parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos]; + iov_len = spdk_min(to_next_boundary_bytes, parent_iov->iov_len - parent_iov_offset); + to_next_boundary_bytes -= iov_len; + + bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset; + bdev_io->child_iov[child_iovcnt].iov_len = iov_len; + + if (iov_len < parent_iov->iov_len - parent_iov_offset) { + parent_iov_offset += iov_len; + } else { + parent_iovpos++; + parent_iov_offset = 0; + } + child_iovcnt++; + iovcnt++; + } + + if (to_next_boundary_bytes > 0) { + /* We had to stop this child I/O early because we ran out of + * child_iov space. Make sure the iovs collected are valid and + * then adjust to_next_boundary before starting the child I/O. 
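+ * The uncollected remainder (and therefore the collected iovs) must be a whole number of blocks; if it is not, the child I/O cannot be issued and the parent I/O is failed.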
+ */ + if ((to_next_boundary_bytes % blocklen) != 0) { + SPDK_ERRLOG("Remaining %" PRIu32 " is not multiple of block size %" PRIu32 "\n", + to_next_boundary_bytes, blocklen); + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + if (bdev_io->u.bdev.split_outstanding == 0) { + bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx); + } + return; + } + to_next_boundary -= to_next_boundary_bytes / blocklen; + } + + bdev_io->u.bdev.split_outstanding++; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + rc = spdk_bdev_readv_blocks(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + iov, iovcnt, current_offset, to_next_boundary, + _spdk_bdev_io_split_done, bdev_io); + } else { + rc = spdk_bdev_writev_blocks(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + iov, iovcnt, current_offset, to_next_boundary, + _spdk_bdev_io_split_done, bdev_io); + } + + if (rc == 0) { + current_offset += to_next_boundary; + remaining -= to_next_boundary; + bdev_io->u.bdev.split_current_offset_blocks = current_offset; + bdev_io->u.bdev.split_remaining_num_blocks = remaining; + } else { + bdev_io->u.bdev.split_outstanding--; + if (rc == -ENOMEM) { + if (bdev_io->u.bdev.split_outstanding == 0) { + /* No I/O is outstanding. Hence we should wait here. */ + _spdk_bdev_queue_io_wait_with_cb(bdev_io, + _spdk_bdev_io_split_with_payload); + } + } else { + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + if (bdev_io->u.bdev.split_outstanding == 0) { + bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx); + } + } + + return; + } + } +} + +static void +_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *parent_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + } + parent_io->u.bdev.split_outstanding--; + if (parent_io->u.bdev.split_outstanding != 0) { + return; + } + + /* + * Parent I/O finishes when all blocks are consumed or there is any failure of + * child I/O and no outstanding child I/O. + */ + if (parent_io->u.bdev.split_remaining_num_blocks == 0 || + parent_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS) { + parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS, + parent_io->internal.caller_ctx); + return; + } + + /* + * Continue with the splitting process. This function will complete the parent I/O if the + * splitting is done. 
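+ * Otherwise it issues the next round of child I/O from the parent's saved split_current_offset_blocks and split_remaining_num_blocks.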
+ */ + _spdk_bdev_io_split_with_payload(parent_io); +} + +static void +_spdk_bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + assert(_spdk_bdev_io_type_can_split(bdev_io->type)); + + bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks; + bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks; + bdev_io->u.bdev.split_outstanding = 0; + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS; + + _spdk_bdev_io_split_with_payload(bdev_io); +} + +static void +_spdk_bdev_io_submit(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch; + struct spdk_io_channel *ch = bdev_ch->channel; + struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource; + uint64_t tsc; + + tsc = spdk_get_ticks(); + bdev_io->internal.submit_tsc = tsc; + spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type); + bdev_ch->io_outstanding++; + shared_resource->io_outstanding++; + bdev_io->internal.in_submit_request = true; + if (spdk_likely(bdev_ch->flags == 0)) { + if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) { + bdev->fn_table->submit_request(ch, bdev_io); + } else { + bdev_ch->io_outstanding--; + shared_resource->io_outstanding--; + TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link); + } + } else if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) { + bdev_ch->io_outstanding--; + shared_resource->io_outstanding--; + TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link); + _spdk_bdev_qos_io_submit(bdev_ch, bdev->internal.qos); + } else { + SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + bdev_io->internal.in_submit_request = false; +} + +static void +spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_thread *thread = spdk_io_channel_get_thread(bdev_io->internal.ch->channel); + + assert(thread != NULL); + assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING); + + if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) { + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + spdk_bdev_io_get_buf(bdev_io, _spdk_bdev_io_split, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + } else { + _spdk_bdev_io_split(NULL, bdev_io); + } + return; + } + + if (bdev_io->internal.ch->flags & BDEV_CH_QOS_ENABLED) { + if ((thread == bdev->internal.qos->thread) || !bdev->internal.qos->thread) { + _spdk_bdev_io_submit(bdev_io); + } else { + bdev_io->internal.io_submit_ch = bdev_io->internal.ch; + bdev_io->internal.ch = bdev->internal.qos->ch; + spdk_thread_send_msg(bdev->internal.qos->thread, _spdk_bdev_io_submit, bdev_io); + } + } else { + _spdk_bdev_io_submit(bdev_io); + } +} + +static void +spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch; + struct spdk_io_channel *ch = bdev_ch->channel; + + assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING); + + bdev_io->internal.in_submit_request = true; + bdev->fn_table->submit_request(ch, bdev_io); + bdev_io->internal.in_submit_request = false; +} + +static void +spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, + struct spdk_bdev *bdev, 
void *cb_arg, + spdk_bdev_io_completion_cb cb) +{ + bdev_io->bdev = bdev; + bdev_io->internal.caller_ctx = cb_arg; + bdev_io->internal.cb = cb; + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING; + bdev_io->internal.in_submit_request = false; + bdev_io->internal.buf = NULL; + bdev_io->internal.io_submit_ch = NULL; +} + +static bool +_spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type) +{ + return bdev->fn_table->io_type_supported(bdev->ctxt, io_type); +} + +bool +spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type) +{ + bool supported; + + supported = _spdk_bdev_io_type_supported(bdev, io_type); + + if (!supported) { + switch (io_type) { + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + /* The bdev layer will emulate write zeroes as long as write is supported. */ + supported = _spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE); + break; + default: + break; + } + } + + return supported; +} + +int +spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + if (bdev->fn_table->dump_info_json) { + return bdev->fn_table->dump_info_json(bdev->ctxt, w); + } + + return 0; +} + +static void +spdk_bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos) +{ + uint32_t max_per_timeslice = 0; + int i; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + qos->rate_limits[i].max_per_timeslice = 0; + continue; + } + + max_per_timeslice = qos->rate_limits[i].limit * + SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC; + + qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice, + qos->rate_limits[i].min_per_timeslice); + + qos->rate_limits[i].remaining_this_timeslice = qos->rate_limits[i].max_per_timeslice; + } +} + +static int +spdk_bdev_channel_poll_qos(void *arg) +{ + struct spdk_bdev_qos *qos = arg; + uint64_t now = spdk_get_ticks(); + int i; + + if (now < (qos->last_timeslice + qos->timeslice_size)) { + /* We received our callback earlier than expected - return + * immediately and wait to do accounting until at least one + * timeslice has actually expired. This should never happen + * with a well-behaved timer implementation. + */ + return 0; + } + + /* Reset for next round of rate limiting */ + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + /* We may have allowed the IOs or bytes to slightly overrun in the last + * timeslice. remaining_this_timeslice is signed, so if it's negative + * here, we'll account for the overrun so that the next timeslice will + * be appropriately reduced. 
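+ * For example, if only 64 KiB of byte budget remained and a 1 MiB I/O was admitted, remaining_this_timeslice is now negative and the deficit is carried forward when max_per_timeslice is added back below.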
+ */ + if (qos->rate_limits[i].remaining_this_timeslice > 0) { + qos->rate_limits[i].remaining_this_timeslice = 0; + } + } + + while (now >= (qos->last_timeslice + qos->timeslice_size)) { + qos->last_timeslice += qos->timeslice_size; + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + qos->rate_limits[i].remaining_this_timeslice += + qos->rate_limits[i].max_per_timeslice; + } + } + + _spdk_bdev_qos_io_submit(qos->ch, qos); + + return -1; +} + +static void +_spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch) +{ + struct spdk_bdev_shared_resource *shared_resource; + + if (!ch) { + return; + } + + if (ch->channel) { + spdk_put_io_channel(ch->channel); + } + + assert(ch->io_outstanding == 0); + + shared_resource = ch->shared_resource; + if (shared_resource) { + assert(ch->io_outstanding == 0); + assert(shared_resource->ref > 0); + shared_resource->ref--; + if (shared_resource->ref == 0) { + assert(shared_resource->io_outstanding == 0); + TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link); + spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch)); + free(shared_resource); + } + } +} + +/* Caller must hold bdev->internal.mutex. */ +static void +_spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch) +{ + struct spdk_bdev_qos *qos = bdev->internal.qos; + int i; + + /* Rate limiting on this bdev enabled */ + if (qos) { + if (qos->ch == NULL) { + struct spdk_io_channel *io_ch; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch, + bdev->name, spdk_get_thread()); + + /* No qos channel has been selected, so set one up */ + + /* Take another reference to ch */ + io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev)); + qos->ch = ch; + + qos->thread = spdk_io_channel_get_thread(io_ch); + + TAILQ_INIT(&qos->queued); + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + qos->rate_limits[i].min_per_timeslice = + SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE; + } else { + qos->rate_limits[i].min_per_timeslice = + SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE; + } + + if (qos->rate_limits[i].limit == 0) { + qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + } + } + spdk_bdev_qos_update_max_quota_per_timeslice(qos); + qos->timeslice_size = + SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; + qos->last_timeslice = spdk_get_ticks(); + qos->poller = spdk_poller_register(spdk_bdev_channel_poll_qos, + qos, + SPDK_BDEV_QOS_TIMESLICE_IN_USEC); + } + + ch->flags |= BDEV_CH_QOS_ENABLED; + } +} + +static int +spdk_bdev_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_bdev *bdev = __bdev_from_io_dev(io_device); + struct spdk_bdev_channel *ch = ctx_buf; + struct spdk_io_channel *mgmt_io_ch; + struct spdk_bdev_mgmt_channel *mgmt_ch; + struct spdk_bdev_shared_resource *shared_resource; + + ch->bdev = bdev; + ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt); + if (!ch->channel) { + return -1; + } + + mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr); + if (!mgmt_io_ch) { + return -1; + } + + mgmt_ch = spdk_io_channel_get_ctx(mgmt_io_ch); + TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) { + if (shared_resource->shared_ch == ch->channel) { + spdk_put_io_channel(mgmt_io_ch); + shared_resource->ref++; + break; + } + } + + if (shared_resource == NULL) { + shared_resource = calloc(1, sizeof(*shared_resource)); + if (shared_resource == NULL) { + spdk_put_io_channel(mgmt_io_ch); 
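+ /* shared_resource allocation failed; the extra mgmt channel reference taken above has already been released, so just fail channel creation. */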
+ return -1; + } + + shared_resource->mgmt_ch = mgmt_ch; + shared_resource->io_outstanding = 0; + TAILQ_INIT(&shared_resource->nomem_io); + shared_resource->nomem_threshold = 0; + shared_resource->shared_ch = ch->channel; + shared_resource->ref = 1; + TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link); + } + + memset(&ch->stat, 0, sizeof(ch->stat)); + ch->stat.ticks_rate = spdk_get_ticks_hz(); + ch->io_outstanding = 0; + TAILQ_INIT(&ch->queued_resets); + ch->flags = 0; + ch->shared_resource = shared_resource; + +#ifdef SPDK_CONFIG_VTUNE + { + char *name; + __itt_init_ittlib(NULL, 0); + name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch); + if (!name) { + _spdk_bdev_channel_destroy_resource(ch); + return -1; + } + ch->handle = __itt_string_handle_create(name); + free(name); + ch->start_tsc = spdk_get_ticks(); + ch->interval_tsc = spdk_get_ticks_hz() / 100; + memset(&ch->prev_stat, 0, sizeof(ch->prev_stat)); + } +#endif + + pthread_mutex_lock(&bdev->internal.mutex); + _spdk_bdev_enable_qos(bdev, ch); + pthread_mutex_unlock(&bdev->internal.mutex); + + return 0; +} + +/* + * Abort I/O that are waiting on a data buffer. These types of I/O are + * linked using the spdk_bdev_io internal.buf_link TAILQ_ENTRY. + */ +static void +_spdk_bdev_abort_buf_io(bdev_io_stailq_t *queue, struct spdk_bdev_channel *ch) +{ + bdev_io_stailq_t tmp; + struct spdk_bdev_io *bdev_io; + + STAILQ_INIT(&tmp); + + while (!STAILQ_EMPTY(queue)) { + bdev_io = STAILQ_FIRST(queue); + STAILQ_REMOVE_HEAD(queue, internal.buf_link); + if (bdev_io->internal.ch == ch) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + STAILQ_INSERT_TAIL(&tmp, bdev_io, internal.buf_link); + } + } + + STAILQ_SWAP(&tmp, queue, spdk_bdev_io); +} + +/* + * Abort I/O that are queued waiting for submission. These types of I/O are + * linked using the spdk_bdev_io link TAILQ_ENTRY. + */ +static void +_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch) +{ + struct spdk_bdev_io *bdev_io, *tmp; + + TAILQ_FOREACH_SAFE(bdev_io, queue, internal.link, tmp) { + if (bdev_io->internal.ch == ch) { + TAILQ_REMOVE(queue, bdev_io, internal.link); + /* + * spdk_bdev_io_complete() assumes that the completed I/O had + * been submitted to the bdev module. Since in this case it + * hadn't, bump io_outstanding to account for the decrement + * that spdk_bdev_io_complete() will do. + */ + if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) { + ch->io_outstanding++; + ch->shared_resource->io_outstanding++; + } + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +static void +spdk_bdev_qos_channel_destroy(void *cb_arg) +{ + struct spdk_bdev_qos *qos = cb_arg; + + spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch)); + spdk_poller_unregister(&qos->poller); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Free QoS %p.\n", qos); + + free(qos); +} + +static int +spdk_bdev_qos_destroy(struct spdk_bdev *bdev) +{ + int i; + + /* + * Cleanly shutting down the QoS poller is tricky, because + * during the asynchronous operation the user could open + * a new descriptor and create a new channel, spawning + * a new QoS poller. + * + * The strategy is to create a new QoS structure here and swap it + * in. The shutdown path then continues to refer to the old one + * until it completes and then releases it. 
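+ * The configured rate limits are carried over into the new structure; only the per-channel state (channel, thread, poller, queue and timeslice counters) is reset.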
+ */ + struct spdk_bdev_qos *new_qos, *old_qos; + + old_qos = bdev->internal.qos; + + new_qos = calloc(1, sizeof(*new_qos)); + if (!new_qos) { + SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n"); + return -ENOMEM; + } + + /* Copy the old QoS data into the newly allocated structure */ + memcpy(new_qos, old_qos, sizeof(*new_qos)); + + /* Zero out the key parts of the QoS structure */ + new_qos->ch = NULL; + new_qos->thread = NULL; + new_qos->poller = NULL; + TAILQ_INIT(&new_qos->queued); + /* + * The limit member of spdk_bdev_qos_limit structure is not zeroed. + * It will be used later for the new QoS structure. + */ + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + new_qos->rate_limits[i].remaining_this_timeslice = 0; + new_qos->rate_limits[i].min_per_timeslice = 0; + new_qos->rate_limits[i].max_per_timeslice = 0; + } + + bdev->internal.qos = new_qos; + + if (old_qos->thread == NULL) { + free(old_qos); + } else { + spdk_thread_send_msg(old_qos->thread, spdk_bdev_qos_channel_destroy, + old_qos); + } + + /* It is safe to continue with destroying the bdev even though the QoS channel hasn't + * been destroyed yet. The destruction path will end up waiting for the final + * channel to be put before it releases resources. */ + + return 0; +} + +static void +_spdk_bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add) +{ + total->bytes_read += add->bytes_read; + total->num_read_ops += add->num_read_ops; + total->bytes_written += add->bytes_written; + total->num_write_ops += add->num_write_ops; + total->read_latency_ticks += add->read_latency_ticks; + total->write_latency_ticks += add->write_latency_ticks; +} + +static void +spdk_bdev_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_channel *ch = ctx_buf; + struct spdk_bdev_mgmt_channel *mgmt_ch; + struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name, + spdk_get_thread()); + + /* This channel is going away, so add its statistics into the bdev so that they don't get lost. 
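+ * The counters are folded in under bdev->internal.mutex, since channels on other threads may be doing the same.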
*/ + pthread_mutex_lock(&ch->bdev->internal.mutex); + _spdk_bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat); + pthread_mutex_unlock(&ch->bdev->internal.mutex); + + mgmt_ch = shared_resource->mgmt_ch; + + _spdk_bdev_abort_queued_io(&ch->queued_resets, ch); + _spdk_bdev_abort_queued_io(&shared_resource->nomem_io, ch); + _spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_small, ch); + _spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_large, ch); + + _spdk_bdev_channel_destroy_resource(ch); +} + +int +spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias) +{ + struct spdk_bdev_alias *tmp; + + if (alias == NULL) { + SPDK_ERRLOG("Empty alias passed\n"); + return -EINVAL; + } + + if (spdk_bdev_get_by_name(alias)) { + SPDK_ERRLOG("Bdev name/alias: %s already exists\n", alias); + return -EEXIST; + } + + tmp = calloc(1, sizeof(*tmp)); + if (tmp == NULL) { + SPDK_ERRLOG("Unable to allocate alias\n"); + return -ENOMEM; + } + + tmp->alias = strdup(alias); + if (tmp->alias == NULL) { + free(tmp); + SPDK_ERRLOG("Unable to allocate alias\n"); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq); + + return 0; +} + +int +spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias) +{ + struct spdk_bdev_alias *tmp; + + TAILQ_FOREACH(tmp, &bdev->aliases, tailq) { + if (strcmp(alias, tmp->alias) == 0) { + TAILQ_REMOVE(&bdev->aliases, tmp, tailq); + free(tmp->alias); + free(tmp); + return 0; + } + } + + SPDK_INFOLOG(SPDK_LOG_BDEV, "Alias %s does not exists\n", alias); + + return -ENOENT; +} + +void +spdk_bdev_alias_del_all(struct spdk_bdev *bdev) +{ + struct spdk_bdev_alias *p, *tmp; + + TAILQ_FOREACH_SAFE(p, &bdev->aliases, tailq, tmp) { + TAILQ_REMOVE(&bdev->aliases, p, tailq); + free(p->alias); + free(p); + } +} + +struct spdk_io_channel * +spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc) +{ + return spdk_get_io_channel(__bdev_to_io_dev(desc->bdev)); +} + +const char * +spdk_bdev_get_name(const struct spdk_bdev *bdev) +{ + return bdev->name; +} + +const char * +spdk_bdev_get_product_name(const struct spdk_bdev *bdev) +{ + return bdev->product_name; +} + +const struct spdk_bdev_aliases_list * +spdk_bdev_get_aliases(const struct spdk_bdev *bdev) +{ + return &bdev->aliases; +} + +uint32_t +spdk_bdev_get_block_size(const struct spdk_bdev *bdev) +{ + return bdev->blocklen; +} + +uint64_t +spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) +{ + return bdev->blockcnt; +} + +const char * +spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type) +{ + return qos_rpc_type[type]; +} + +void +spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits) +{ + int i; + + memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES); + + pthread_mutex_lock(&bdev->internal.mutex); + if (bdev->internal.qos) { + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (bdev->internal.qos->rate_limits[i].limit != + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + limits[i] = bdev->internal.qos->rate_limits[i].limit; + if (_spdk_bdev_qos_is_iops_rate_limit(i) == false) { + /* Change from Byte to Megabyte which is user visible. 
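+ * (i.e. divide by 1024 * 1024 so the reported value matches the rw_mbytes_per_sec RPC parameter)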
*/ + limits[i] = limits[i] / 1024 / 1024; + } + } + } + } + pthread_mutex_unlock(&bdev->internal.mutex); +} + +size_t +spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) +{ + /* TODO: push this logic down to the bdev modules */ + if (bdev->need_aligned_buffer) { + return bdev->blocklen; + } + + return 1; +} + +uint32_t +spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev) +{ + return bdev->optimal_io_boundary; +} + +bool +spdk_bdev_has_write_cache(const struct spdk_bdev *bdev) +{ + return bdev->write_cache; +} + +const struct spdk_uuid * +spdk_bdev_get_uuid(const struct spdk_bdev *bdev) +{ + return &bdev->uuid; +} + +uint64_t +spdk_bdev_get_qd(const struct spdk_bdev *bdev) +{ + return bdev->internal.measured_queue_depth; +} + +uint64_t +spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev) +{ + return bdev->internal.period; +} + +uint64_t +spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev) +{ + return bdev->internal.weighted_io_time; +} + +uint64_t +spdk_bdev_get_io_time(const struct spdk_bdev *bdev) +{ + return bdev->internal.io_time; +} + +static void +_calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i); + + bdev->internal.measured_queue_depth = bdev->internal.temporary_queue_depth; + + if (bdev->internal.measured_queue_depth) { + bdev->internal.io_time += bdev->internal.period; + bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth; + } +} + +static void +_calculate_measured_qd(struct spdk_io_channel_iter *i) +{ + struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch); + + bdev->internal.temporary_queue_depth += ch->io_outstanding; + spdk_for_each_channel_continue(i, 0); +} + +static int +spdk_bdev_calculate_measured_queue_depth(void *ctx) +{ + struct spdk_bdev *bdev = ctx; + bdev->internal.temporary_queue_depth = 0; + spdk_for_each_channel(__bdev_to_io_dev(bdev), _calculate_measured_qd, bdev, + _calculate_measured_qd_cpl); + return 0; +} + +void +spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period) +{ + bdev->internal.period = period; + + if (bdev->internal.qd_poller != NULL) { + spdk_poller_unregister(&bdev->internal.qd_poller); + bdev->internal.measured_queue_depth = UINT64_MAX; + } + + if (period != 0) { + bdev->internal.qd_poller = spdk_poller_register(spdk_bdev_calculate_measured_queue_depth, bdev, + period); + } +} + +int +spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size) +{ + int ret; + + pthread_mutex_lock(&bdev->internal.mutex); + + /* bdev has open descriptors */ + if (!TAILQ_EMPTY(&bdev->internal.open_descs) && + bdev->blockcnt > size) { + ret = -EBUSY; + } else { + bdev->blockcnt = size; + ret = 0; + } + + pthread_mutex_unlock(&bdev->internal.mutex); + + return ret; +} + +/* + * Convert I/O offset and length from bytes to blocks. + * + * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size. 
+ */ +static uint64_t +spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks, + uint64_t num_bytes, uint64_t *num_blocks) +{ + uint32_t block_size = bdev->blocklen; + + *offset_blocks = offset_bytes / block_size; + *num_blocks = num_bytes / block_size; + + return (offset_bytes % block_size) | (num_bytes % block_size); +} + +static bool +spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks) +{ + /* Return failure if offset_blocks + num_blocks is less than offset_blocks; indicates there + * has been an overflow and hence the offset has been wrapped around */ + if (offset_blocks + num_blocks < offset_blocks) { + return false; + } + + /* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */ + if (offset_blocks + num_blocks > bdev->blockcnt) { + return false; + } + + return true; +} + +int +spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_READ; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; + bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen; + bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg); +} + +int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_READ; + bdev_io->u.bdev.iovs = iov; + bdev_io->u.bdev.iovcnt = iovcnt; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = 
offset_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; + bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen; + bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset, uint64_t len, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; + bdev_io->u.bdev.iovs = iov; + bdev_io->u.bdev.iovcnt = iovcnt; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t len, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + 
spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->u.bdev.offset_blocks = offset_blocks; + bdev_io->u.bdev.num_blocks = num_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) { + spdk_bdev_io_submit(bdev_io); + return 0; + } else if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)) { + assert(spdk_bdev_get_block_size(bdev) <= ZERO_BUFFER_SIZE); + bdev_io->u.bdev.split_remaining_num_blocks = num_blocks; + bdev_io->u.bdev.split_current_offset_blocks = offset_blocks; + _spdk_bdev_write_zero_buffer_next(bdev_io); + return 0; + } else { + spdk_bdev_free_io(bdev_io); + return -ENOTSUP; + } +} + +int +spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + if (num_blocks == 0) { + SPDK_ERRLOG("Can't unmap 0 bytes\n"); + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP; + + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = NULL; + bdev_io->u.bdev.iovs[0].iov_len = 0; + bdev_io->u.bdev.iovcnt = 1; + + bdev_io->u.bdev.offset_blocks = offset_blocks; + bdev_io->u.bdev.num_blocks = num_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t length, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + 
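/* Grab a bdev_io from the per-thread cache; a NULL return means no bdev_io objects are + * currently available, so report -ENOMEM and let the caller retry later, + * for example via spdk_bdev_queue_io_wait(). */ +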
bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH; + bdev_io->u.bdev.iovs = NULL; + bdev_io->u.bdev.iovcnt = 0; + bdev_io->u.bdev.offset_blocks = offset_blocks; + bdev_io->u.bdev.num_blocks = num_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +static void +_spdk_bdev_reset_dev(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_bdev_channel *ch = spdk_io_channel_iter_get_ctx(i); + struct spdk_bdev_io *bdev_io; + + bdev_io = TAILQ_FIRST(&ch->queued_resets); + TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link); + spdk_bdev_io_submit_reset(bdev_io); +} + +static void +_spdk_bdev_reset_freeze_channel(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch; + struct spdk_bdev_channel *channel; + struct spdk_bdev_mgmt_channel *mgmt_channel; + struct spdk_bdev_shared_resource *shared_resource; + bdev_io_tailq_t tmp_queued; + + TAILQ_INIT(&tmp_queued); + + ch = spdk_io_channel_iter_get_channel(i); + channel = spdk_io_channel_get_ctx(ch); + shared_resource = channel->shared_resource; + mgmt_channel = shared_resource->mgmt_ch; + + channel->flags |= BDEV_CH_RESET_IN_PROGRESS; + + if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) { + /* The QoS object is always valid and readable while + * the channel flag is set, so the lock here should not + * be necessary. We're not in the fast path though, so + * just take it anyway. */ + pthread_mutex_lock(&channel->bdev->internal.mutex); + if (channel->bdev->internal.qos->ch == channel) { + TAILQ_SWAP(&channel->bdev->internal.qos->queued, &tmp_queued, spdk_bdev_io, internal.link); + } + pthread_mutex_unlock(&channel->bdev->internal.mutex); + } + + _spdk_bdev_abort_queued_io(&shared_resource->nomem_io, channel); + _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel); + _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel); + _spdk_bdev_abort_queued_io(&tmp_queued, channel); + + spdk_for_each_channel_continue(i, 0); +} + +static void +_spdk_bdev_start_reset(void *ctx) +{ + struct spdk_bdev_channel *ch = ctx; + + spdk_for_each_channel(__bdev_to_io_dev(ch->bdev), _spdk_bdev_reset_freeze_channel, + ch, _spdk_bdev_reset_dev); +} + +static void +_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch) +{ + struct spdk_bdev *bdev = ch->bdev; + + assert(!TAILQ_EMPTY(&ch->queued_resets)); + + pthread_mutex_lock(&bdev->internal.mutex); + if (bdev->internal.reset_in_progress == NULL) { + bdev->internal.reset_in_progress = TAILQ_FIRST(&ch->queued_resets); + /* + * Take a channel reference for the target bdev for the life of this + * reset. This guards against the channel getting destroyed while + * spdk_for_each_channel() calls related to this reset IO are in + * progress. We will release the reference when this reset is + * completed. 
+ */ + bdev->internal.reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev)); + _spdk_bdev_start_reset(ch); + } + pthread_mutex_unlock(&bdev->internal.mutex); +} + +int +spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_RESET; + bdev_io->u.reset.ch_ref = NULL; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + pthread_mutex_lock(&bdev->internal.mutex); + TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, internal.link); + pthread_mutex_unlock(&bdev->internal.mutex); + + _spdk_bdev_channel_start_reset(channel); + + return 0; +} + +void +spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch, + struct spdk_bdev_io_stat *stat) +{ + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + *stat = channel->stat; +} + +static void +_spdk_bdev_get_device_stat_done(struct spdk_io_channel_iter *i, int status) +{ + void *io_device = spdk_io_channel_iter_get_io_device(i); + struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i); + + bdev_iostat_ctx->cb(__bdev_from_io_dev(io_device), bdev_iostat_ctx->stat, + bdev_iostat_ctx->cb_arg, 0); + free(bdev_iostat_ctx); +} + +static void +_spdk_bdev_get_each_channel_stat(struct spdk_io_channel_iter *i) +{ + struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + _spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &channel->stat); + spdk_for_each_channel_continue(i, 0); +} + +void +spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat, + spdk_bdev_get_device_stat_cb cb, void *cb_arg) +{ + struct spdk_bdev_iostat_ctx *bdev_iostat_ctx; + + assert(bdev != NULL); + assert(stat != NULL); + assert(cb != NULL); + + bdev_iostat_ctx = calloc(1, sizeof(struct spdk_bdev_iostat_ctx)); + if (bdev_iostat_ctx == NULL) { + SPDK_ERRLOG("Unable to allocate memory for spdk_bdev_iostat_ctx\n"); + cb(bdev, stat, cb_arg, -ENOMEM); + return; + } + + bdev_iostat_ctx->stat = stat; + bdev_iostat_ctx->cb = cb; + bdev_iostat_ctx->cb_arg = cb_arg; + + /* Start with the statistics from previously deleted channels. */ + pthread_mutex_lock(&bdev->internal.mutex); + _spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &bdev->internal.stat); + pthread_mutex_unlock(&bdev->internal.mutex); + + /* Then iterate and add the statistics from each existing channel. 
*/ + spdk_for_each_channel(__bdev_to_io_dev(bdev), + _spdk_bdev_get_each_channel_stat, + bdev_iostat_ctx, + _spdk_bdev_get_device_stat_done); +} + +int +spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN; + bdev_io->u.nvme_passthru.cmd = *cmd; + bdev_io->u.nvme_passthru.buf = buf; + bdev_io->u.nvme_passthru.nbytes = nbytes; + bdev_io->u.nvme_passthru.md_buf = NULL; + bdev_io->u.nvme_passthru.md_len = 0; + + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + /* + * Do not try to parse the NVMe command - we could maybe use bits in the opcode + * to easily determine if the command is a read or write, but for now just + * do not allow io_passthru with a read-only descriptor. + */ + return -EBADF; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO; + bdev_io->u.nvme_passthru.cmd = *cmd; + bdev_io->u.nvme_passthru.buf = buf; + bdev_io->u.nvme_passthru.nbytes = nbytes; + bdev_io->u.nvme_passthru.md_buf = NULL; + bdev_io->u.nvme_passthru.md_len = 0; + + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + /* + * Do not try to parse the NVMe command - we could maybe use bits in the opcode + * to easily determine if the command is a read or write, but for now just + * do not allow io_passthru with a read-only descriptor. 
+ */ + return -EBADF; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD; + bdev_io->u.nvme_passthru.cmd = *cmd; + bdev_io->u.nvme_passthru.buf = buf; + bdev_io->u.nvme_passthru.nbytes = nbytes; + bdev_io->u.nvme_passthru.md_buf = md_buf; + bdev_io->u.nvme_passthru.md_len = md_len; + + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int +spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch, + struct spdk_bdev_io_wait_entry *entry) +{ + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + struct spdk_bdev_mgmt_channel *mgmt_ch = channel->shared_resource->mgmt_ch; + + if (bdev != entry->bdev) { + SPDK_ERRLOG("bdevs do not match\n"); + return -EINVAL; + } + + if (mgmt_ch->per_thread_cache_count > 0) { + SPDK_ERRLOG("Cannot queue io_wait if spdk_bdev_io available in per-thread cache\n"); + return -EINVAL; + } + + TAILQ_INSERT_TAIL(&mgmt_ch->io_wait_queue, entry, link); + return 0; +} + +static void +_spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch) +{ + struct spdk_bdev *bdev = bdev_ch->bdev; + struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource; + struct spdk_bdev_io *bdev_io; + + if (shared_resource->io_outstanding > shared_resource->nomem_threshold) { + /* + * Allow some more I/O to complete before retrying the nomem_io queue. + * Some drivers (such as nvme) cannot immediately take a new I/O in + * the context of a completion, because the resources for the I/O are + * not released until control returns to the bdev poller. Also, we + * may require several small I/O to complete before a larger I/O + * (that requires splitting) can be submitted. + */ + return; + } + + while (!TAILQ_EMPTY(&shared_resource->nomem_io)) { + bdev_io = TAILQ_FIRST(&shared_resource->nomem_io); + TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, internal.link); + bdev_io->internal.ch->io_outstanding++; + shared_resource->io_outstanding++; + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING; + bdev->fn_table->submit_request(bdev_io->internal.ch->channel, bdev_io); + if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) { + break; + } + } +} + +static inline void +_spdk_bdev_io_complete(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + uint64_t tsc; + + if (spdk_unlikely(bdev_io->internal.in_submit_request || bdev_io->internal.io_submit_ch)) { + /* + * Send the completion to the thread that originally submitted the I/O, + * which may not be the current thread in the case of QoS. + */ + if (bdev_io->internal.io_submit_ch) { + bdev_io->internal.ch = bdev_io->internal.io_submit_ch; + bdev_io->internal.io_submit_ch = NULL; + } + + /* + * Defer completion to avoid potential infinite recursion if the + * user's completion callback issues a new I/O. 
+ */ + spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->internal.ch->channel), + _spdk_bdev_io_complete, bdev_io); + return; + } + + tsc = spdk_get_ticks(); + spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)bdev_io, 0); + + if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) { + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + bdev_io->internal.ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; + bdev_io->internal.ch->stat.num_read_ops++; + bdev_io->internal.ch->stat.read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_io->internal.ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; + bdev_io->internal.ch->stat.num_write_ops++; + bdev_io->internal.ch->stat.write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); + break; + default: + break; + } + } + +#ifdef SPDK_CONFIG_VTUNE + uint64_t now_tsc = spdk_get_ticks(); + if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) { + uint64_t data[5]; + + data[0] = bdev_io->internal.ch->stat.num_read_ops - bdev_io->internal.ch->prev_stat.num_read_ops; + data[1] = bdev_io->internal.ch->stat.bytes_read - bdev_io->internal.ch->prev_stat.bytes_read; + data[2] = bdev_io->internal.ch->stat.num_write_ops - bdev_io->internal.ch->prev_stat.num_write_ops; + data[3] = bdev_io->internal.ch->stat.bytes_written - bdev_io->internal.ch->prev_stat.bytes_written; + data[4] = bdev_io->bdev->fn_table->get_spin_time ? + bdev_io->bdev->fn_table->get_spin_time(bdev_io->internal.ch->channel) : 0; + + __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->internal.ch->handle, + __itt_metadata_u64, 5, data); + + bdev_io->internal.ch->prev_stat = bdev_io->internal.ch->stat; + bdev_io->internal.ch->start_tsc = now_tsc; + } +#endif + + assert(bdev_io->internal.cb != NULL); + assert(spdk_get_thread() == spdk_io_channel_get_thread(bdev_io->internal.ch->channel)); + + bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS, + bdev_io->internal.caller_ctx); +} + +static void +_spdk_bdev_reset_complete(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i); + + if (bdev_io->u.reset.ch_ref != NULL) { + spdk_put_io_channel(bdev_io->u.reset.ch_ref); + bdev_io->u.reset.ch_ref = NULL; + } + + _spdk_bdev_io_complete(bdev_io); +} + +static void +_spdk_bdev_unfreeze_channel(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch); + + ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS; + if (!TAILQ_EMPTY(&ch->queued_resets)) { + _spdk_bdev_channel_start_reset(ch); + } + + spdk_for_each_channel_continue(i, 0); +} + +void +spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch; + struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource; + + bdev_io->internal.status = status; + + if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) { + bool unlock_channels = false; + + if (status == SPDK_BDEV_IO_STATUS_NOMEM) { + SPDK_ERRLOG("NOMEM returned for reset\n"); + } + pthread_mutex_lock(&bdev->internal.mutex); + if (bdev_io == bdev->internal.reset_in_progress) { + bdev->internal.reset_in_progress = NULL; + unlock_channels = true; + } + 
pthread_mutex_unlock(&bdev->internal.mutex); + + if (unlock_channels) { + spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_unfreeze_channel, + bdev_io, _spdk_bdev_reset_complete); + return; + } + } else { + assert(bdev_ch->io_outstanding > 0); + assert(shared_resource->io_outstanding > 0); + bdev_ch->io_outstanding--; + shared_resource->io_outstanding--; + + if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) { + TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link); + /* + * Wait for some of the outstanding I/O to complete before we + * retry any of the nomem_io. Normally we will wait for + * NOMEM_THRESHOLD_COUNT I/O to complete but for low queue + * depth channels we will instead wait for half to complete. + */ + shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2, + (int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT); + return; + } + + if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) { + _spdk_bdev_ch_retry_io(bdev_ch); + } + } + + _spdk_bdev_io_complete(bdev_io); +} + +void +spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc, + enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq) +{ + if (sc == SPDK_SCSI_STATUS_GOOD) { + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS; + } else { + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SCSI_ERROR; + bdev_io->internal.error.scsi.sc = sc; + bdev_io->internal.error.scsi.sk = sk; + bdev_io->internal.error.scsi.asc = asc; + bdev_io->internal.error.scsi.ascq = ascq; + } + + spdk_bdev_io_complete(bdev_io, bdev_io->internal.status); +} + +void +spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, + int *sc, int *sk, int *asc, int *ascq) +{ + assert(sc != NULL); + assert(sk != NULL); + assert(asc != NULL); + assert(ascq != NULL); + + switch (bdev_io->internal.status) { + case SPDK_BDEV_IO_STATUS_SUCCESS: + *sc = SPDK_SCSI_STATUS_GOOD; + *sk = SPDK_SCSI_SENSE_NO_SENSE; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_BDEV_IO_STATUS_NVME_ERROR: + spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq); + break; + case SPDK_BDEV_IO_STATUS_SCSI_ERROR: + *sc = bdev_io->internal.error.scsi.sc; + *sk = bdev_io->internal.error.scsi.sk; + *asc = bdev_io->internal.error.scsi.asc; + *ascq = bdev_io->internal.error.scsi.ascq; + break; + default: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } +} + +void +spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc) +{ + if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) { + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS; + } else { + bdev_io->internal.error.nvme.sct = sct; + bdev_io->internal.error.nvme.sc = sc; + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_NVME_ERROR; + } + + spdk_bdev_io_complete(bdev_io, bdev_io->internal.status); +} + +void +spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc) +{ + assert(sct != NULL); + assert(sc != NULL); + + if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) { + *sct = bdev_io->internal.error.nvme.sct; + *sc = bdev_io->internal.error.nvme.sc; + } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) { + *sct = SPDK_NVME_SCT_GENERIC; + *sc = SPDK_NVME_SC_SUCCESS; + } else { + *sct = SPDK_NVME_SCT_GENERIC; + *sc = 
SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + } +} + +struct spdk_thread * +spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io) +{ + return spdk_io_channel_get_thread(bdev_io->internal.ch->channel); +} + +static void +_spdk_bdev_qos_config_limit(struct spdk_bdev *bdev, uint64_t *limits) +{ + uint64_t min_qos_set; + int i; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + break; + } + } + + if (i == SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) { + SPDK_ERRLOG("Invalid rate limits set.\n"); + return; + } + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + min_qos_set = SPDK_BDEV_QOS_MIN_IOS_PER_SEC; + } else { + min_qos_set = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC; + } + + if (limits[i] == 0 || limits[i] % min_qos_set) { + SPDK_ERRLOG("Assigned limit %" PRIu64 " on bdev %s is not multiple of %" PRIu64 "\n", + limits[i], bdev->name, min_qos_set); + SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name); + return; + } + } + + if (!bdev->internal.qos) { + bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos)); + if (!bdev->internal.qos) { + SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n"); + return; + } + } + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + bdev->internal.qos->rate_limits[i].limit = limits[i]; + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS type:%d set:%lu\n", + bdev->name, i, limits[i]); + } + + return; +} + +static void +_spdk_bdev_qos_config(struct spdk_bdev *bdev) +{ + struct spdk_conf_section *sp = NULL; + const char *val = NULL; + int i = 0, j = 0; + uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {}; + bool config_qos = false; + + sp = spdk_conf_find_section(NULL, "QoS"); + if (!sp) { + return; + } + + while (j < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) { + limits[j] = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + + i = 0; + while (true) { + val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 0); + if (!val) { + break; + } + + if (strcmp(bdev->name, val) != 0) { + i++; + continue; + } + + val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 1); + if (val) { + if (_spdk_bdev_qos_is_iops_rate_limit(j) == true) { + limits[j] = strtoull(val, NULL, 10); + } else { + limits[j] = strtoull(val, NULL, 10) * 1024 * 1024; + } + config_qos = true; + } + + break; + } + + j++; + } + + if (config_qos == true) { + _spdk_bdev_qos_config_limit(bdev, limits); + } + + return; +} + +static int +spdk_bdev_init(struct spdk_bdev *bdev) +{ + char *bdev_name; + + assert(bdev->module != NULL); + + if (!bdev->name) { + SPDK_ERRLOG("Bdev name is NULL\n"); + return -EINVAL; + } + + if (spdk_bdev_get_by_name(bdev->name)) { + SPDK_ERRLOG("Bdev name:%s already exists\n", bdev->name); + return -EEXIST; + } + + /* Users often register their own I/O devices using the bdev name. In + * order to avoid conflicts, prepend bdev_. 
*/ + bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name); + if (!bdev_name) { + SPDK_ERRLOG("Unable to allocate memory for internal bdev name.\n"); + return -ENOMEM; + } + + bdev->internal.status = SPDK_BDEV_STATUS_READY; + bdev->internal.measured_queue_depth = UINT64_MAX; + bdev->internal.claim_module = NULL; + bdev->internal.qd_poller = NULL; + bdev->internal.qos = NULL; + + TAILQ_INIT(&bdev->internal.open_descs); + + TAILQ_INIT(&bdev->aliases); + + bdev->internal.reset_in_progress = NULL; + + _spdk_bdev_qos_config(bdev); + + spdk_io_device_register(__bdev_to_io_dev(bdev), + spdk_bdev_channel_create, spdk_bdev_channel_destroy, + sizeof(struct spdk_bdev_channel), + bdev_name); + + free(bdev_name); + + pthread_mutex_init(&bdev->internal.mutex, NULL); + return 0; +} + +static void +spdk_bdev_destroy_cb(void *io_device) +{ + int rc; + struct spdk_bdev *bdev; + spdk_bdev_unregister_cb cb_fn; + void *cb_arg; + + bdev = __bdev_from_io_dev(io_device); + cb_fn = bdev->internal.unregister_cb; + cb_arg = bdev->internal.unregister_ctx; + + rc = bdev->fn_table->destruct(bdev->ctxt); + if (rc < 0) { + SPDK_ERRLOG("destruct failed\n"); + } + if (rc <= 0 && cb_fn != NULL) { + cb_fn(cb_arg, rc); + } +} + + +static void +spdk_bdev_fini(struct spdk_bdev *bdev) +{ + pthread_mutex_destroy(&bdev->internal.mutex); + + free(bdev->internal.qos); + + spdk_io_device_unregister(__bdev_to_io_dev(bdev), spdk_bdev_destroy_cb); +} + +static void +spdk_bdev_start(struct spdk_bdev *bdev) +{ + struct spdk_bdev_module *module; + uint32_t action; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name); + TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link); + + /* Examine configuration before initializing I/O */ + TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (module->examine_config) { + action = module->internal.action_in_progress; + module->internal.action_in_progress++; + module->examine_config(bdev); + if (action != module->internal.action_in_progress) { + SPDK_ERRLOG("examine_config for module %s did not call spdk_bdev_module_examine_done()\n", + module->name); + } + } + } + + if (bdev->internal.claim_module) { + return; + } + + TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (module->examine_disk) { + module->internal.action_in_progress++; + module->examine_disk(bdev); + } + } +} + +int +spdk_bdev_register(struct spdk_bdev *bdev) +{ + int rc = spdk_bdev_init(bdev); + + if (rc == 0) { + spdk_bdev_start(bdev); + } + + return rc; +} + +int +spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count) +{ + int rc; + + rc = spdk_bdev_init(vbdev); + if (rc) { + return rc; + } + + spdk_bdev_start(vbdev); + return 0; +} + +void +spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno) +{ + if (bdev->internal.unregister_cb != NULL) { + bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno); + } +} + +static void +_remove_notify(void *arg) +{ + struct spdk_bdev_desc *desc = arg; + + desc->remove_scheduled = false; + + if (desc->closed) { + free(desc); + } else { + desc->remove_cb(desc->remove_ctx); + } +} + +void +spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) +{ + struct spdk_bdev_desc *desc, *tmp; + bool do_destruct = true; + struct spdk_thread *thread; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name); + + thread = spdk_get_thread(); + if (!thread) { + /* The user called this from a non-SPDK thread. 
*/ + if (cb_fn != NULL) { + cb_fn(cb_arg, -ENOTSUP); + } + return; + } + + pthread_mutex_lock(&bdev->internal.mutex); + + bdev->internal.status = SPDK_BDEV_STATUS_REMOVING; + bdev->internal.unregister_cb = cb_fn; + bdev->internal.unregister_ctx = cb_arg; + + TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) { + if (desc->remove_cb) { + do_destruct = false; + /* + * Defer invocation of the remove_cb to a separate message that will + * run later on its thread. This ensures this context unwinds and + * we don't recursively unregister this bdev again if the remove_cb + * immediately closes its descriptor. + */ + if (!desc->remove_scheduled) { + /* Avoid scheduling removal of the same descriptor multiple times. */ + desc->remove_scheduled = true; + spdk_thread_send_msg(desc->thread, _remove_notify, desc); + } + } + } + + if (!do_destruct) { + pthread_mutex_unlock(&bdev->internal.mutex); + return; + } + + TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link); + pthread_mutex_unlock(&bdev->internal.mutex); + + spdk_bdev_fini(bdev); +} + +int +spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, + void *remove_ctx, struct spdk_bdev_desc **_desc) +{ + struct spdk_bdev_desc *desc; + struct spdk_thread *thread; + + thread = spdk_get_thread(); + if (!thread) { + SPDK_ERRLOG("Cannot open bdev from non-SPDK thread.\n"); + return -ENOTSUP; + } + + desc = calloc(1, sizeof(*desc)); + if (desc == NULL) { + SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n"); + return -ENOMEM; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name, + spdk_get_thread()); + + pthread_mutex_lock(&bdev->internal.mutex); + + if (write && bdev->internal.claim_module) { + SPDK_ERRLOG("Could not open %s - %s module already claimed it\n", + bdev->name, bdev->internal.claim_module->name); + free(desc); + pthread_mutex_unlock(&bdev->internal.mutex); + return -EPERM; + } + + TAILQ_INSERT_TAIL(&bdev->internal.open_descs, desc, link); + + desc->bdev = bdev; + desc->thread = thread; + desc->remove_cb = remove_cb; + desc->remove_ctx = remove_ctx; + desc->write = write; + *_desc = desc; + + pthread_mutex_unlock(&bdev->internal.mutex); + + return 0; +} + +void +spdk_bdev_close(struct spdk_bdev_desc *desc) +{ + struct spdk_bdev *bdev = desc->bdev; + bool do_unregister = false; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name, + spdk_get_thread()); + + assert(desc->thread == spdk_get_thread()); + + pthread_mutex_lock(&bdev->internal.mutex); + + TAILQ_REMOVE(&bdev->internal.open_descs, desc, link); + + desc->closed = true; + + if (!desc->remove_scheduled) { + free(desc); + } + + /* If no more descriptors, kill QoS channel */ + if (bdev->internal.qos && TAILQ_EMPTY(&bdev->internal.open_descs)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n", + bdev->name, spdk_get_thread()); + + if (spdk_bdev_qos_destroy(bdev)) { + /* There isn't anything we can do to recover here. Just let the + * old QoS poller keep running. The QoS handling won't change + * cores when the user allocates a new channel, but it won't break. */ + SPDK_ERRLOG("Unable to shut down QoS poller. 
It will continue running on the current thread.\n"); + } + } + + spdk_bdev_set_qd_sampling_period(bdev, 0); + + if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) { + do_unregister = true; + } + pthread_mutex_unlock(&bdev->internal.mutex); + + if (do_unregister == true) { + spdk_bdev_unregister(bdev, bdev->internal.unregister_cb, bdev->internal.unregister_ctx); + } +} + +int +spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_bdev_module *module) +{ + if (bdev->internal.claim_module != NULL) { + SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name, + bdev->internal.claim_module->name); + return -EPERM; + } + + if (desc && !desc->write) { + desc->write = true; + } + + bdev->internal.claim_module = module; + return 0; +} + +void +spdk_bdev_module_release_bdev(struct spdk_bdev *bdev) +{ + assert(bdev->internal.claim_module != NULL); + bdev->internal.claim_module = NULL; +} + +struct spdk_bdev * +spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc) +{ + return desc->bdev; +} + +void +spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp) +{ + struct iovec *iovs; + int iovcnt; + + if (bdev_io == NULL) { + return; + } + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + iovs = bdev_io->u.bdev.iovs; + iovcnt = bdev_io->u.bdev.iovcnt; + break; + case SPDK_BDEV_IO_TYPE_WRITE: + iovs = bdev_io->u.bdev.iovs; + iovcnt = bdev_io->u.bdev.iovcnt; + break; + default: + iovs = NULL; + iovcnt = 0; + break; + } + + if (iovp) { + *iovp = iovs; + } + if (iovcntp) { + *iovcntp = iovcnt; + } +} + +void +spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module) +{ + + if (spdk_bdev_module_list_find(bdev_module->name)) { + SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name); + assert(false); + } + + if (bdev_module->async_init) { + bdev_module->internal.action_in_progress = 1; + } + + /* + * Modules with examine callbacks must be initialized first, so they are + * ready to handle examine callbacks from later modules that will + * register physical bdevs. 
+ */ + if (bdev_module->examine_config != NULL || bdev_module->examine_disk != NULL) { + TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq); + } else { + TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq); + } +} + +struct spdk_bdev_module * +spdk_bdev_module_list_find(const char *name) +{ + struct spdk_bdev_module *bdev_module; + + TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) { + if (strcmp(name, bdev_module->name) == 0) { + break; + } + } + + return bdev_module; +} + +static void +_spdk_bdev_write_zero_buffer_next(void *_bdev_io) +{ + struct spdk_bdev_io *bdev_io = _bdev_io; + uint64_t num_bytes, num_blocks; + int rc; + + num_bytes = spdk_min(spdk_bdev_get_block_size(bdev_io->bdev) * + bdev_io->u.bdev.split_remaining_num_blocks, + ZERO_BUFFER_SIZE); + num_blocks = num_bytes / spdk_bdev_get_block_size(bdev_io->bdev); + + rc = spdk_bdev_write_blocks(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + g_bdev_mgr.zero_buffer, + bdev_io->u.bdev.split_current_offset_blocks, num_blocks, + _spdk_bdev_write_zero_buffer_done, bdev_io); + if (rc == 0) { + bdev_io->u.bdev.split_remaining_num_blocks -= num_blocks; + bdev_io->u.bdev.split_current_offset_blocks += num_blocks; + } else if (rc == -ENOMEM) { + _spdk_bdev_queue_io_wait_with_cb(bdev_io, _spdk_bdev_write_zero_buffer_next); + } else { + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx); + } +} + +static void +_spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *parent_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx); + return; + } + + if (parent_io->u.bdev.split_remaining_num_blocks == 0) { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS; + parent_io->internal.cb(parent_io, true, parent_io->internal.caller_ctx); + return; + } + + _spdk_bdev_write_zero_buffer_next(parent_io); +} + +struct set_qos_limit_ctx { + void (*cb_fn)(void *cb_arg, int status); + void *cb_arg; + struct spdk_bdev *bdev; +}; + +static void +_spdk_bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status) +{ + pthread_mutex_lock(&ctx->bdev->internal.mutex); + ctx->bdev->internal.qos_mod_in_progress = false; + pthread_mutex_unlock(&ctx->bdev->internal.mutex); + + ctx->cb_fn(ctx->cb_arg, status); + free(ctx); +} + +static void +_spdk_bdev_disable_qos_done(void *cb_arg) +{ + struct set_qos_limit_ctx *ctx = cb_arg; + struct spdk_bdev *bdev = ctx->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_qos *qos; + + pthread_mutex_lock(&bdev->internal.mutex); + qos = bdev->internal.qos; + bdev->internal.qos = NULL; + pthread_mutex_unlock(&bdev->internal.mutex); + + while (!TAILQ_EMPTY(&qos->queued)) { + /* Send queued I/O back to their original thread for resubmission. */ + bdev_io = TAILQ_FIRST(&qos->queued); + TAILQ_REMOVE(&qos->queued, bdev_io, internal.link); + + if (bdev_io->internal.io_submit_ch) { + /* + * Channel was changed when sending it to the QoS thread - change it back + * before sending it back to the original thread. 
+ */ + bdev_io->internal.ch = bdev_io->internal.io_submit_ch; + bdev_io->internal.io_submit_ch = NULL; + } + + spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->internal.ch->channel), + _spdk_bdev_io_submit, bdev_io); + } + + spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch)); + spdk_poller_unregister(&qos->poller); + + free(qos); + + _spdk_bdev_set_qos_limit_done(ctx, 0); +} + +static void +_spdk_bdev_disable_qos_msg_done(struct spdk_io_channel_iter *i, int status) +{ + void *io_device = spdk_io_channel_iter_get_io_device(i); + struct spdk_bdev *bdev = __bdev_from_io_dev(io_device); + struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_thread *thread; + + pthread_mutex_lock(&bdev->internal.mutex); + thread = bdev->internal.qos->thread; + pthread_mutex_unlock(&bdev->internal.mutex); + + spdk_thread_send_msg(thread, _spdk_bdev_disable_qos_done, ctx); +} + +static void +_spdk_bdev_disable_qos_msg(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch); + + bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED; + + spdk_for_each_channel_continue(i, 0); +} + +static void +_spdk_bdev_update_qos_rate_limit_msg(void *cb_arg) +{ + struct set_qos_limit_ctx *ctx = cb_arg; + struct spdk_bdev *bdev = ctx->bdev; + + pthread_mutex_lock(&bdev->internal.mutex); + spdk_bdev_qos_update_max_quota_per_timeslice(bdev->internal.qos); + pthread_mutex_unlock(&bdev->internal.mutex); + + _spdk_bdev_set_qos_limit_done(ctx, 0); +} + +static void +_spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i) +{ + void *io_device = spdk_io_channel_iter_get_io_device(i); + struct spdk_bdev *bdev = __bdev_from_io_dev(io_device); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch); + + pthread_mutex_lock(&bdev->internal.mutex); + _spdk_bdev_enable_qos(bdev, bdev_ch); + pthread_mutex_unlock(&bdev->internal.mutex); + spdk_for_each_channel_continue(i, 0); +} + +static void +_spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status) +{ + struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + _spdk_bdev_set_qos_limit_done(ctx, status); +} + +static void +_spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits) +{ + int i; + + assert(bdev->internal.qos != NULL); + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + bdev->internal.qos->rate_limits[i].limit = limits[i]; + + if (limits[i] == 0) { + bdev->internal.qos->rate_limits[i].limit = + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED; + } + } + } +} + +void +spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits, + void (*cb_fn)(void *cb_arg, int status), void *cb_arg) +{ + struct set_qos_limit_ctx *ctx; + uint32_t limit_set_complement; + uint64_t min_limit_per_sec; + int i; + bool disable_rate_limit = true; + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) { + continue; + } + + if (limits[i] > 0) { + disable_rate_limit = false; + } + + if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) { + min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC; + } else { + /* Change from megabyte to byte rate limit */ + limits[i] = limits[i] * 1024 * 1024; + min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC; + } + + limit_set_complement = limits[i] % min_limit_per_sec; + if (limit_set_complement) { + 
SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n", + limits[i], min_limit_per_sec); + limits[i] += min_limit_per_sec - limit_set_complement; + SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]); + } + } + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + ctx->bdev = bdev; + + pthread_mutex_lock(&bdev->internal.mutex); + if (bdev->internal.qos_mod_in_progress) { + pthread_mutex_unlock(&bdev->internal.mutex); + free(ctx); + cb_fn(cb_arg, -EAGAIN); + return; + } + bdev->internal.qos_mod_in_progress = true; + + if (disable_rate_limit == true && bdev->internal.qos) { + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED && + (bdev->internal.qos->rate_limits[i].limit > 0 && + bdev->internal.qos->rate_limits[i].limit != + SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) { + disable_rate_limit = false; + break; + } + } + } + + if (disable_rate_limit == false) { + if (bdev->internal.qos == NULL) { + /* Enabling */ + bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos)); + if (!bdev->internal.qos) { + pthread_mutex_unlock(&bdev->internal.mutex); + SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n"); + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + _spdk_bdev_set_qos_rate_limits(bdev, limits); + + spdk_for_each_channel(__bdev_to_io_dev(bdev), + _spdk_bdev_enable_qos_msg, ctx, + _spdk_bdev_enable_qos_done); + } else { + /* Updating */ + _spdk_bdev_set_qos_rate_limits(bdev, limits); + + spdk_thread_send_msg(bdev->internal.qos->thread, + _spdk_bdev_update_qos_rate_limit_msg, ctx); + } + } else { + if (bdev->internal.qos != NULL) { + _spdk_bdev_set_qos_rate_limits(bdev, limits); + + /* Disabling */ + spdk_for_each_channel(__bdev_to_io_dev(bdev), + _spdk_bdev_disable_qos_msg, ctx, + _spdk_bdev_disable_qos_msg_done); + } else { + pthread_mutex_unlock(&bdev->internal.mutex); + _spdk_bdev_set_qos_limit_done(ctx, 0); + return; + } + } + + pthread_mutex_unlock(&bdev->internal.mutex); +} + +SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV) + +SPDK_TRACE_REGISTER_FN(bdev_trace) +{ + spdk_trace_register_owner(OWNER_BDEV, 'b'); + spdk_trace_register_object(OBJECT_BDEV_IO, 'i'); + spdk_trace_register_description("BDEV_IO_START", "", TRACE_BDEV_IO_START, OWNER_BDEV, + OBJECT_BDEV_IO, 1, 0, "type: "); + spdk_trace_register_description("BDEV_IO_DONE", "", TRACE_BDEV_IO_DONE, OWNER_BDEV, + OBJECT_BDEV_IO, 0, 0, ""); +} diff --git a/src/spdk/lib/bdev/crypto/Makefile b/src/spdk/lib/bdev/crypto/Makefile new file mode 100644 index 00000000..c3eb1b74 --- /dev/null +++ b/src/spdk/lib/bdev/crypto/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(ENV_CFLAGS) + +C_SRCS = vbdev_crypto.c vbdev_crypto_rpc.c +LIBNAME = vbdev_crypto + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto.c b/src/spdk/lib/bdev/crypto/vbdev_crypto.c new file mode 100644 index 00000000..510e8496 --- /dev/null +++ b/src/spdk/lib/bdev/crypto/vbdev_crypto.c @@ -0,0 +1,1506 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUcryptoION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "vbdev_crypto.h" + +#include "spdk/env.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/io_channel.h" +#include "spdk/bdev_module.h" + +#include +#include +#include +#include +#include + +/* To add support for new device types, follow the examples of the following... + * Note that the string names are defined by the DPDK PMD in question so be + * sure to use the exact names. + */ +#define MAX_NUM_DRV_TYPES 2 +#define AESNI_MB "crypto_aesni_mb" +#define QAT "crypto_qat" +const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT }; + +/* Global list of available crypto devices. 
*/ +struct vbdev_dev { + struct rte_cryptodev_info cdev_info; /* includes device friendly name */ + uint8_t cdev_id; /* identifier for the device */ + TAILQ_ENTRY(vbdev_dev) link; +}; +static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs); + +/* Global list and lock for unique device/queue pair combos */ +struct device_qp { + struct vbdev_dev *device; /* ptr to crypto device */ + uint8_t qp; /* queue pair for this node */ + bool in_use; /* whether this node is in use or not */ + TAILQ_ENTRY(device_qp) link; +}; +static TAILQ_HEAD(, device_qp) g_device_qp = TAILQ_HEAD_INITIALIZER(g_device_qp); +static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER; + + +/* In order to limit the number of resources we need to do one crypto + * operation per LBA (we use LBA as IV), we tell the bdev layer that + * our max IO size is something reasonable. Units here are in bytes. + */ +#define CRYPTO_MAX_IO (64 * 1024) + +/* This controls how many ops will be dequeued from the crypto driver in one run + * of the poller. It is mainly a performance knob as it effectively determines how + * much work the poller has to do. However even that can vary between crypto drivers + * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the + * QAT drvier just dequeues what has been completed already. + */ +#define MAX_DEQUEUE_BURST_SIZE 64 + +/* When enqueueing, we need to supply the crypto driver with an array of pointers to + * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO + * value in conjunction with the the other defines to make sure we're not using crazy amounts + * of memory. All of these numbers can and probably should be adjusted based on the + * workload. By default we'll use the worst case (smallest) block size for the + * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B + * blocks would give us an enqueue array size of 128. + */ +#define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512) + +/* The number of MBUFS we need must be a power of two and to support other small IOs + * in addition to the limits mentioned above, we go to the next power of two. It is + * big number because it is one mempool for source and desitnation mbufs. It may + * need to be bigger to support multiple crypto drivers at once. + */ +#define NUM_MBUFS 32768 +#define POOL_CACHE_SIZE 256 +#define NUM_SESSIONS NUM_MBUFS +#define SESS_MEMPOOL_CACHE_SIZE 256 + +/* This is the max number of IOs we can supply to any crypto device QP at one time. + * It can vary between drivers. + */ +#define CRYPTO_QP_DESCRIPTORS 2048 + +/* Specific to AES_CBC. */ +#define AES_CBC_IV_LENGTH 16 +#define AES_CBC_KEY_LENGTH 16 + +/* Common for suported devices. */ +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + +static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void vbdev_crypto_examine(struct spdk_bdev *bdev); +static int vbdev_crypto_claim(struct spdk_bdev *bdev); + +/* list of crypto_bdev names and their base bdevs via configuration file. + * Used so we can parse the conf once at init and use this list in examine(). 
+ */ +struct bdev_names { + char *vbdev_name; /* name of the vbdev to create */ + char *bdev_name; /* base bdev name */ + + /* Note, for dev/test we allow use of key in the config file, for production + * use, you must use an RPC to specify the key for security reasons. + */ + uint8_t *key; /* key per bdev */ + char *drv_name; /* name of the crypto device driver */ + TAILQ_ENTRY(bdev_names) link; +}; +static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names); + +/* List of virtual bdevs and associated info for each. We keep the device friendly name here even + * though its also in the device struct because we use it early on. + */ +struct vbdev_crypto { + struct spdk_bdev *base_bdev; /* the thing we're attaching to */ + struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ + struct spdk_bdev crypto_bdev; /* the crypto virtual bdev */ + uint8_t *key; /* key per bdev */ + char *drv_name; /* name of the crypto device driver */ + TAILQ_ENTRY(vbdev_crypto) link; +}; +static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto); + +/* Shared mempools between all devices on this system */ +static struct spdk_mempool *g_session_mp = NULL; /* session mempool */ +static struct spdk_mempool *g_mbuf_mp = NULL; /* mbuf mempool */ +static struct rte_mempool *g_crypto_op_mp = NULL; /* crypto operations, must be rte* mempool */ + +/* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code. + * We store things in here that are needed on per thread basis like the base_channel for this thread, + * and the poller for this thread. + */ +struct crypto_io_channel { + struct spdk_io_channel *base_ch; /* IO channel of base device */ + struct spdk_poller *poller; /* completion poller */ + struct device_qp *device_qp; /* unique device/qp combination for this channel */ +}; + +/* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to + * each IO for us. + */ +struct crypto_bdev_io { + int cryop_cnt_remaining; /* counter used when completing crypto ops */ + struct crypto_io_channel *crypto_ch; /* need to store for crypto completion handling */ + struct vbdev_crypto *crypto_bdev; /* the crypto node struct associated with this IO */ + enum rte_crypto_cipher_operation crypto_op; /* the crypto control struct */ + struct rte_crypto_sym_xform cipher_xform; /* crypto control struct for this IO */ + struct spdk_bdev_io *orig_io; /* the original IO */ + struct spdk_bdev_io *read_io; /* the read IO we issued */ + + /* Used for the single contigous buffer that serves as the crypto destination target for writes */ + uint64_t cry_num_blocks; /* num of blocks for the contiguous buffer */ + uint64_t cry_offset_blocks; /* block offset on media */ + struct iovec cry_iov; /* iov representing contig write buffer */ +}; + +/* This is called from the module's init function. We setup all crypto devices early on as we are unable + * to easily dynamically configure queue pairs after the drivers are up and running. So, here, we + * configure the max capabilities of each device and assign threads to queue pairs as channels are + * requested. 
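The driver-init path below insists on one dedicated queue pair per lcore for each driver type so that data-path access to a queue pair never needs locking. A minimal sketch of that capacity requirement, with hypothetical device counts (the helper name and numbers are illustrative only, not part of the module):

#include <errno.h>

/* Sketch only: with e.g. 2 devices of a given driver type exposing 8 queue
 * pairs each, at most 16 lcores can each own a queue pair without sharing.
 */
static int
example_check_qp_capacity(unsigned int num_devs, unsigned int qps_per_dev,
			  unsigned int num_lcores)
{
	if (num_devs * qps_per_dev < num_lcores) {
		return -EINVAL;	/* not enough unique device/queue-pair combinations */
	}
	return 0;
}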
+ */ +static int +vbdev_crypto_init_crypto_drivers(void) +{ + uint8_t cdev_count; + uint8_t cdrv_id, cdev_id, i, j; + int rc = 0; + struct vbdev_dev *device = NULL; + struct device_qp *dev_qp = NULL; + unsigned int max_sess_size = 0, sess_size; + uint16_t num_lcores = rte_lcore_count(); + + /* Only the first call, via RPC or module init should init the crypto drivers. */ + if (g_session_mp != NULL) { + return 0; + } + + /* We always init AESNI_MB */ + rc = rte_vdev_init(AESNI_MB, NULL); + if (rc == 0) { + SPDK_NOTICELOG("created virtual PMD %s\n", AESNI_MB); + } else { + SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB); + return -EINVAL; + } + + /* If we have no crypto devices, there's no reason to continue. */ + cdev_count = rte_cryptodev_count(); + if (cdev_count == 0) { + return 0; + } + + /* + * Create global mempools, shared by all devices regardless of type. + */ + + /* First determine max session size, most pools are shared by all the devices, + * so we need to find the global max sessions size. + */ + for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) { + sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id); + if (sess_size > max_sess_size) { + max_sess_size = sess_size; + } + } + + g_session_mp = spdk_mempool_create("session_mp", NUM_SESSIONS * 2, max_sess_size, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (g_session_mp == NULL) { + SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size); + return -ENOMEM; + } + + g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (g_mbuf_mp == NULL) { + SPDK_ERRLOG("Cannot create mbuf pool\n"); + rc = -ENOMEM; + goto error_create_mbuf; + } + + g_crypto_op_mp = rte_crypto_op_pool_create("op_mp", + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + NUM_MBUFS, + POOL_CACHE_SIZE, + AES_CBC_IV_LENGTH, + rte_socket_id()); + if (g_crypto_op_mp == NULL) { + SPDK_ERRLOG("Cannot create op pool\n"); + rc = -ENOMEM; + goto error_create_op; + } + + /* + * Now lets configure each device. + */ + for (i = 0; i < cdev_count; i++) { + device = calloc(1, sizeof(struct vbdev_dev)); + if (!device) { + rc = -ENOMEM; + goto error_create_device; + } + + /* Get details about this device. */ + rte_cryptodev_info_get(i, &device->cdev_info); + cdrv_id = device->cdev_info.driver_id; + cdev_id = device->cdev_id = i; + + /* Before going any further, make sure we have enough resources for this + * device type to function. We need a unique queue pair per core accross each + * device type to remain lockless.... + */ + if ((rte_cryptodev_device_count_by_driver(cdrv_id) * + device->cdev_info.max_nb_queue_pairs) < num_lcores) { + SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n", + device->cdev_info.driver_name); + SPDK_ERRLOG("Either add more crypto devices or decrease core count\n"); + rc = -EINVAL; + goto error_qp; + } + + /* Setup queue pairs. */ + struct rte_cryptodev_config conf = { + .nb_queue_pairs = device->cdev_info.max_nb_queue_pairs, + .socket_id = SPDK_ENV_SOCKET_ID_ANY + }; + + rc = rte_cryptodev_configure(cdev_id, &conf); + if (rc < 0) { + SPDK_ERRLOG("Failed to configure cryptodev %u", cdev_id); + rc = -EINVAL; + goto error_dev_config; + } + + struct rte_cryptodev_qp_conf qp_conf = { + .nb_descriptors = CRYPTO_QP_DESCRIPTORS + }; + + /* Pre-setup all pottential qpairs now and assign them in the channel + * callback. 
If we were to create them there, we'd have to stop the + * entire device affecting all other threads that might be using it + * even on other queue pairs. + */ + for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { + rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY, + (struct rte_mempool *)g_session_mp); + + if (rc < 0) { + SPDK_ERRLOG("Failed to setup queue pair %u on " + "cryptodev %u", j, cdev_id); + rc = -EINVAL; + goto error_qp_setup; + } + } + + rc = rte_cryptodev_start(cdev_id); + if (rc < 0) { + SPDK_ERRLOG("Failed to start device %u: error %d\n", + cdev_id, rc); + rc = -EINVAL; + goto error_device_start; + } + + /* Add to our list of available crypto devices. */ + TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link); + + /* Build up list of device/qp combinations */ + for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { + dev_qp = calloc(1, sizeof(struct device_qp)); + if (!dev_qp) { + rc = -ENOMEM; + goto error_create_devqp; + } + dev_qp->device = device; + dev_qp->qp = j; + dev_qp->in_use = false; + TAILQ_INSERT_TAIL(&g_device_qp, dev_qp, link); + } + } + return 0; + + /* Error cleanup paths. */ +error_create_devqp: + while ((dev_qp = TAILQ_FIRST(&g_device_qp))) { + TAILQ_REMOVE(&g_device_qp, dev_qp, link); + free(dev_qp); + } +error_device_start: +error_qp_setup: +error_dev_config: +error_qp: + free(device); +error_create_device: + rte_mempool_free(g_crypto_op_mp); +error_create_op: + spdk_mempool_free(g_mbuf_mp); +error_create_mbuf: + spdk_mempool_free(g_session_mp); + return rc; +} + +/* Following an encrypt or decrypt we need to then either write the encrypted data or finish + * the read on decrypted data. Do that here. + */ +static void +_crypto_operation_complete(struct spdk_bdev_io *bdev_io) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; + struct spdk_bdev_io *free_me = io_ctx->read_io; + int rc = 0; + + if (bdev_io->internal.status != SPDK_BDEV_IO_STATUS_FAILED) { + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + + /* Complete the original IO and then free the one that we created + * as a result of issuing an IO via submit_reqeust. + */ + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + spdk_bdev_free_io(free_me); + + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + + /* Write the encrypted data. */ + rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + &io_ctx->cry_iov, 1, io_ctx->cry_offset_blocks, + io_ctx->cry_num_blocks, _complete_internal_write, + bdev_io); + } else { + + /* Something really went haywire if this function got called with a type + * other than read or write. + */ + rc = -1; + } + } else { + /* If the poller found that one of the crypto ops had failed as part of this + * bdev_io it would have updated the internal status indicate failure. + */ + rc = -1; + } + + if (rc != 0) { + SPDK_ERRLOG("ERROR on crypto operation completion!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + +} + +/* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at + * the device. Then we need to decide if what we've got so far (including previous poller + * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io + * accordingly. This means either completing a read or issuing a new write. 
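Before the poller itself, a minimal sketch of the completion-counting idea it relies on: every bdev_io carries a count of outstanding crypto operations, and only the final dequeued operation completes the I/O, regardless of dequeue order. The struct and function names below are illustrative, not the module's own:

/* Sketch only: the last crypto op to finish is the one that completes the
 * bdev_io.
 */
struct example_io_ctx {
	int ops_remaining;	/* set to the number of crypto ops at submission */
};

static int
example_op_done(struct example_io_ctx *ctx)
{
	/* Returns non-zero when this was the final op for the I/O. */
	return --ctx->ops_remaining == 0;
}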
+ */ +static int +crypto_dev_poller(void *args) +{ + struct crypto_io_channel *crypto_ch = args; + uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; + int i, num_dequeued_ops; + struct spdk_bdev_io *bdev_io = NULL; + struct crypto_bdev_io *io_ctx = NULL; + struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE]; + struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE]; + int num_mbufs = 0; + + /* Each run of the poller will get just what the device has available + * at the moment we call it, we don't check again after draining the + * first batch. + */ + num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp, + dequeued_ops, MAX_DEQUEUE_BURST_SIZE); + + /* Check if operation was processed successfully */ + for (i = 0; i < num_dequeued_ops; i++) { + + /* We don't know the order or association of the crypto ops wrt any + * partiular bdev_io so need to look at each and determine if it's + * the last one for it's bdev_io or not. + */ + bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata; + assert(bdev_io != NULL); + + if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + SPDK_ERRLOG("error with op %d status %u\n", i, + dequeued_ops[i]->status); + /* Update the bdev status to error, we'll still process the + * rest of the crypto ops for this bdev_io though so they + * aren't left hanging. + */ + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + } + + io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + assert(io_ctx->cryop_cnt_remaining > 0); + + /* Return the associated src and dst mbufs by collecting them into + * an array that we can use the bulk API to free after the loop. + */ + dequeued_ops[i]->sym->m_src->userdata = NULL; + mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src; + if (dequeued_ops[i]->sym->m_dst) { + mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst; + } + + /* done encrypting, complete the bdev_io */ + if (--io_ctx->cryop_cnt_remaining == 0) { + + /* Complete the IO */ + _crypto_operation_complete(bdev_io); + + /* Return session */ + rte_cryptodev_sym_session_clear(cdev_id, dequeued_ops[i]->sym->session); + rte_cryptodev_sym_session_free(dequeued_ops[i]->sym->session); + } + } + + /* Now bulk free both mbufs and crypto operations. */ + if (num_dequeued_ops > 0) { + rte_mempool_put_bulk(g_crypto_op_mp, + (void **)dequeued_ops, + num_dequeued_ops); + assert(num_mbufs > 0); + spdk_mempool_put_bulk(g_mbuf_mp, + (void **)mbufs_to_free, + num_mbufs); + } + + return num_dequeued_ops; +} + +/* We're either encrypting on the way down or decrypting on the way back. 
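The submission path that follows walks the request's iovecs in block-sized steps, producing exactly one crypto operation per logical block. A stripped-down sketch of that walk under the same assumption the real code makes (every iovec holds a whole number of blocks); names are illustrative and no DPDK structures are involved:

#include <stdint.h>
#include <sys/uio.h>

/* Sketch only: emit one source pointer per block, advancing through the iovec
 * list as each one is consumed. Assumes the iovecs hold exactly
 * num_blocks * blocklen bytes in block-aligned pieces.
 */
static void
example_split_into_blocks(const struct iovec *iovs, uint64_t num_blocks,
			  uint32_t blocklen, uint8_t **op_src)
{
	uint32_t iov_index = 0;
	uint8_t *cur = iovs[0].iov_base;
	uint64_t cur_remaining = iovs[0].iov_len;
	uint64_t block;

	for (block = 0; block < num_blocks; block++) {
		op_src[block] = cur;
		cur += blocklen;
		cur_remaining -= blocklen;
		if (cur_remaining == 0 && block + 1 < num_blocks) {
			iov_index++;
			cur = iovs[iov_index].iov_base;
			cur_remaining = iovs[iov_index].iov_len;
		}
	}
}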
*/ +static int +_crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op) +{ + struct rte_cryptodev_sym_session *session; + uint16_t num_enqueued_ops = 0; + uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks; + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; + uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; + uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen; + uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len; + int rc; + uint32_t enqueued = 0; + uint32_t iov_index = 0; + uint32_t allocated = 0; + uint8_t *current_iov = NULL; + uint64_t total_remaining = 0; + uint64_t current_iov_remaining = 0; + int completed = 0; + int crypto_index = 0; + uint32_t en_offset = 0; + struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE]; + struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; + struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; + int burst; + + assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO); + + /* Get the number of source mbufs that we need. These will always be 1:1 because we + * don't support chaining. The reason we don't is because of our decision to use + * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the + * op would be > 1 LBA. + */ + rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt); + if (rc) { + SPDK_ERRLOG("ERROR trying to get src_mbufs!\n"); + return -ENOMEM; + } + + /* Get the same amount but these buffers to describe the encrypted data location (dst). */ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt); + if (rc) { + SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n"); + rc = -ENOMEM; + goto error_get_dst; + } + } + + /* Allocate crypto operations. */ + allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + crypto_ops, cryop_cnt); + if (allocated < cryop_cnt) { + SPDK_ERRLOG("ERROR trying to get crypto ops!\n"); + rc = -ENOMEM; + goto error_get_ops; + } + + /* Get sessions. */ + session = rte_cryptodev_sym_session_create((struct rte_mempool *)g_session_mp); + if (NULL == session) { + SPDK_ERRLOG("ERROR trying to create crypto session!\n"); + rc = -EINVAL; + goto error_session_create; + } + + /* Init our session with the desired cipher options. */ + io_ctx->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + io_ctx->cipher_xform.cipher.key.data = io_ctx->crypto_bdev->key; + io_ctx->cipher_xform.cipher.op = io_ctx->crypto_op = crypto_op; + io_ctx->cipher_xform.cipher.iv.offset = IV_OFFSET; + io_ctx->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + io_ctx->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH; + io_ctx->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH; + + rc = rte_cryptodev_sym_session_init(cdev_id, session, + &io_ctx->cipher_xform, + (struct rte_mempool *)g_session_mp); + if (rc < 0) { + SPDK_ERRLOG("ERROR trying to init crypto session!\n"); + rc = -EINVAL; + goto error_session_init; + } + + /* For encryption, we need to prepare a single contiguous buffer as the encryption + * destination, we'll then pass that along for the write after encryption is done. + * This is done to avoiding encrypting the provided write buffer which may be + * undesirable in some use cases. 
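Worth spelling out before the per-block loop below: because the module uses the logical block address as the AES-CBC initialization vector (one crypto operation per block, no mbuf chaining), each 16-byte IV is simply the 8-byte block offset zero-padded to the IV length, in host byte order. A standalone sketch of that derivation (the helper name is illustrative):

#include <stdint.h>
#include <string.h>

#define EXAMPLE_AES_CBC_IV_LENGTH 16

/* Sketch only: derive the IV for one crypto op from its logical block address,
 * mirroring the per-block loop that follows (zero-pad, then copy the LBA).
 */
static void
example_lba_to_iv(uint64_t lba, uint8_t iv[EXAMPLE_AES_CBC_IV_LENGTH])
{
	memset(iv, 0, EXAMPLE_AES_CBC_IV_LENGTH);
	memcpy(iv, &lba, sizeof(lba));
}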
+ */ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + io_ctx->cry_iov.iov_len = total_length; + /* For now just allocate in the I/O path, not optimal but the current bdev API + * for getting a buffer from the pool won't work if the bdev_io passed in + * has a buffer, which ours always will. So, until we modify that API + * or better yet the current ZCOPY work lands, this is the best we can do. + */ + io_ctx->cry_iov.iov_base = spdk_dma_malloc(total_length, 0x10, NULL); + if (!io_ctx->cry_iov.iov_base) { + SPDK_ERRLOG("ERROR trying to allocate write buffer for encryption!\n"); + rc = -ENOMEM; + goto error_get_write_buffer; + } + io_ctx->cry_offset_blocks = bdev_io->u.bdev.offset_blocks; + io_ctx->cry_num_blocks = bdev_io->u.bdev.num_blocks; + } + + /* This value is used in the completion callback to determine when the bdev_io is + * complete. + */ + io_ctx->cryop_cnt_remaining = cryop_cnt; + + /* As we don't support chaining because of a decision to use LBA as IV, construction + * of crypto operaations is straightforward. We build both the op, the mbuf and the + * dst_mbuf in our local arrays by looping through the length of the bdev IO and + * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each + * LBA sized chunck of memory will correspond 1:1 to a crypto operation and a single + * mbuf per crypto operation. + */ + total_remaining = total_length; + current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; + current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; + do { + uint8_t *iv_ptr; + uint64_t op_block_offset; + + /* Set the mbuf elements address and length. Null out the next pointer. */ + src_mbufs[crypto_index]->buf_addr = current_iov; + src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov); + src_mbufs[crypto_index]->data_len = crypto_len; + src_mbufs[crypto_index]->next = NULL; + /* Store context in every mbuf as we don't know anything about completion order */ + src_mbufs[crypto_index]->userdata = bdev_io; + + /* Set the IV - we use the LBA of the crypto_op */ + iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *, + IV_OFFSET); + memset(iv_ptr, 0, AES_CBC_IV_LENGTH); + op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index; + rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t)); + + /* Set the data to encrypt/decrypt length */ + crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len; + crypto_ops[crypto_index]->sym->cipher.data.offset = 0; + + /* link the mbuf to the crypto op. */ + crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index]; + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index]; + } else { + crypto_ops[crypto_index]->sym->m_dst = NULL; + } + + /* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io + * that will be used to process the write on completion to the same buffer. Setting + * up the en_buffer is a little simpler as we know the destination buffer is single IOV. + */ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + + /* Set the relevant destination en_mbuf elements. 
*/ + dst_mbufs[crypto_index]->buf_addr = io_ctx->cry_iov.iov_base + en_offset; + dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr); + dst_mbufs[crypto_index]->data_len = crypto_len; + crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index]; + en_offset += crypto_len; + dst_mbufs[crypto_index]->next = NULL; + } + + /* Attach the crypto session to the operation */ + rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], session); + if (rc) { + rc = -EINVAL; + goto error_attach_session; + } + + /* Subtract our running totals for the op in progress and the overall bdev io */ + total_remaining -= crypto_len; + current_iov_remaining -= crypto_len; + + /* move our current IOV pointer accordingly. */ + current_iov += crypto_len; + + /* move on to the next crypto operation */ + crypto_index++; + + /* If we're done with this IOV, move to the next one. */ + if (current_iov_remaining == 0 && total_remaining > 0) { + iov_index++; + current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; + current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; + } + } while (total_remaining > 0); + + /* Enqueue everything we've got but limit by the max number of descriptors we + * configured the crypto device for. + */ + do { + burst = spdk_min((cryop_cnt - enqueued), CRYPTO_QP_DESCRIPTORS); + num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp, + &crypto_ops[enqueued], + burst); + enqueued += num_enqueued_ops; + + /* Dequeue all inline if the device is full. We don't defer anything simply + * because of the complexity involved as we're building 1 or more crypto + * ops per IO. Dequeue will free up space for more enqueue. + */ + if (enqueued < cryop_cnt) { + + /* Dequeue everything, this may include ops that were already + * in the device before this submission.... + */ + do { + completed = crypto_dev_poller(crypto_ch); + } while (completed > 0); + } + } while (enqueued < cryop_cnt); + + return rc; + + /* Error cleanup paths. */ +error_attach_session: +error_get_write_buffer: +error_session_init: + rte_cryptodev_sym_session_clear(cdev_id, session); + rte_cryptodev_sym_session_free(session); +error_session_create: + rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, cryop_cnt); + allocated = 0; +error_get_ops: + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], + cryop_cnt); + } + if (allocated > 0) { + rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, + allocated); + } +error_get_dst: + spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0], + cryop_cnt); + return rc; +} + +/* Completion callback for IO that were issued from this bdev other than read/write. + * They have their own for readability. + */ +static void +_complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + spdk_bdev_io_complete(orig_io, status); + spdk_bdev_free_io(bdev_io); +} + +/* Completion callback for writes that were issued from this bdev. */ +static void +_complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + int status = success ? 
SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; + + spdk_dma_free(orig_ctx->cry_iov.iov_base); + spdk_bdev_io_complete(orig_io, status); + spdk_bdev_free_io(bdev_io); +} + +/* Completion callback for reads that were issued from this bdev. */ +static void +_complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; + + if (success) { + + /* Save off this bdev_io so it can be freed after decryption. */ + orig_ctx->read_io = bdev_io; + + if (_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT)) { + SPDK_ERRLOG("ERROR decrypting"); + spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED); + spdk_bdev_free_io(bdev_io); + } + } else { + SPDK_ERRLOG("ERROR on read prior to decrypting"); + spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED); + spdk_bdev_free_io(bdev_io); + } +} + +/* Callback for getting a buf from the bdev pool in the event that the caller passed + * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module + * beneath us before we're done with it. + */ +static void +crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); + int rc; + + rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, _complete_internal_read, + bdev_io); + if (rc != 0) { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +/* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto, + * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO + * and call our cpl callback provided below along with the original bdev_io so that we can + * complete it once this IO completes. For crypto operations, we'll either encrypt it first + * (writes) then call back into bdev to submit it or we'll submit a read and then catch it + * on the way back for decryption. 
+ */ +static void +vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + int rc = 0; + + memset(io_ctx, 0, sizeof(struct crypto_bdev_io)); + io_ctx->crypto_bdev = crypto_bdev; + io_ctx->crypto_ch = crypto_ch; + io_ctx->orig_io = bdev_io; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + default: + SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + if (rc != 0) { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +/* We'll just call the base bdev and let it answer except for WZ command which + * we always say we don't support so that the bdev layer will actually send us + * real writes that we can encrypt. + */ +static bool +vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_FLUSH: + return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type); + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + /* Force the bdev layer to issue actual writes of zeroes so we can + * encrypt them as regular writes. + */ + default: + return false; + } +} + +/* Called after we've unregistered following a hot remove callback. + * Our finish entry point will be called next. + */ +static int +vbdev_crypto_destruct(void *ctx) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + /* Unclaim the underlying bdev. */ + spdk_bdev_module_release_bdev(crypto_bdev->base_bdev); + + /* Close the underlying bdev. */ + spdk_bdev_close(crypto_bdev->base_desc); + + /* Done with this crypto_bdev. */ + TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link); + free(crypto_bdev->drv_name); + free(crypto_bdev->key); + free(crypto_bdev->crypto_bdev.name); + free(crypto_bdev); + return 0; +} + +/* We supplied this as an entry point for upper layers who want to communicate to this + * bdev. This is how they get a channel. We are passed the same context we provided when + * we created our crypto vbdev in examine() which, for this bdev, is the address of one of + * our context nodes. From here we'll ask the SPDK channel code to fill out our channel + * struct and we'll keep it in our crypto node. 
+ */ +static struct spdk_io_channel * +vbdev_crypto_get_io_channel(void *ctx) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + /* The IO channel code will allocate a channel for us which consists of + * the SPDK cahnnel structure plus the size of our crypto_io_channel struct + * that we passed in when we registered our IO device. It will then call + * our channel create callback to populate any elements that we need to + * update. + */ + return spdk_get_io_channel(crypto_bdev); +} + +/* This is the output for get_bdevs() for this vbdev */ +static int +vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + spdk_json_write_name(w, "crypto"); + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); + spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); + spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name); + spdk_json_write_named_string(w, "key", crypto_bdev->key); + spdk_json_write_object_end(w); + return 0; +} + +static int +vbdev_crypto_config_json(struct spdk_json_write_ctx *w) +{ + struct vbdev_crypto *crypto_bdev, *tmp; + + TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "construct_crypto_bdev"); + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); + spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); + spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name); + spdk_json_write_named_string(w, "key", crypto_bdev->key); + spdk_json_write_object_end(w); + spdk_json_write_object_end(w); + } + return 0; +} + +/* We provide this callback for the SPDK channel code to create a channel using + * the channel struct we provided in our module get_io_channel() entry point. Here + * we get and save off an underlying base channel of the device below us so that + * we can communicate with the base bdev on a per channel basis. We also register the + * poller used to complete crypto operations from the device. + */ +static int +crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf) +{ + struct crypto_io_channel *crypto_ch = ctx_buf; + struct vbdev_crypto *crypto_bdev = io_device; + struct device_qp *device_qp; + + crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc); + crypto_ch->poller = spdk_poller_register(crypto_dev_poller, crypto_ch, 0); + crypto_ch->device_qp = NULL; + + pthread_mutex_lock(&g_device_qp_lock); + TAILQ_FOREACH(device_qp, &g_device_qp, link) { + if ((strcmp(device_qp->device->cdev_info.driver_name, crypto_bdev->drv_name) == 0) && + (device_qp->in_use == false)) { + crypto_ch->device_qp = device_qp; + device_qp->in_use = true; + SPDK_NOTICELOG("Device queue pair assignment: ch %p device %p qpid %u %s\n", + crypto_ch, device_qp->device, crypto_ch->device_qp->qp, crypto_bdev->drv_name); + break; + } + } + pthread_mutex_unlock(&g_device_qp_lock); + assert(crypto_ch->device_qp); + return 0; +} + +/* We provide this callback for the SPDK channel code to destroy a channel + * created with our create callback. We just need to undo anything we did + * when we created. 
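The channel create and destroy callbacks above and below bracket a simple claim/release protocol: a free device/queue-pair node is marked in_use under a global lock when a channel is created and handed back when the channel is destroyed, so each channel owns its queue pair exclusively. A minimal sketch of that pattern with illustrative names:

#include <pthread.h>
#include <stdbool.h>

/* Sketch only: claim a free slot under a lock at channel-create time and
 * release it at channel-destroy time; one slot per channel, never shared.
 */
struct example_slot {
	bool in_use;
};

static pthread_mutex_t example_lock = PTHREAD_MUTEX_INITIALIZER;

static struct example_slot *
example_claim(struct example_slot *slots, int count)
{
	struct example_slot *claimed = NULL;
	int i;

	pthread_mutex_lock(&example_lock);
	for (i = 0; i < count; i++) {
		if (!slots[i].in_use) {
			slots[i].in_use = true;
			claimed = &slots[i];
			break;
		}
	}
	pthread_mutex_unlock(&example_lock);
	return claimed;
}

static void
example_release(struct example_slot *slot)
{
	pthread_mutex_lock(&example_lock);
	slot->in_use = false;
	pthread_mutex_unlock(&example_lock);
}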
+ */ +static void +crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) +{ + struct crypto_io_channel *crypto_ch = ctx_buf; + + pthread_mutex_lock(&g_device_qp_lock); + crypto_ch->device_qp->in_use = false; + pthread_mutex_unlock(&g_device_qp_lock); + + spdk_poller_unregister(&crypto_ch->poller); + spdk_put_io_channel(crypto_ch->base_ch); +} + +/* Create the association from the bdev and vbdev name and insert + * on the global list. */ +static int +vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name, + const char *crypto_pmd, const char *key) +{ + struct bdev_names *name; + int rc, j; + bool found = false; + + name = calloc(1, sizeof(struct bdev_names)); + if (!name) { + SPDK_ERRLOG("could not allocate bdev_names\n"); + return -ENOMEM; + } + + name->bdev_name = strdup(bdev_name); + if (!name->bdev_name) { + SPDK_ERRLOG("could not allocate name->bdev_name\n"); + rc = -ENOMEM; + goto error_alloc_bname; + } + + name->vbdev_name = strdup(vbdev_name); + if (!name->vbdev_name) { + SPDK_ERRLOG("could not allocate name->vbdev_name\n"); + rc = -ENOMEM; + goto error_alloc_vname; + } + + name->drv_name = strdup(crypto_pmd); + if (!name->drv_name) { + SPDK_ERRLOG("could not allocate name->drv_name\n"); + rc = -ENOMEM; + goto error_alloc_dname; + } + for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) { + if (strcmp(crypto_pmd, g_driver_names[j]) == 0) { + found = true; + break; + } + } + if (!found) { + SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd); + rc = -EINVAL; + goto error_invalid_pmd; + } + + name->key = strdup(key); + if (!name->key) { + SPDK_ERRLOG("could not allocate name->key\n"); + rc = -ENOMEM; + goto error_alloc_key; + } + if (strlen(name->key) != AES_CBC_KEY_LENGTH) { + SPDK_ERRLOG("invalid AES_CCB key length\n"); + rc = -EINVAL; + goto error_invalid_key; + } + + TAILQ_INSERT_TAIL(&g_bdev_names, name, link); + + return 0; + + /* Error cleanup paths. */ +error_invalid_key: +error_alloc_key: +error_invalid_pmd: + free(name->drv_name); +error_alloc_dname: + free(name->vbdev_name); +error_alloc_vname: + free(name->bdev_name); +error_alloc_bname: + free(name); + return rc; +} + +/* RPC entry point for crypto creation. */ +int +create_crypto_disk(const char *bdev_name, const char *vbdev_name, + const char *crypto_pmd, const char *key) +{ + struct spdk_bdev *bdev = NULL; + struct vbdev_crypto *crypto_bdev, *tmp; + int rc = 0; + + bdev = spdk_bdev_get_by_name(bdev_name); + + rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key); + if (rc) { + return rc; + } + + if (!bdev) { + return 0; + } + + rc = vbdev_crypto_claim(bdev); + if (rc) { + return rc; + } + + rc = vbdev_crypto_init_crypto_drivers(); + if (rc) { + return rc; + } + + TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { + if (strcmp(crypto_bdev->base_bdev->name, bdev->name) == 0) { + rc = spdk_vbdev_register(&crypto_bdev->crypto_bdev, + &crypto_bdev->base_bdev, 1); + if (rc) { + SPDK_ERRLOG("could not register crypto_bdev\n"); + spdk_bdev_close(crypto_bdev->base_desc); + TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link); + free(crypto_bdev->crypto_bdev.name); + free(crypto_bdev->key); + free(crypto_bdev); + } + break; + } + } + + return rc; +} + +/* Called at driver init time, parses config file to preapre for examine calls, + * also fully initializes the crypto drivers. 
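The config parsing in vbdev_crypto_init() below reads a "[crypto]" section whose "CRY" lines name, in order, the base bdev, the new crypto vbdev, the key, and the PMD driver. A hypothetical section could look like the following; the bdev names and the 16-character key are placeholders, while the driver string must be one of the names in g_driver_names:

/*
 * Hypothetical configuration-file section matching the "CRY" parsing below:
 *
 *   [crypto]
 *   # CRY <base bdev name> <crypto vbdev name> <key> <PMD driver name>
 *   CRY Malloc0 crypto_malloc0 0123456789123456 crypto_aesni_mb
 *
 * The key must be exactly AES_CBC_KEY_LENGTH (16) characters; as the code
 * warns, storing it in a plain-text config file is for dev/test only.
 */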
+ */ +static int +vbdev_crypto_init(void) +{ + struct spdk_conf_section *sp = NULL; + const char *conf_bdev_name = NULL; + const char *conf_vbdev_name = NULL; + const char *crypto_pmd = NULL; + int i; + int rc = 0; + const char *key = NULL; + + /* Fully configure both SW and HW drivers. */ + rc = vbdev_crypto_init_crypto_drivers(); + if (rc) { + SPDK_ERRLOG("Error setting up crypto devices\n"); + return rc; + } + + sp = spdk_conf_find_section(NULL, "crypto"); + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + + if (!spdk_conf_section_get_nval(sp, "CRY", i)) { + break; + } + + conf_bdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 0); + if (!conf_bdev_name) { + SPDK_ERRLOG("crypto configuration missing bdev name\n"); + return -EINVAL; + } + + conf_vbdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 1); + if (!conf_vbdev_name) { + SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n"); + return -EINVAL; + } + + key = spdk_conf_section_get_nmval(sp, "CRY", i, 2); + if (!key) { + SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n"); + return -EINVAL; + } + SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n"); + + crypto_pmd = spdk_conf_section_get_nmval(sp, "CRY", i, 3); + if (!crypto_pmd) { + SPDK_ERRLOG("crypto configuration missing driver type\n"); + return -EINVAL; + } + + rc = vbdev_crypto_insert_name(conf_bdev_name, conf_vbdev_name, + crypto_pmd, key); + if (rc != 0) { + return rc; + } + } + + return rc; +} + +/* Called when the entire module is being torn down. */ +static void +vbdev_crypto_finish(void) +{ + struct bdev_names *name; + struct vbdev_dev *device; + struct device_qp *dev_qp; + + while ((name = TAILQ_FIRST(&g_bdev_names))) { + TAILQ_REMOVE(&g_bdev_names, name, link); + free(name->drv_name); + free(name->key); + free(name->bdev_name); + free(name->vbdev_name); + free(name); + } + + while ((device = TAILQ_FIRST(&g_vbdev_devs))) { + TAILQ_REMOVE(&g_vbdev_devs, device, link); + rte_cryptodev_stop(device->cdev_id); + free(device); + } + + while ((dev_qp = TAILQ_FIRST(&g_device_qp))) { + TAILQ_REMOVE(&g_device_qp, dev_qp, link); + free(dev_qp); + } + + rte_mempool_free(g_crypto_op_mp); + spdk_mempool_free(g_mbuf_mp); + spdk_mempool_free(g_session_mp); +} + +/* During init we'll be asked how much memory we'd like passed to us + * in bev_io structures as context. Here's where we specify how + * much context we want per IO. + */ +static int +vbdev_crypto_get_ctx_size(void) +{ + return sizeof(struct crypto_bdev_io); +} + +/* Called when SPDK wants to save the current config of this vbdev module to + * a file. + */ +static void +vbdev_crypto_get_spdk_running_config(FILE *fp) +{ + struct bdev_names *names = NULL; + fprintf(fp, "\n[crypto]\n"); + TAILQ_FOREACH(names, &g_bdev_names, link) { + fprintf(fp, " crypto %s %s ", names->bdev_name, names->vbdev_name); + fprintf(fp, "\n"); + } + + fprintf(fp, "\n"); +} + +/* Called when the underlying base bdev goes away. */ +static void +vbdev_crypto_examine_hotremove_cb(void *ctx) +{ + struct vbdev_crypto *crypto_bdev, *tmp; + struct spdk_bdev *bdev_find = ctx; + + TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { + if (bdev_find == crypto_bdev->base_bdev) { + spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL); + } + } +} + +static void +vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev needed */ +} + +/* When we register our bdev this is how we specify our entry points. 
*/ +static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = { + .destruct = vbdev_crypto_destruct, + .submit_request = vbdev_crypto_submit_request, + .io_type_supported = vbdev_crypto_io_type_supported, + .get_io_channel = vbdev_crypto_get_io_channel, + .dump_info_json = vbdev_crypto_dump_info_json, + .write_config_json = vbdev_crypto_write_config_json +}; + +static struct spdk_bdev_module crypto_if = { + .name = "crypto", + .module_init = vbdev_crypto_init, + .config_text = vbdev_crypto_get_spdk_running_config, + .get_ctx_size = vbdev_crypto_get_ctx_size, + .examine_config = vbdev_crypto_examine, + .module_fini = vbdev_crypto_finish, + .config_json = vbdev_crypto_config_json +}; + +SPDK_BDEV_MODULE_REGISTER(&crypto_if) + +static int +vbdev_crypto_claim(struct spdk_bdev *bdev) +{ + struct bdev_names *name; + struct vbdev_crypto *vbdev; + int rc = 0; + + /* Check our list of names from config versus this bdev and if + * there's a match, create the crypto_bdev & bdev accordingly. + */ + TAILQ_FOREACH(name, &g_bdev_names, link) { + if (strcmp(name->bdev_name, bdev->name) != 0) { + continue; + } + + SPDK_NOTICELOG("Match on %s\n", bdev->name); + vbdev = calloc(1, sizeof(struct vbdev_crypto)); + if (!vbdev) { + SPDK_ERRLOG("could not allocate crypto_bdev\n"); + rc = -ENOMEM; + goto error_vbdev_alloc; + } + + /* The base bdev that we're attaching to. */ + vbdev->base_bdev = bdev; + vbdev->crypto_bdev.name = strdup(name->vbdev_name); + if (!vbdev->crypto_bdev.name) { + SPDK_ERRLOG("could not allocate crypto_bdev name\n"); + rc = -ENOMEM; + goto error_bdev_name; + } + + vbdev->key = strdup(name->key); + if (!vbdev->key) { + SPDK_ERRLOG("could not allocate crypto_bdev key\n"); + rc = -ENOMEM; + goto error_alloc_key; + } + + vbdev->drv_name = strdup(name->drv_name); + if (!vbdev->drv_name) { + SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n"); + rc = -ENOMEM; + goto error_drv_name; + } + + vbdev->crypto_bdev.product_name = "crypto"; + vbdev->crypto_bdev.write_cache = bdev->write_cache; + vbdev->crypto_bdev.need_aligned_buffer = bdev->need_aligned_buffer; + /* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is + * in units of blocks. + */ + if (bdev->optimal_io_boundary > 0) { + vbdev->crypto_bdev.optimal_io_boundary = + spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary); + } else { + vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen); + } + vbdev->crypto_bdev.split_on_optimal_io_boundary = true; + vbdev->crypto_bdev.blocklen = bdev->blocklen; + vbdev->crypto_bdev.blockcnt = bdev->blockcnt; + + /* This is the context that is passed to us when the bdev + * layer calls in so we'll save our crypto_bdev node here. 
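A short aside on the optimal_io_boundary selection a few lines above: it caps any single I/O at CRYPTO_MAX_IO bytes, expressed in blocks, while still honoring a smaller boundary advertised by the base bdev. A worked sketch of that choice (the helper name is illustrative; with 512-byte blocks the crypto boundary works out to 64 KiB / 512 = 128 blocks):

#include <stdint.h>

#define EXAMPLE_CRYPTO_MAX_IO (64 * 1024)

/* Sketch only: pick the stricter of the crypto module's byte limit (converted
 * to blocks) and the base bdev's own optimal boundary, if it has one.
 */
static uint32_t
example_optimal_io_boundary(uint32_t base_boundary, uint32_t blocklen)
{
	uint32_t crypto_boundary = EXAMPLE_CRYPTO_MAX_IO / blocklen;

	if (base_boundary > 0 && base_boundary < crypto_boundary) {
		return base_boundary;
	}
	return crypto_boundary;
}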
+ */ + vbdev->crypto_bdev.ctxt = vbdev; + vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table; + vbdev->crypto_bdev.module = &crypto_if; + TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link); + + spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb, + sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name); + + rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb, + bdev, &vbdev->base_desc); + if (rc) { + SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); + goto error_open; + } + + rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module); + if (rc) { + SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev)); + goto error_claim; + } + + SPDK_NOTICELOG("registered crypto_bdev for: %s\n", name->vbdev_name); + } + + return rc; + + /* Error cleanup paths. */ +error_claim: + spdk_bdev_close(vbdev->base_desc); +error_open: + TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link); + spdk_io_device_unregister(vbdev, NULL); + free(vbdev->drv_name); +error_drv_name: + free(vbdev->key); +error_alloc_key: + free(vbdev->crypto_bdev.name); +error_bdev_name: + free(vbdev); +error_vbdev_alloc: + return rc; +} + +/* RPC entry for deleting a crypto vbdev. */ +void +delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn, + void *cb_arg) +{ + struct bdev_names *name; + + if (!bdev || bdev->module != &crypto_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + /* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the + * vbdev does not get re-created if the same bdev is constructed at some other time, + * unless the underlying bdev was hot-removed. + */ + TAILQ_FOREACH(name, &g_bdev_names, link) { + if (strcmp(name->vbdev_name, bdev->name) == 0) { + TAILQ_REMOVE(&g_bdev_names, name, link); + free(name->bdev_name); + free(name->vbdev_name); + free(name->drv_name); + free(name->key); + free(name); + break; + } + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +/* Because we specified this function in our crypto bdev function table when we + * registered our crypto bdev, we'll get this call anytime a new bdev shows up. + * Here we need to decide if we care about it and if so what to do. We + * parsed the config file at init so we check the new bdev against the list + * we built up at that time and if the user configured us to attach to this + * bdev, here's where we do it. + */ +static void +vbdev_crypto_examine(struct spdk_bdev *bdev) +{ + struct vbdev_crypto *crypto_bdev, *tmp; + int rc; + + vbdev_crypto_claim(bdev); + + TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { + if (strcmp(crypto_bdev->base_bdev->name, bdev->name) == 0) { + rc = spdk_vbdev_register(&crypto_bdev->crypto_bdev, + &crypto_bdev->base_bdev, 1); + if (rc) { + SPDK_ERRLOG("could not register crypto_bdev\n"); + spdk_bdev_close(crypto_bdev->base_desc); + TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link); + free(crypto_bdev->crypto_bdev.name); + free(crypto_bdev->key); + free(crypto_bdev); + } + break; + } + } + + spdk_bdev_module_examine_done(&crypto_if); +} + +SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_VBDEV_crypto) diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto.h b/src/spdk/lib/bdev/crypto/vbdev_crypto.h new file mode 100644 index 00000000..c8ef8d16 --- /dev/null +++ b/src/spdk/lib/bdev/crypto/vbdev_crypto.h @@ -0,0 +1,66 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VBDEV_CRYPTO_H +#define SPDK_VBDEV_CRYPTO_H + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +#include "spdk/bdev.h" + +typedef void (*spdk_delete_crypto_complete)(void *cb_arg, int bdeverrno); + +/** + * Create new crypto bdev. + * + * \param bdev_name Bdev on which crypto vbdev will be created. + * \param bdev_name Vbdev name crypto_pmd key + * \return 0 on success, other on failure. + */ +int create_crypto_disk(const char *bdev_name, const char *vbdev_name, + const char *crypto_pmd, const char *key); + +/** + * Delete crypto bdev. + * + * \param bdev Pointer to crypto bdev. + * \param cb_fn Function to call after deletion. + * \param cb_arg Argument to pass to cb_fn. + */ +void delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_VBDEV_CRYPTO_H */ diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c b/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c new file mode 100644 index 00000000..cbf5a3b8 --- /dev/null +++ b/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "vbdev_crypto.h" + +/* Structure to hold the parameters for this RPC method. */ +struct rpc_construct_crypto { + char *base_bdev_name; + char *name; + char *crypto_pmd; + char *key; +}; + +/* Free the allocated memory resource after the RPC handling. */ +static void +free_rpc_construct_crypto(struct rpc_construct_crypto *r) +{ + free(r->base_bdev_name); + free(r->name); + free(r->crypto_pmd); + free(r->key); +} + +/* Structure to decode the input parameters for this RPC method. */ +static const struct spdk_json_object_decoder rpc_construct_crypto_decoders[] = { + {"base_bdev_name", offsetof(struct rpc_construct_crypto, base_bdev_name), spdk_json_decode_string}, + {"name", offsetof(struct rpc_construct_crypto, name), spdk_json_decode_string}, + {"crypto_pmd", offsetof(struct rpc_construct_crypto, crypto_pmd), spdk_json_decode_string}, + {"key", offsetof(struct rpc_construct_crypto, key), spdk_json_decode_string}, +}; + +/* Decode the parameters for this RPC method and properly construct the crypto + * device. Error status returned in the failed cases. 
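The construct_crypto_bdev handler below decodes exactly the four string parameters declared in the decoder table above and, on success, replies with the new vbdev's name as a JSON string. A hypothetical JSON-RPC request (all parameter values are placeholders) would therefore look like this:

/*
 * Hypothetical request for the "construct_crypto_bdev" method:
 *
 *   {
 *     "jsonrpc": "2.0",
 *     "id": 1,
 *     "method": "construct_crypto_bdev",
 *     "params": {
 *       "base_bdev_name": "Malloc0",
 *       "name": "crypto_malloc0",
 *       "crypto_pmd": "crypto_aesni_mb",
 *       "key": "0123456789123456"
 *     }
 *   }
 */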
+ */ +static void +spdk_rpc_construct_crypto_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_crypto req = {NULL}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_crypto_decoders, + SPDK_COUNTOF(rpc_construct_crypto_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_crypto, "spdk_json_decode_object failed\n"); + goto invalid; + } + + rc = create_crypto_disk(req.base_bdev_name, req.name, + req.crypto_pmd, req.key); + if (rc != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_crypto(&req); + return; + } + + spdk_json_write_string(w, req.name); + spdk_jsonrpc_end_result(request, w); + free_rpc_construct_crypto(&req); + return; + +invalid: + free_rpc_construct_crypto(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} +SPDK_RPC_REGISTER("construct_crypto_bdev", spdk_rpc_construct_crypto_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_crypto { + char *name; +}; + +static void +free_rpc_delete_crypto(struct rpc_delete_crypto *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_crypto_decoders[] = { + {"name", offsetof(struct rpc_delete_crypto, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_crypto_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_crypto_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_crypto req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_crypto_decoders, + SPDK_COUNTOF(rpc_delete_crypto_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + delete_crypto_disk(bdev, _spdk_rpc_delete_crypto_bdev_cb, request); + + free_rpc_delete_crypto(&req); + + return; + +invalid: + free_rpc_delete_crypto(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_crypto_bdev", spdk_rpc_delete_crypto_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/error/Makefile b/src/spdk/lib/bdev/error/Makefile new file mode 100644 index 00000000..9dcee8bd --- /dev/null +++ b/src/spdk/lib/bdev/error/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = vbdev_error.c vbdev_error_rpc.c
+LIBNAME = vbdev_error
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/error/vbdev_error.c b/src/spdk/lib/bdev/error/vbdev_error.c
new file mode 100644
index 00000000..4bab9426
--- /dev/null
+++ b/src/spdk/lib/bdev/error/vbdev_error.c
@@ -0,0 +1,513 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is a module for test purposes which will simulate error cases for a bdev.
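+ *
+ * Sketch of typical usage (the bdev name "Malloc0" is illustrative, not part
+ * of this code): the base bdev can be listed in the configuration file as
+ *
+ *   [BdevError]
+ *   BdevError Malloc0
+ *
+ * or wrapped at runtime with the "construct_error_bdev" RPC. The resulting
+ * vbdev is named "EE_<base bdev name>", and errors are then armed with the
+ * "bdev_inject_error" RPC (io_type "read"/"write"/"flush"/"unmap"/"all"/"clear",
+ * error_type "failure" or "pending", plus an error count).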
+ */ + +#include "spdk/stdinc.h" +#include "spdk/rpc.h" +#include "spdk/conf.h" +#include "spdk/util.h" +#include "spdk/endian.h" +#include "spdk/nvme_spec.h" +#include "spdk/string.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "vbdev_error.h" + +struct spdk_vbdev_error_config { + char *base_bdev; + TAILQ_ENTRY(spdk_vbdev_error_config) tailq; +}; + +static TAILQ_HEAD(, spdk_vbdev_error_config) g_error_config + = TAILQ_HEAD_INITIALIZER(g_error_config); + +struct vbdev_error_info { + bool enabled; + uint32_t error_type; + uint32_t error_num; +}; + +/* Context for each error bdev */ +struct error_disk { + struct spdk_bdev_part part; + struct vbdev_error_info error_vector[SPDK_BDEV_IO_TYPE_RESET]; + TAILQ_HEAD(, spdk_bdev_io) pending_ios; +}; + +struct error_channel { + struct spdk_bdev_part_channel part_ch; +}; + +static pthread_mutex_t g_vbdev_error_mutex = PTHREAD_MUTEX_INITIALIZER; +static SPDK_BDEV_PART_TAILQ g_error_disks = TAILQ_HEAD_INITIALIZER(g_error_disks); + +static int vbdev_error_init(void); +static void vbdev_error_fini(void); + +static void vbdev_error_examine(struct spdk_bdev *bdev); +static int vbdev_error_config_json(struct spdk_json_write_ctx *w); + +static int vbdev_error_config_add(const char *base_bdev_name); +static int vbdev_error_config_remove(const char *base_bdev_name); + +static struct spdk_bdev_module error_if = { + .name = "error", + .module_init = vbdev_error_init, + .module_fini = vbdev_error_fini, + .examine_config = vbdev_error_examine, + .config_json = vbdev_error_config_json, + +}; + +SPDK_BDEV_MODULE_REGISTER(&error_if) + +int +spdk_vbdev_inject_error(char *name, uint32_t io_type, uint32_t error_type, uint32_t error_num) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_part *part; + struct error_disk *error_disk = NULL; + uint32_t i; + + pthread_mutex_lock(&g_vbdev_error_mutex); + bdev = spdk_bdev_get_by_name(name); + if (!bdev) { + SPDK_ERRLOG("Could not find ErrorInjection bdev %s\n", name); + pthread_mutex_unlock(&g_vbdev_error_mutex); + return -1; + } + + TAILQ_FOREACH(part, &g_error_disks, tailq) { + if (bdev == spdk_bdev_part_get_bdev(part)) { + error_disk = (struct error_disk *)part; + break; + } + } + + if (error_disk == NULL) { + SPDK_ERRLOG("Could not find ErrorInjection bdev %s\n", name); + pthread_mutex_unlock(&g_vbdev_error_mutex); + return -1; + } + + if (0xffffffff == io_type) { + for (i = 0; i < SPDK_COUNTOF(error_disk->error_vector); i++) { + error_disk->error_vector[i].enabled = true; + error_disk->error_vector[i].error_type = error_type; + error_disk->error_vector[i].error_num = error_num; + } + } else if (0 == io_type) { + for (i = 0; i < SPDK_COUNTOF(error_disk->error_vector); i++) { + error_disk->error_vector[i].enabled = false; + error_disk->error_vector[i].error_num = 0; + } + } else { + error_disk->error_vector[io_type].enabled = true; + error_disk->error_vector[io_type].error_type = error_type; + error_disk->error_vector[io_type].error_num = error_num; + } + pthread_mutex_unlock(&g_vbdev_error_mutex); + return 0; +} + +static void +vbdev_error_reset(struct error_disk *error_disk, struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev_io *pending_io, *tmp; + + TAILQ_FOREACH_SAFE(pending_io, &error_disk->pending_ios, module_link, tmp) { + TAILQ_REMOVE(&error_disk->pending_ios, pending_io, module_link); + spdk_bdev_io_complete(pending_io, SPDK_BDEV_IO_STATUS_FAILED); + } + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); +} + +static uint32_t +vbdev_error_get_error_type(struct error_disk 
*error_disk, uint32_t io_type) +{ + if (error_disk->error_vector[io_type].enabled && + error_disk->error_vector[io_type].error_num) { + return error_disk->error_vector[io_type].error_type; + } + return 0; +} + +static void +vbdev_error_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct error_channel *ch = spdk_io_channel_get_ctx(_ch); + struct error_disk *error_disk = bdev_io->bdev->ctxt; + uint32_t error_type; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_FLUSH: + break; + case SPDK_BDEV_IO_TYPE_RESET: + vbdev_error_reset(error_disk, bdev_io); + return; + default: + SPDK_ERRLOG("Error Injection: unknown I/O type %d\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + error_type = vbdev_error_get_error_type(error_disk, bdev_io->type); + if (error_type == 0) { + int rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io); + + if (rc) { + SPDK_ERRLOG("bdev_error: submit request failed, rc=%d\n", rc); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + return; + } else if (error_type == VBDEV_IO_FAILURE) { + error_disk->error_vector[bdev_io->type].error_num--; + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } else if (error_type == VBDEV_IO_PENDING) { + TAILQ_INSERT_TAIL(&error_disk->pending_ios, bdev_io, module_link); + error_disk->error_vector[bdev_io->type].error_num--; + } +} + +static int +vbdev_error_destruct(void *ctx) +{ + struct error_disk *error_disk = ctx; + struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(&error_disk->part); + int rc; + + rc = vbdev_error_config_remove(base_bdev->name); + if (rc != 0) { + SPDK_ERRLOG("vbdev_error_config_remove() failed\n"); + } + + return spdk_bdev_part_free(&error_disk->part); +} + +static int +vbdev_error_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct error_disk *error_disk = ctx; + struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(&error_disk->part); + + spdk_json_write_name(w, "error_disk"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "base_bdev"); + spdk_json_write_string(w, base_bdev->name); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +vbdev_error_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev. 
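+ * Per-bdev JSON config is not emitted here because the module-level
+ * vbdev_error_config_json() below already writes one "construct_error_bdev"
+ * entry for each configured base bdev.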
*/ +} + + +static struct spdk_bdev_fn_table vbdev_error_fn_table = { + .destruct = vbdev_error_destruct, + .submit_request = vbdev_error_submit_request, + .dump_info_json = vbdev_error_dump_info_json, + .write_config_json = vbdev_error_write_config_json +}; + +static void +spdk_vbdev_error_base_bdev_hotremove_cb(void *_base_bdev) +{ + spdk_bdev_part_base_hotremove(_base_bdev, &g_error_disks); +} + +static int +_spdk_vbdev_error_create(struct spdk_bdev *base_bdev) +{ + struct spdk_bdev_part_base *base = NULL; + struct error_disk *disk = NULL; + char *name; + int rc; + + base = spdk_bdev_part_base_construct(base_bdev, + spdk_vbdev_error_base_bdev_hotremove_cb, + &error_if, &vbdev_error_fn_table, &g_error_disks, + NULL, NULL, sizeof(struct error_channel), + NULL, NULL); + if (!base) { + SPDK_ERRLOG("could not construct part base for bdev %s\n", spdk_bdev_get_name(base_bdev)); + return -ENOMEM; + } + + disk = calloc(1, sizeof(*disk)); + if (!disk) { + SPDK_ERRLOG("Memory allocation failure\n"); + spdk_bdev_part_base_free(base); + return -ENOMEM; + } + + name = spdk_sprintf_alloc("EE_%s", spdk_bdev_get_name(base_bdev)); + if (!name) { + SPDK_ERRLOG("name allocation failure\n"); + spdk_bdev_part_base_free(base); + free(disk); + return -ENOMEM; + } + + rc = spdk_bdev_part_construct(&disk->part, base, name, 0, base_bdev->blockcnt, + "Error Injection Disk"); + free(name); + if (rc) { + SPDK_ERRLOG("could not construct part for bdev %s\n", spdk_bdev_get_name(base_bdev)); + /* spdk_bdev_part_construct will free name on failure */ + spdk_bdev_part_base_free(base); + free(disk); + return rc; + } + + TAILQ_INIT(&disk->pending_ios); + + return 0; +} + +int +spdk_vbdev_error_create(const char *base_bdev_name) +{ + int rc; + struct spdk_bdev *base_bdev; + + rc = vbdev_error_config_add(base_bdev_name); + if (rc != 0) { + SPDK_ERRLOG("Adding config for ErrorInjection bdev %s failed (rc=%d)\n", + base_bdev_name, rc); + return rc; + } + + base_bdev = spdk_bdev_get_by_name(base_bdev_name); + if (!base_bdev) { + return 0; + } + + rc = _spdk_vbdev_error_create(base_bdev); + if (rc != 0) { + vbdev_error_config_remove(base_bdev_name); + SPDK_ERRLOG("Could not create ErrorInjection bdev %s (rc=%d)\n", + base_bdev_name, rc); + } + + return rc; +} + +void +spdk_vbdev_error_delete(struct spdk_bdev *vbdev, spdk_delete_error_complete cb_fn, void *cb_arg) +{ + if (!vbdev || vbdev->module != &error_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(vbdev, cb_fn, cb_arg); +} + +static void +vbdev_error_clear_config(void) +{ + struct spdk_vbdev_error_config *cfg; + + while ((cfg = TAILQ_FIRST(&g_error_config))) { + TAILQ_REMOVE(&g_error_config, cfg, tailq); + free(cfg->base_bdev); + free(cfg); + } +} + +static struct spdk_vbdev_error_config * +vbdev_error_config_find_by_base_name(const char *base_bdev_name) +{ + struct spdk_vbdev_error_config *cfg; + + TAILQ_FOREACH(cfg, &g_error_config, tailq) { + if (strcmp(cfg->base_bdev, base_bdev_name) == 0) { + return cfg; + } + } + + return NULL; +} + +static int +vbdev_error_config_add(const char *base_bdev_name) +{ + struct spdk_vbdev_error_config *cfg; + + cfg = vbdev_error_config_find_by_base_name(base_bdev_name); + if (cfg) { + SPDK_ERRLOG("vbdev_error_config for bdev %s already exists\n", + base_bdev_name); + return -EEXIST; + } + + cfg = calloc(1, sizeof(*cfg)); + if (!cfg) { + SPDK_ERRLOG("calloc() failed for vbdev_error_config\n"); + return -ENOMEM; + } + + cfg->base_bdev = strdup(base_bdev_name); + if (!cfg->base_bdev) { + free(cfg); + 
SPDK_ERRLOG("strdup() failed for base_bdev_name\n"); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&g_error_config, cfg, tailq); + + return 0; +} + +static int +vbdev_error_config_remove(const char *base_bdev_name) +{ + struct spdk_vbdev_error_config *cfg; + + cfg = vbdev_error_config_find_by_base_name(base_bdev_name); + if (!cfg) { + return -ENOENT; + } + + TAILQ_REMOVE(&g_error_config, cfg, tailq); + free(cfg->base_bdev); + free(cfg); + return 0; +} + +static int +vbdev_error_init(void) +{ + struct spdk_conf_section *sp; + struct spdk_vbdev_error_config *cfg; + const char *base_bdev_name; + int i, rc; + + sp = spdk_conf_find_section(NULL, "BdevError"); + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + if (!spdk_conf_section_get_nval(sp, "BdevError", i)) { + break; + } + + base_bdev_name = spdk_conf_section_get_nmval(sp, "BdevError", i, 0); + if (!base_bdev_name) { + SPDK_ERRLOG("ErrorInjection configuration missing bdev name\n"); + rc = -EINVAL; + goto error; + } + + cfg = calloc(1, sizeof(*cfg)); + if (!cfg) { + SPDK_ERRLOG("calloc() failed for vbdev_error_config\n"); + rc = -ENOMEM; + goto error; + } + + cfg->base_bdev = strdup(base_bdev_name); + if (!cfg->base_bdev) { + free(cfg); + SPDK_ERRLOG("strdup() failed for bdev name\n"); + rc = -ENOMEM; + goto error; + } + + TAILQ_INSERT_TAIL(&g_error_config, cfg, tailq); + } + + return 0; + +error: + vbdev_error_clear_config(); + return rc; +} + +static void +vbdev_error_fini(void) +{ + vbdev_error_clear_config(); +} + +static void +vbdev_error_examine(struct spdk_bdev *bdev) +{ + struct spdk_vbdev_error_config *cfg; + int rc; + + cfg = vbdev_error_config_find_by_base_name(bdev->name); + if (cfg != NULL) { + rc = _spdk_vbdev_error_create(bdev); + if (rc != 0) { + SPDK_ERRLOG("could not create error vbdev for bdev %s at examine\n", + bdev->name); + } + } + + spdk_bdev_module_examine_done(&error_if); +} + +static int +vbdev_error_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_vbdev_error_config *cfg; + + TAILQ_FOREACH(cfg, &g_error_config, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_error_bdev"); + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "base_name", cfg->base_bdev); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + return 0; +} diff --git a/src/spdk/lib/bdev/error/vbdev_error.h b/src/spdk/lib/bdev/error/vbdev_error.h new file mode 100644 index 00000000..4ff1ac19 --- /dev/null +++ b/src/spdk/lib/bdev/error/vbdev_error.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_ERROR_H
+#define SPDK_VBDEV_ERROR_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+
+enum vbdev_error_type {
+ VBDEV_IO_FAILURE = 1,
+ VBDEV_IO_PENDING,
+};
+
+typedef void (*spdk_delete_error_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create a vbdev on top of the base bdev to inject errors into it.
+ *
+ * \param base_bdev_name Name of the base bdev.
+ * \return 0 on success or negative on failure.
+ */
+int spdk_vbdev_error_create(const char *base_bdev_name);
+
+/**
+ * Delete the vbdev used to inject errors.
+ *
+ * \param vbdev Pointer to the error vbdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Arguments to pass to cb_fn.
+ */
+void spdk_vbdev_error_delete(struct spdk_bdev *vbdev, spdk_delete_error_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Inject errors into the ErrorInjection bdev. Users can specify which I/O type
+ * the error is injected into, what type of error is injected, and how many
+ * errors are injected.
+ *
+ * \param name Name of the ErrorInjection bdev to inject errors into.
+ * \param io_type I/O type to inject errors into.
+ * \param error_type Type of error to inject.
+ * \param error_num Number of errors to inject.
+ * \return 0 on success or negative on failure.
+ */
+int spdk_vbdev_inject_error(char *name, uint32_t io_type, uint32_t error_type,
+ uint32_t error_num);
+
+#endif // SPDK_VBDEV_ERROR_H
diff --git a/src/spdk/lib/bdev/error/vbdev_error_rpc.c b/src/spdk/lib/bdev/error/vbdev_error_rpc.c
new file mode 100644
index 00000000..8d95fd09
--- /dev/null
+++ b/src/spdk/lib/bdev/error/vbdev_error_rpc.c
@@ -0,0 +1,258 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" +#include "vbdev_error.h" + +#define ERROR_BDEV_IO_TYPE_INVALID (SPDK_BDEV_IO_TYPE_RESET + 1) +#define ERROR_BDEV_ERROR_TYPE_INVALID (VBDEV_IO_PENDING + 1) + +static uint32_t +spdk_rpc_error_bdev_io_type_parse(char *name) +{ + if (strcmp(name, "read") == 0) { + return SPDK_BDEV_IO_TYPE_READ; + } else if (strcmp(name, "write") == 0) { + return SPDK_BDEV_IO_TYPE_WRITE; + } else if (strcmp(name, "flush") == 0) { + return SPDK_BDEV_IO_TYPE_FLUSH; + } else if (strcmp(name, "unmap") == 0) { + return SPDK_BDEV_IO_TYPE_UNMAP; + } else if (strcmp(name, "all") == 0) { + return 0xffffffff; + } else if (strcmp(name, "clear") == 0) { + return 0; + } + return ERROR_BDEV_IO_TYPE_INVALID; +} + +static uint32_t +spdk_rpc_error_bdev_error_type_parse(char *name) +{ + if (strcmp(name, "failure") == 0) { + return VBDEV_IO_FAILURE; + } else if (strcmp(name, "pending") == 0) { + return VBDEV_IO_PENDING; + } + return ERROR_BDEV_ERROR_TYPE_INVALID; +} + +struct rpc_construct_error_bdev { + char *base_name; +}; + +static void +free_rpc_construct_error_bdev(struct rpc_construct_error_bdev *req) +{ + free(req->base_name); +} + +static const struct spdk_json_object_decoder rpc_construct_error_bdev_decoders[] = { + {"base_name", offsetof(struct rpc_construct_error_bdev, base_name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_construct_error_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_error_bdev req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_construct_error_bdev_decoders, + SPDK_COUNTOF(rpc_construct_error_bdev_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (spdk_vbdev_error_create(req.base_name)) { + SPDK_ERRLOG("Could not create ErrorInjection bdev %s\n", req.base_name); + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_error_bdev(&req); + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + + free_rpc_construct_error_bdev(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_error_bdev(&req); +} +SPDK_RPC_REGISTER("construct_error_bdev", spdk_rpc_construct_error_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_error { + char *name; +}; + +static void +free_rpc_delete_error(struct rpc_delete_error *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_error_decoders[] = { + {"name", offsetof(struct rpc_delete_error, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_error_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = 
spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_error_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_error req = {NULL}; + struct spdk_bdev *vbdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_error_decoders, + SPDK_COUNTOF(rpc_delete_error_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + vbdev = spdk_bdev_get_by_name(req.name); + if (vbdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + spdk_vbdev_error_delete(vbdev, _spdk_rpc_delete_error_bdev_cb, request); + + free_rpc_delete_error(&req); + + return; + +invalid: + free_rpc_delete_error(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_error_bdev", spdk_rpc_delete_error_bdev, SPDK_RPC_RUNTIME) + +struct rpc_error_information { + char *name; + char *io_type; + char *error_type; + uint32_t num; +}; + +static const struct spdk_json_object_decoder rpc_error_information_decoders[] = { + {"name", offsetof(struct rpc_error_information, name), spdk_json_decode_string}, + {"io_type", offsetof(struct rpc_error_information, io_type), spdk_json_decode_string}, + {"error_type", offsetof(struct rpc_error_information, error_type), spdk_json_decode_string}, + {"num", offsetof(struct rpc_error_information, num), spdk_json_decode_uint32, true}, +}; + +static void +free_rpc_error_information(struct rpc_error_information *p) +{ + free(p->name); + free(p->io_type); + free(p->error_type); +} + +static void +spdk_rpc_bdev_inject_error(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_error_information req = {}; + struct spdk_json_write_ctx *w; + uint32_t io_type; + uint32_t error_type; + int ret; + + if (spdk_json_decode_object(params, rpc_error_information_decoders, + SPDK_COUNTOF(rpc_error_information_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + io_type = spdk_rpc_error_bdev_io_type_parse(req.io_type); + if (io_type == ERROR_BDEV_IO_TYPE_INVALID) { + goto invalid; + } + + error_type = spdk_rpc_error_bdev_error_type_parse(req.error_type); + if (error_type == ERROR_BDEV_ERROR_TYPE_INVALID) { + goto invalid; + } + + ret = spdk_vbdev_inject_error(req.name, io_type, error_type, req.num); + if (ret) { + goto invalid; + } + + free_rpc_error_information(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_error_information(&req); +} +SPDK_RPC_REGISTER("bdev_inject_error", spdk_rpc_bdev_inject_error, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/gpt/Makefile b/src/spdk/lib/bdev/gpt/Makefile new file mode 100644 index 00000000..6806c647 --- /dev/null +++ b/src/spdk/lib/bdev/gpt/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = gpt.c vbdev_gpt.c +LIBNAME = vbdev_gpt + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/gpt/gpt.c b/src/spdk/lib/bdev/gpt/gpt.c new file mode 100644 index 00000000..0e830cdd --- /dev/null +++ b/src/spdk/lib/bdev/gpt/gpt.c @@ -0,0 +1,239 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "gpt.h"
+
+#include "spdk/crc32.h"
+#include "spdk/endian.h"
+#include "spdk/event.h"
+
+#include "spdk_internal/log.h"
+
+#define GPT_PRIMARY_PARTITION_TABLE_LBA 0x1
+#define PRIMARY_PARTITION_NUMBER 4
+#define GPT_PROTECTIVE_MBR 1
+#define SPDK_MAX_NUM_PARTITION_ENTRIES 128
+
+static int
+spdk_gpt_read_partitions(struct spdk_gpt *gpt)
+{
+ uint32_t total_partition_size, num_partition_entries, partition_entry_size;
+ uint64_t partition_start_lba;
+ struct spdk_gpt_header *head = gpt->header;
+ uint32_t crc32;
+
+ num_partition_entries = from_le32(&head->num_partition_entries);
+ if (num_partition_entries > SPDK_MAX_NUM_PARTITION_ENTRIES) {
+ SPDK_ERRLOG("Num_partition_entries=%u which exceeds max=%u\n",
+ num_partition_entries, SPDK_MAX_NUM_PARTITION_ENTRIES);
+ return -1;
+ }
+
+ partition_entry_size = from_le32(&head->size_of_partition_entry);
+ if (partition_entry_size != sizeof(struct spdk_gpt_partition_entry)) {
+ SPDK_ERRLOG("Partition_entry_size(%x) != expected(%lx)\n",
+ partition_entry_size, sizeof(struct spdk_gpt_partition_entry));
+ return -1;
+ }
+
+ total_partition_size = num_partition_entries * partition_entry_size;
+ partition_start_lba = from_le64(&head->partition_entry_lba);
+ if ((total_partition_size + partition_start_lba * gpt->sector_size) > SPDK_GPT_BUFFER_SIZE) {
+ SPDK_ERRLOG("Buffer size is not enough\n");
+ return -1;
+ }
+
+ gpt->partitions = (struct spdk_gpt_partition_entry *)(gpt->buf +
+ partition_start_lba * gpt->sector_size);
+
+ crc32 = spdk_crc32_ieee_update(gpt->partitions, total_partition_size, ~0);
+ crc32 ^= ~0;
+
+ if (crc32 != from_le32(&head->partition_entry_array_crc32)) {
+ SPDK_ERRLOG("GPT partition entry array crc32 did not match\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+spdk_gpt_lba_range_check(struct spdk_gpt_header *head, uint64_t lba_end)
+{
+ uint64_t usable_lba_start, usable_lba_end;
+
+ usable_lba_start = from_le64(&head->first_usable_lba);
+ usable_lba_end = from_le64(&head->last_usable_lba);
+
+ if (usable_lba_end < usable_lba_start) {
+ SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") < usable_lba_start(%" PRIu64 ")\n",
+ usable_lba_end, usable_lba_start);
+ return -1;
+ }
+
+ if (usable_lba_end > lba_end) {
+ SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") > lba_end(%" PRIu64 ")\n",
+ usable_lba_end, lba_end);
+ return -1;
+ }
+
+ if ((usable_lba_start < GPT_PRIMARY_PARTITION_TABLE_LBA) &&
+ (GPT_PRIMARY_PARTITION_TABLE_LBA < usable_lba_end)) {
+ SPDK_ERRLOG("Head lba is not in the usable range\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+spdk_gpt_read_header(struct spdk_gpt *gpt)
+{
+ uint32_t head_size;
+ uint32_t new_crc, original_crc;
+ struct spdk_gpt_header *head;
+
+ head = (struct spdk_gpt_header *)(gpt->buf + GPT_PRIMARY_PARTITION_TABLE_LBA * gpt->sector_size);
+ head_size = from_le32(&head->header_size);
+ if (head_size < sizeof(*head) || head_size > gpt->sector_size) {
+ SPDK_ERRLOG("head_size=%u\n", head_size);
+ return -1;
+ }
+
+ original_crc = from_le32(&head->header_crc32);
+ head->header_crc32 = 0;
+ new_crc = spdk_crc32_ieee_update(head, from_le32(&head->header_size), ~0);
+ new_crc ^= ~0;
+ /* restore header crc32 */
+ to_le32(&head->header_crc32, original_crc);
+
+ if (new_crc != original_crc) {
+ SPDK_ERRLOG("head crc32 does not match, provided=%u, calculated=%u\n",
+ original_crc, new_crc);
+ return -1;
+ }
+
+ if (memcmp(SPDK_GPT_SIGNATURE, head->gpt_signature,
+ sizeof(head->gpt_signature))) {
+ SPDK_ERRLOG("signature did not match\n");
+ return -1;
+ }
+
+ if
(spdk_gpt_lba_range_check(head, gpt->lba_end)) { + SPDK_ERRLOG("lba range check error\n"); + return -1; + } + + gpt->header = head; + return 0; +} + +static int +spdk_gpt_check_mbr(struct spdk_gpt *gpt) +{ + int i, primary_partition = 0; + uint32_t total_lba_size = 0, ret = 0, expected_start_lba; + struct spdk_mbr *mbr; + + mbr = (struct spdk_mbr *)gpt->buf; + if (from_le16(&mbr->mbr_signature) != SPDK_MBR_SIGNATURE) { + SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Signature mismatch, provided=%x," + "expected=%x\n", from_le16(&mbr->disk_signature), + SPDK_MBR_SIGNATURE); + return -1; + } + + for (i = 0; i < PRIMARY_PARTITION_NUMBER; i++) { + if (mbr->partitions[i].os_type == SPDK_MBR_OS_TYPE_GPT_PROTECTIVE) { + primary_partition = i; + ret = GPT_PROTECTIVE_MBR; + break; + } + } + + if (ret == GPT_PROTECTIVE_MBR) { + expected_start_lba = GPT_PRIMARY_PARTITION_TABLE_LBA; + if (from_le32(&mbr->partitions[primary_partition].start_lba) != expected_start_lba) { + SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "start lba mismatch, provided=%u, expected=%u\n", + from_le32(&mbr->partitions[primary_partition].start_lba), + expected_start_lba); + return -1; + } + + total_lba_size = from_le32(&mbr->partitions[primary_partition].size_lba); + if ((total_lba_size != ((uint32_t) gpt->total_sectors - 1)) && + (total_lba_size != 0xFFFFFFFF)) { + SPDK_ERRLOG("GPT Primary MBR size does not equal: (record_size %u != actual_size %u)!\n", + total_lba_size, (uint32_t) gpt->total_sectors - 1); + return -1; + } + } else { + SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Currently only support GPT Protective MBR format\n"); + return -1; + } + + return 0; +} + +int +spdk_gpt_parse(struct spdk_gpt *gpt) +{ + int rc; + + if (!gpt || !gpt->buf) { + SPDK_ERRLOG("Gpt and the related buffer should not be NULL\n"); + return -1; + } + + rc = spdk_gpt_check_mbr(gpt); + if (rc) { + SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Failed to detect gpt in MBR\n"); + return rc; + } + + rc = spdk_gpt_read_header(gpt); + if (rc) { + SPDK_ERRLOG("Failed to read gpt header\n"); + return rc; + } + + rc = spdk_gpt_read_partitions(gpt); + if (rc) { + SPDK_ERRLOG("Failed to read gpt partitions\n"); + return rc; + } + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("gpt_parse", SPDK_LOG_GPT_PARSE) diff --git a/src/spdk/lib/bdev/gpt/gpt.h b/src/spdk/lib/bdev/gpt/gpt.h new file mode 100644 index 00000000..923bdc1c --- /dev/null +++ b/src/spdk/lib/bdev/gpt/gpt.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * GPT internal Interface + */ + +#ifndef SPDK_INTERNAL_GPT_H +#define SPDK_INTERNAL_GPT_H + +#include "spdk/stdinc.h" + +#include "spdk/gpt_spec.h" + +#define SPDK_GPT_PART_TYPE_GUID SPDK_GPT_GUID(0x7c5222bd, 0x8f5d, 0x4087, 0x9c00, 0xbf9843c7b58c) +#define SPDK_GPT_BUFFER_SIZE 32768 /* 32KB */ +#define SPDK_GPT_GUID_EQUAL(x,y) (memcmp(x, y, sizeof(struct spdk_gpt_guid)) == 0) + +struct spdk_gpt { + unsigned char *buf; + uint64_t buf_size; + uint64_t lba_start; + uint64_t lba_end; + uint64_t total_sectors; + uint32_t sector_size; + struct spdk_gpt_header *header; + struct spdk_gpt_partition_entry *partitions; +}; + +int spdk_gpt_parse(struct spdk_gpt *gpt); + +#endif /* SPDK_INTERNAL_GPT_H */ diff --git a/src/spdk/lib/bdev/gpt/vbdev_gpt.c b/src/spdk/lib/bdev/gpt/vbdev_gpt.c new file mode 100644 index 00000000..751af0ea --- /dev/null +++ b/src/spdk/lib/bdev/gpt/vbdev_gpt.c @@ -0,0 +1,463 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This driver reads a GPT partition table from a bdev and exposes a virtual block device for + * each partition. 
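+ *
+ * Only partition entries whose type GUID matches SPDK_GPT_PART_TYPE_GUID are
+ * exposed. Each exposed partition becomes a bdev named
+ * "<base bdev name>p<N>" with a 1-based partition number, so a hypothetical
+ * base bdev "Nvme0n1" would yield "Nvme0n1p1", "Nvme0n1p2", and so on.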
+ */ + +#include "gpt.h" + +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/rpc.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +static int vbdev_gpt_init(void); +static void vbdev_gpt_examine(struct spdk_bdev *bdev); +static int vbdev_gpt_get_ctx_size(void); + +static struct spdk_bdev_module gpt_if = { + .name = "gpt", + .module_init = vbdev_gpt_init, + .get_ctx_size = vbdev_gpt_get_ctx_size, + .examine_disk = vbdev_gpt_examine, + +}; +SPDK_BDEV_MODULE_REGISTER(&gpt_if) + +/* Base block device gpt context */ +struct gpt_base { + struct spdk_gpt gpt; + struct spdk_bdev_part_base *part_base; + + /* This channel is only used for reading the partition table. */ + struct spdk_io_channel *ch; +}; + +/* Context for each gpt virtual bdev */ +struct gpt_disk { + struct spdk_bdev_part part; + uint32_t partition_index; +}; + +struct gpt_channel { + struct spdk_bdev_part_channel part_ch; +}; + +struct gpt_io { + struct spdk_io_channel *ch; + struct spdk_bdev_io *bdev_io; + + /* for bdev_io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +static SPDK_BDEV_PART_TAILQ g_gpt_disks = TAILQ_HEAD_INITIALIZER(g_gpt_disks); + +static bool g_gpt_disabled; + +static void +spdk_gpt_base_free(void *ctx) +{ + struct gpt_base *gpt_base = ctx; + + spdk_dma_free(gpt_base->gpt.buf); + free(gpt_base); +} + +static void +spdk_gpt_base_bdev_hotremove_cb(void *_base_bdev) +{ + spdk_bdev_part_base_hotremove(_base_bdev, &g_gpt_disks); +} + +static int vbdev_gpt_destruct(void *ctx); +static void vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io); +static int vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w); + +static struct spdk_bdev_fn_table vbdev_gpt_fn_table = { + .destruct = vbdev_gpt_destruct, + .submit_request = vbdev_gpt_submit_request, + .dump_info_json = vbdev_gpt_dump_info_json, +}; + +static struct gpt_base * +spdk_gpt_base_bdev_init(struct spdk_bdev *bdev) +{ + struct gpt_base *gpt_base; + struct spdk_gpt *gpt; + + gpt_base = calloc(1, sizeof(*gpt_base)); + if (!gpt_base) { + SPDK_ERRLOG("Cannot alloc memory for gpt_base pointer\n"); + return NULL; + } + + gpt_base->part_base = spdk_bdev_part_base_construct(bdev, + spdk_gpt_base_bdev_hotremove_cb, + &gpt_if, &vbdev_gpt_fn_table, + &g_gpt_disks, spdk_gpt_base_free, gpt_base, + sizeof(struct gpt_channel), NULL, NULL); + if (!gpt_base->part_base) { + free(gpt_base); + SPDK_ERRLOG("cannot construct gpt_base"); + return NULL; + } + + gpt = &gpt_base->gpt; + gpt->buf_size = spdk_max(SPDK_GPT_BUFFER_SIZE, bdev->blocklen); + gpt->buf = spdk_dma_zmalloc(gpt->buf_size, spdk_bdev_get_buf_align(bdev), NULL); + if (!gpt->buf) { + SPDK_ERRLOG("Cannot alloc buf\n"); + spdk_bdev_part_base_free(gpt_base->part_base); + return NULL; + } + + gpt->sector_size = bdev->blocklen; + gpt->total_sectors = bdev->blockcnt; + gpt->lba_start = 0; + gpt->lba_end = gpt->total_sectors - 1; + + return gpt_base; +} + +static int +vbdev_gpt_destruct(void *ctx) +{ + struct gpt_disk *gpt_disk = ctx; + + return spdk_bdev_part_free(&gpt_disk->part); +} + +static void +vbdev_gpt_resubmit_request(void *arg) +{ + struct gpt_io *io = (struct gpt_io *)arg; + + vbdev_gpt_submit_request(io->ch, io->bdev_io); +} + +static void +vbdev_gpt_queue_io(struct gpt_io *io) +{ + int rc; + + io->bdev_io_wait.bdev = io->bdev_io->bdev; + io->bdev_io_wait.cb_fn = vbdev_gpt_resubmit_request; + io->bdev_io_wait.cb_arg = io; + + 
rc = spdk_bdev_queue_io_wait(io->bdev_io->bdev, + io->ch, &io->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vbdev_gpt_queue_io, rc=%d.\n", rc); + spdk_bdev_io_complete(io->bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct gpt_channel *ch = spdk_io_channel_get_ctx(_ch); + struct gpt_io *io = (struct gpt_io *)bdev_io->driver_ctx; + int rc; + + rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io); + if (rc) { + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "gpt: no memory, queue io\n"); + io->ch = _ch; + io->bdev_io = bdev_io; + vbdev_gpt_queue_io(io); + } else { + SPDK_ERRLOG("gpt: error on bdev_io submission, rc=%d.\n", rc); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +static void +write_guid(struct spdk_json_write_ctx *w, const struct spdk_gpt_guid *guid) +{ + spdk_json_write_string_fmt(w, "%08x-%04x-%04x-%04x-%04x%08x", + from_le32(&guid->raw[0]), + from_le16(&guid->raw[4]), + from_le16(&guid->raw[6]), + from_be16(&guid->raw[8]), + from_be16(&guid->raw[10]), + from_be32(&guid->raw[12])); +} + +static void +write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *str, size_t max_len) +{ + size_t len; + const uint16_t *p; + + for (len = 0, p = str; len < max_len && *p; p++) { + len++; + } + + spdk_json_write_string_utf16le_raw(w, str, len); +} + +static int +vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct gpt_disk *gpt_disk = SPDK_CONTAINEROF(ctx, struct gpt_disk, part); + struct spdk_bdev_part_base *base_bdev = spdk_bdev_part_get_base(&gpt_disk->part); + struct gpt_base *gpt_base = spdk_bdev_part_base_get_ctx(base_bdev); + struct spdk_bdev *part_base_bdev = spdk_bdev_part_base_get_bdev(base_bdev); + struct spdk_gpt *gpt = &gpt_base->gpt; + struct spdk_gpt_partition_entry *gpt_entry = &gpt->partitions[gpt_disk->partition_index]; + uint64_t offset_blocks = spdk_bdev_part_get_offset_blocks(&gpt_disk->part); + + spdk_json_write_name(w, "gpt"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "base_bdev"); + spdk_json_write_string(w, spdk_bdev_get_name(part_base_bdev)); + + spdk_json_write_name(w, "offset_blocks"); + spdk_json_write_uint64(w, offset_blocks); + + spdk_json_write_name(w, "partition_type_guid"); + write_guid(w, &gpt_entry->part_type_guid); + + spdk_json_write_name(w, "unique_partition_guid"); + write_guid(w, &gpt_entry->unique_partition_guid); + + spdk_json_write_name(w, "partition_name"); + write_string_utf16le(w, gpt_entry->partition_name, SPDK_COUNTOF(gpt_entry->partition_name)); + + spdk_json_write_object_end(w); + + return 0; +} + +static int +vbdev_gpt_create_bdevs(struct gpt_base *gpt_base) +{ + uint32_t num_partition_entries; + uint64_t i, head_lba_start, head_lba_end; + uint32_t num_partitions; + struct spdk_gpt_partition_entry *p; + struct gpt_disk *d; + struct spdk_gpt *gpt; + char *name; + struct spdk_bdev *base_bdev; + int rc; + + gpt = &gpt_base->gpt; + num_partition_entries = from_le32(&gpt->header->num_partition_entries); + head_lba_start = from_le64(&gpt->header->first_usable_lba); + head_lba_end = from_le64(&gpt->header->last_usable_lba); + num_partitions = 0; + + for (i = 0; i < num_partition_entries; i++) { + p = &gpt->partitions[i]; + uint64_t lba_start = from_le64(&p->starting_lba); + uint64_t lba_end = from_le64(&p->ending_lba); + + if (!SPDK_GPT_GUID_EQUAL(&gpt->partitions[i].part_type_guid, + &SPDK_GPT_PART_TYPE_GUID) || + 
lba_start == 0) {
+ continue;
+ }
+ if (lba_start < head_lba_start || lba_end > head_lba_end) {
+ continue;
+ }
+
+ d = calloc(1, sizeof(*d));
+ if (!d) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ return -1;
+ }
+
+ /* indexes start at 1 instead of 0 to match the existing style */
+ base_bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
+ name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1);
+ if (!name) {
+ SPDK_ERRLOG("name allocation failure\n");
+ free(d);
+ return -1;
+ }
+
+ rc = spdk_bdev_part_construct(&d->part, gpt_base->part_base, name,
+ lba_start, lba_end - lba_start, "GPT Disk");
+ free(name);
+ if (rc) {
+ SPDK_ERRLOG("could not construct bdev part\n");
+ /* spdk_bdev_part_construct will free name on failure */
+ free(d);
+ return -1;
+ }
+ num_partitions++;
+ d->partition_index = i;
+ }
+
+ return num_partitions;
+}
+
+static void
+spdk_gpt_bdev_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
+{
+ struct gpt_base *gpt_base = (struct gpt_base *)arg;
+ struct spdk_bdev *bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
+ int rc, num_partitions = 0;
+
+ spdk_bdev_free_io(bdev_io);
+ spdk_put_io_channel(gpt_base->ch);
+ gpt_base->ch = NULL;
+
+ if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+ SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
+ spdk_bdev_get_name(bdev), status);
+ goto end;
+ }
+
+ rc = spdk_gpt_parse(&gpt_base->gpt);
+ if (rc) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "Failed to parse gpt\n");
+ goto end;
+ }
+
+ num_partitions = vbdev_gpt_create_bdevs(gpt_base);
+ if (num_partitions < 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "Failed to split dev=%s by gpt table\n",
+ spdk_bdev_get_name(bdev));
+ }
+
+end:
+ /*
+ * Notify the generic bdev layer that the actions related to the original examine
+ * callback are now completed.
+ */
+ spdk_bdev_module_examine_done(&gpt_if);
+
+ /*
+ * vbdev_gpt_create_bdevs returns the number of bdevs created upon success.
+ * We can branch on this value.
+ */
+ if (num_partitions <= 0) {
+ /* If no gpt_disk instances were created, free the base context */
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ }
+}
+
+static int
+vbdev_gpt_read_gpt(struct spdk_bdev *bdev)
+{
+ struct gpt_base *gpt_base;
+ struct spdk_bdev_desc *part_base_desc;
+ int rc;
+
+ gpt_base = spdk_gpt_base_bdev_init(bdev);
+ if (!gpt_base) {
+ SPDK_ERRLOG("Cannot allocate gpt_base\n");
+ return -1;
+ }
+
+ part_base_desc = spdk_bdev_part_base_get_desc(gpt_base->part_base);
+ gpt_base->ch = spdk_bdev_get_io_channel(part_base_desc);
+ if (gpt_base->ch == NULL) {
+ SPDK_ERRLOG("Failed to get an io_channel.\n");
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ return -1;
+ }
+
+ rc = spdk_bdev_read(part_base_desc, gpt_base->ch, gpt_base->gpt.buf, 0,
+ gpt_base->gpt.buf_size, spdk_gpt_bdev_complete, gpt_base);
+ if (rc < 0) {
+ spdk_put_io_channel(gpt_base->ch);
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ SPDK_ERRLOG("Failed to send bdev_io command\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+vbdev_gpt_init(void)
+{
+ struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Gpt");
+
+ if (sp && spdk_conf_section_get_boolval(sp, "Disable", false)) {
+ /* Disable Gpt probe */
+ g_gpt_disabled = true;
+ }
+
+ return 0;
+}
+
+static int
+vbdev_gpt_get_ctx_size(void)
+{
+ return sizeof(struct gpt_io);
+}
+
+static void
+vbdev_gpt_examine(struct spdk_bdev *bdev)
+{
+ int rc;
+
+ /* A bdev with fewer than 2 blocks cannot have a GPT. Block 0 has
+ * the MBR and block 1 has the GPT header.
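+ *
+ * When the checks below pass, vbdev_gpt_read_gpt() issues a single
+ * spdk_bdev_read() of the first gpt.buf_size bytes (at least
+ * SPDK_GPT_BUFFER_SIZE) starting at offset 0, and the partition table is
+ * parsed in the read completion callback.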
+ */ + if (g_gpt_disabled || spdk_bdev_get_num_blocks(bdev) < 2) { + spdk_bdev_module_examine_done(&gpt_if); + return; + } + + if (spdk_bdev_get_block_size(bdev) % 512 != 0) { + SPDK_ERRLOG("GPT module does not support block size %" PRIu32 " for bdev %s\n", + spdk_bdev_get_block_size(bdev), spdk_bdev_get_name(bdev)); + spdk_bdev_module_examine_done(&gpt_if); + return; + } + + rc = vbdev_gpt_read_gpt(bdev); + if (rc) { + spdk_bdev_module_examine_done(&gpt_if); + SPDK_ERRLOG("Failed to read info from bdev %s\n", spdk_bdev_get_name(bdev)); + } +} + +SPDK_LOG_REGISTER_COMPONENT("vbdev_gpt", SPDK_LOG_VBDEV_GPT) diff --git a/src/spdk/lib/bdev/iscsi/Makefile b/src/spdk/lib/bdev/iscsi/Makefile new file mode 100644 index 00000000..4a38886d --- /dev/null +++ b/src/spdk/lib/bdev/iscsi/Makefile @@ -0,0 +1,46 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ +# CentOS 7 libiscsi package has functions declared inline but not +# defined in the header file. Not aware of any way to disable +# this warning so just make sure the warning isn't treated as +# an error. +CFLAGS += -Wno-error +C_SRCS = bdev_iscsi.c bdev_iscsi_rpc.c +LIBNAME = bdev_iscsi + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi.c b/src/spdk/lib/bdev/iscsi/bdev_iscsi.c new file mode 100644 index 00000000..528337f5 --- /dev/null +++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi.c @@ -0,0 +1,875 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/fd.h" +#include "spdk/thread.h" +#include "spdk/json.h" +#include "spdk/util.h" +#include "spdk/rpc.h" +#include "spdk/string.h" +#include "spdk/iscsi_spec.h" + +#include "spdk_internal/log.h" +#include "spdk/bdev_module.h" + +#include "iscsi/iscsi.h" +#include "iscsi/scsi-lowlevel.h" + +#include "bdev_iscsi.h" + +struct bdev_iscsi_lun; + +#define BDEV_ISCSI_CONNECTION_POLL_US 500 /* 0.5 ms */ +#define BDEV_ISCSI_NO_MASTER_CH_POLL_US 10000 /* 10ms */ + +#define DEFAULT_INITIATOR_NAME "iqn.2016-06.io.spdk:init" + +static int bdev_iscsi_initialize(void); +static TAILQ_HEAD(, bdev_iscsi_conn_req) g_iscsi_conn_req = TAILQ_HEAD_INITIALIZER( + g_iscsi_conn_req); +static struct spdk_poller *g_conn_poller = NULL; + +struct bdev_iscsi_io { + struct spdk_thread *submit_td; + enum spdk_bdev_io_status status; + int scsi_status; + enum spdk_scsi_sense sk; + uint8_t asc; + uint8_t ascq; +}; + +struct bdev_iscsi_lun { + struct spdk_bdev bdev; + struct iscsi_context *context; + char *initiator_iqn; + char *url; + pthread_mutex_t mutex; + uint32_t ch_count; + struct bdev_iscsi_io_channel *master_ch; + struct spdk_thread *master_td; + struct spdk_poller *no_master_ch_poller; + struct spdk_thread *no_master_ch_poller_td; + bool unmap_supported; +}; + +struct bdev_iscsi_io_channel { + struct spdk_poller *poller; + struct bdev_iscsi_lun *lun; +}; + +struct bdev_iscsi_conn_req { + char *url; + char *bdev_name; + char *initiator_iqn; + struct iscsi_context *context; + spdk_bdev_iscsi_create_cb create_cb; + spdk_bdev_iscsi_create_cb create_cb_arg; + bool unmap_supported; + TAILQ_ENTRY(bdev_iscsi_conn_req) link; +}; + +static void +complete_conn_req(struct bdev_iscsi_conn_req *req, struct spdk_bdev *bdev, + int status) +{ + TAILQ_REMOVE(&g_iscsi_conn_req, req, link); + req->create_cb(req->create_cb_arg, bdev, status); + if (status) { + /* if the request failed and no iscsi lun was + * created then we could not hand over this + * memory and have to free it manually now. 
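+ * On success, ownership of these pointers is assumed to pass to the newly
+ * created lun (they are released later in _iscsi_free_lun()).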
+ */ + iscsi_destroy_context(req->context); + free(req->initiator_iqn); + free(req->bdev_name); + free(req->url); + } + free(req); +} + +static int +bdev_iscsi_get_ctx_size(void) +{ + return sizeof(struct bdev_iscsi_io); +} + +static void +_iscsi_free_lun(void *arg) +{ + struct bdev_iscsi_lun *lun = arg; + + assert(lun != NULL); + iscsi_destroy_context(lun->context); + pthread_mutex_destroy(&lun->mutex); + free(lun->bdev.name); + free(lun->url); + free(lun->initiator_iqn); + + spdk_bdev_destruct_done(&lun->bdev, 0); + free(lun); +} + +static void +bdev_iscsi_finish(void) +{ + struct bdev_iscsi_conn_req *req; + + while (!TAILQ_EMPTY(&g_iscsi_conn_req)) { + req = TAILQ_FIRST(&g_iscsi_conn_req); + complete_conn_req(req, NULL, -EINTR); + } + + if (g_conn_poller) { + spdk_poller_unregister(&g_conn_poller); + } +} + +static struct spdk_bdev_module g_iscsi_bdev_module = { + .name = "iscsi", + .module_init = bdev_iscsi_initialize, + .module_fini = bdev_iscsi_finish, + .get_ctx_size = bdev_iscsi_get_ctx_size, + .async_init = true, +}; + +SPDK_BDEV_MODULE_REGISTER(&g_iscsi_bdev_module); + +static void +_bdev_iscsi_io_complete(void *_iscsi_io) +{ + struct bdev_iscsi_io *iscsi_io = _iscsi_io; + + if (iscsi_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) { + spdk_bdev_io_complete_scsi_status(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->scsi_status, + iscsi_io->sk, iscsi_io->asc, iscsi_io->ascq); + } else { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->status); + } +} + +static void +bdev_iscsi_io_complete(struct bdev_iscsi_io *iscsi_io, enum spdk_bdev_io_status status) +{ + iscsi_io->status = status; + if (iscsi_io->submit_td != NULL) { + spdk_thread_send_msg(iscsi_io->submit_td, _bdev_iscsi_io_complete, iscsi_io); + } else { + _bdev_iscsi_io_complete(iscsi_io); + } +} + +/* Common call back function for read/write/flush command */ +static void +bdev_iscsi_command_cb(struct iscsi_context *context, int status, void *_task, void *_iscsi_io) +{ + struct scsi_task *task = _task; + struct bdev_iscsi_io *iscsi_io = _iscsi_io; + + iscsi_io->scsi_status = status; + iscsi_io->sk = (uint8_t)task->sense.key; + iscsi_io->asc = (task->sense.ascq >> 8) & 0xFF; + iscsi_io->ascq = task->sense.ascq & 0xFF; + + scsi_free_scsi_task(task); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS); +} + +static void +bdev_iscsi_readv(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, + struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba) +{ + struct scsi_task *task; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI_INIT, "read %d iovs size %lu to lba: %#lx\n", + iovcnt, nbytes, lba); + + task = iscsi_read16_task(lun->context, 0, lba, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0, + bdev_iscsi_command_cb, iscsi_io); + if (task == NULL) { + SPDK_ERRLOG("failed to get read16_task\n"); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + +#if defined(LIBISCSI_FEATURE_IOVECTOR) + scsi_task_set_iov_in(task, (struct scsi_iovec *)iov, iovcnt); +#else + int i; + for (i = 0; i < iovcnt; i++) { + scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base); + } +#endif +} + +static void +bdev_iscsi_writev(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, + struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba) +{ + struct scsi_task *task; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI_INIT, "write %d iovs size %lu to lba: %#lx\n", + iovcnt, nbytes, lba); + + task = iscsi_write16_task(lun->context, 0, lba, NULL, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0, + bdev_iscsi_command_cb, 
iscsi_io); + if (task == NULL) { + SPDK_ERRLOG("failed to get write16_task\n"); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + +#if defined(LIBISCSI_FEATURE_IOVECTOR) + scsi_task_set_iov_out(task, (struct scsi_iovec *)iov, iovcnt); +#else + int i; + for (i = 0; i < iovcnt; i++) { + scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base); + } +#endif +} + +static void +bdev_iscsi_destruct_cb(void *ctx) +{ + struct bdev_iscsi_lun *lun = ctx; + + spdk_poller_unregister(&lun->no_master_ch_poller); + spdk_io_device_unregister(lun, _iscsi_free_lun); +} + +static int +bdev_iscsi_destruct(void *ctx) +{ + struct bdev_iscsi_lun *lun = ctx; + + assert(lun->no_master_ch_poller_td); + spdk_thread_send_msg(lun->no_master_ch_poller_td, bdev_iscsi_destruct_cb, lun); + return 1; +} + +static void +bdev_iscsi_flush(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, uint32_t num_blocks, + int immed, uint64_t lba) +{ + struct scsi_task *task; + + task = iscsi_synchronizecache16_task(lun->context, 0, lba, + num_blocks, 0, immed, bdev_iscsi_command_cb, iscsi_io); + if (task == NULL) { + SPDK_ERRLOG("failed to get sync16_task\n"); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } +} + +static void +bdev_iscsi_unmap(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, + uint64_t lba, uint64_t num_blocks) +{ + struct scsi_task *task; + struct unmap_list list[1]; + + list[0].lba = lba; + list[0].num = num_blocks; + task = iscsi_unmap_task(lun->context, 0, 0, 0, list, 1, + bdev_iscsi_command_cb, iscsi_io); + if (task == NULL) { + SPDK_ERRLOG("failed to get unmap_task\n"); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } +} + +static void +bdev_iscsi_reset_cb(struct iscsi_context *context __attribute__((unused)), int status, + void *command_data, void *private_data) +{ + uint32_t tmf_response; + struct bdev_iscsi_io *iscsi_io = private_data; + + tmf_response = *(uint32_t *)command_data; + if (tmf_response == ISCSI_TASK_FUNC_RESP_COMPLETE) { + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +_bdev_iscsi_reset(void *_bdev_io) +{ + int rc; + struct spdk_bdev_io *bdev_io = _bdev_io; + struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt; + struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx; + struct iscsi_context *context = lun->context; + + rc = iscsi_task_mgmt_lun_reset_async(context, 0, + bdev_iscsi_reset_cb, iscsi_io); + if (rc != 0) { + SPDK_ERRLOG("failed to do iscsi reset\n"); + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } +} + +static void +bdev_iscsi_reset(struct spdk_bdev_io *bdev_io) +{ + struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt; + spdk_thread_send_msg(lun->master_td, _bdev_iscsi_reset, bdev_io); +} + +static int +bdev_iscsi_poll_lun(struct bdev_iscsi_lun *lun) +{ + struct pollfd pfd = {}; + + pfd.fd = iscsi_get_fd(lun->context); + pfd.events = iscsi_which_events(lun->context); + + if (poll(&pfd, 1, 0) < 0) { + SPDK_ERRLOG("poll failed\n"); + return -1; + } + + if (pfd.revents != 0) { + if (iscsi_service(lun->context, pfd.revents) < 0) { + SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(lun->context)); + } + } + + return -1; +} + +static int +bdev_iscsi_no_master_ch_poll(void *arg) +{ + struct bdev_iscsi_lun *lun = arg; + int rc = 0; + + if 
(pthread_mutex_trylock(&lun->mutex)) { + /* Don't care about the error code here. */ + return -1; + } + + if (lun->ch_count == 0) { + rc = bdev_iscsi_poll_lun(arg); + } + + pthread_mutex_unlock(&lun->mutex); + return rc; +} + +static int +bdev_iscsi_poll(void *arg) +{ + struct bdev_iscsi_io_channel *ch = arg; + + return bdev_iscsi_poll_lun(ch->lun); +} + +static void bdev_iscsi_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + bdev_iscsi_readv((struct bdev_iscsi_lun *)bdev_io->bdev->ctxt, + (struct bdev_iscsi_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks); +} + +static void _bdev_iscsi_submit_request(void *_bdev_io) +{ + struct spdk_bdev_io *bdev_io = _bdev_io; + struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx; + struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_iscsi_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_iscsi_writev(lun, iscsi_io, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + bdev_iscsi_flush(lun, iscsi_io, + bdev_io->u.bdev.num_blocks, + ISCSI_IMMEDIATE_DATA_NO, + bdev_io->u.bdev.offset_blocks); + break; + case SPDK_BDEV_IO_TYPE_RESET: + bdev_iscsi_reset(bdev_io); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + bdev_iscsi_unmap(lun, iscsi_io, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + break; + default: + bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } +} + +static void bdev_iscsi_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct spdk_thread *submit_td = spdk_io_channel_get_thread(_ch); + struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx; + struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt; + + if (lun->master_td != submit_td) { + iscsi_io->submit_td = submit_td; + spdk_thread_send_msg(lun->master_td, _bdev_iscsi_submit_request, bdev_io); + return; + } else { + iscsi_io->submit_td = NULL; + } + + _bdev_iscsi_submit_request(bdev_io); +} + +static bool +bdev_iscsi_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct bdev_iscsi_lun *lun = ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + + case SPDK_BDEV_IO_TYPE_UNMAP: + return lun->unmap_supported; + default: + return false; + } +} + +static int +bdev_iscsi_create_cb(void *io_device, void *ctx_buf) +{ + struct bdev_iscsi_io_channel *ch = ctx_buf; + struct bdev_iscsi_lun *lun = io_device; + + pthread_mutex_lock(&lun->mutex); + if (lun->ch_count == 0) { + assert(lun->master_ch == NULL); + assert(lun->master_td == NULL); + lun->master_ch = ch; + lun->master_td = spdk_get_thread(); + ch->poller = spdk_poller_register(bdev_iscsi_poll, ch, 0); + ch->lun = lun; + } + lun->ch_count++; + pthread_mutex_unlock(&lun->mutex); + + return 0; +} + +static void +bdev_iscsi_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_iscsi_io_channel *io_channel = ctx_buf; + struct bdev_iscsi_lun *lun = io_device; + + pthread_mutex_lock(&lun->mutex); + lun->ch_count--; + if (lun->ch_count 
== 0) { + assert(lun->master_ch != NULL); + assert(lun->master_td != NULL); + assert(lun->master_td == spdk_get_thread()); + + lun->master_ch = NULL; + lun->master_td = NULL; + spdk_poller_unregister(&io_channel->poller); + } + pthread_mutex_unlock(&lun->mutex); +} + +static struct spdk_io_channel * +bdev_iscsi_get_io_channel(void *ctx) +{ + struct bdev_iscsi_lun *lun = ctx; + + return spdk_get_io_channel(lun); +} + +static int +bdev_iscsi_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct bdev_iscsi_lun *lun = ctx; + + spdk_json_write_name(w, "iscsi"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "initiator_name"); + spdk_json_write_string(w, lun->initiator_iqn); + spdk_json_write_name(w, "url"); + spdk_json_write_string(w, lun->url); + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_iscsi_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct bdev_iscsi_lun *lun = bdev->ctxt; + + pthread_mutex_lock(&lun->mutex); + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_iscsi_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_string(w, "initiator_iqn", lun->initiator_iqn); + spdk_json_write_named_string(w, "url", lun->url); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + pthread_mutex_unlock(&lun->mutex); +} + +static const struct spdk_bdev_fn_table iscsi_fn_table = { + .destruct = bdev_iscsi_destruct, + .submit_request = bdev_iscsi_submit_request, + .io_type_supported = bdev_iscsi_io_type_supported, + .get_io_channel = bdev_iscsi_get_io_channel, + .dump_info_json = bdev_iscsi_dump_info_json, + .write_config_json = bdev_iscsi_write_config_json, +}; + +static int +create_iscsi_lun(struct iscsi_context *context, char *url, char *initiator_iqn, char *name, + uint64_t num_blocks, uint32_t block_size, struct spdk_bdev **bdev, bool unmap_supported) +{ + struct bdev_iscsi_lun *lun; + int rc; + + lun = calloc(sizeof(*lun), 1); + if (!lun) { + SPDK_ERRLOG("Unable to allocate enough memory for iscsi backend\n"); + return -ENOMEM; + } + + lun->context = context; + lun->url = url; + lun->initiator_iqn = initiator_iqn; + + pthread_mutex_init(&lun->mutex, NULL); + + lun->bdev.name = name; + lun->bdev.product_name = "iSCSI LUN"; + lun->bdev.module = &g_iscsi_bdev_module; + lun->bdev.blocklen = block_size; + lun->bdev.blockcnt = num_blocks; + lun->bdev.ctxt = lun; + lun->unmap_supported = unmap_supported; + + lun->bdev.fn_table = &iscsi_fn_table; + + spdk_io_device_register(lun, bdev_iscsi_create_cb, bdev_iscsi_destroy_cb, + sizeof(struct bdev_iscsi_io_channel), + name); + rc = spdk_bdev_register(&lun->bdev); + if (rc) { + spdk_io_device_unregister(lun, NULL); + pthread_mutex_destroy(&lun->mutex); + free(lun); + return rc; + } + + lun->no_master_ch_poller_td = spdk_get_thread(); + lun->no_master_ch_poller = spdk_poller_register(bdev_iscsi_no_master_ch_poll, lun, + BDEV_ISCSI_NO_MASTER_CH_POLL_US); + + *bdev = &lun->bdev; + return 0; +} + +static void +iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status, + void *command_data, void *private_data) +{ + struct bdev_iscsi_conn_req *req = private_data; + struct scsi_readcapacity16 *readcap16; + struct spdk_bdev *bdev = NULL; + struct scsi_task *task = command_data; + + if (status != SPDK_SCSI_STATUS_GOOD) { + SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(iscsi)); + goto ret; + } + + readcap16 = 
scsi_datain_unmarshall(task); + if (!readcap16) { + status = -ENOMEM; + goto ret; + } + + status = create_iscsi_lun(req->context, req->url, req->initiator_iqn, req->bdev_name, + readcap16->returned_lba + 1, readcap16->block_length, &bdev, req->unmap_supported); + if (status) { + SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status); + } + +ret: + scsi_free_scsi_task(task); + complete_conn_req(req, bdev, status); +} + +static void +bdev_iscsi_inquiry_cb(struct iscsi_context *context, int status, void *_task, void *private_data) +{ + struct scsi_task *task = _task; + struct scsi_inquiry_logical_block_provisioning *lbp_inq = NULL; + struct bdev_iscsi_conn_req *req = private_data; + + if (status == SPDK_SCSI_STATUS_GOOD) { + lbp_inq = scsi_datain_unmarshall(task); + if (lbp_inq != NULL && lbp_inq->lbpu) { + req->unmap_supported = true; + } + } + + task = iscsi_readcapacity16_task(context, 0, iscsi_readcapacity16_cb, req); + if (task) { + return; + } + + SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context)); + complete_conn_req(req, NULL, status); +} + +static void +iscsi_connect_cb(struct iscsi_context *iscsi, int status, + void *command_data, void *private_data) +{ + struct bdev_iscsi_conn_req *req = private_data; + struct scsi_task *task; + + if (status != SPDK_SCSI_STATUS_GOOD) { + goto ret; + } + + task = iscsi_inquiry_task(iscsi, 0, 1, + SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING, + 255, bdev_iscsi_inquiry_cb, req); + if (task) { + return; + } + +ret: + SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context)); + complete_conn_req(req, NULL, status); +} + +static int +iscsi_bdev_conn_poll(void *arg) +{ + struct bdev_iscsi_conn_req *req, *tmp; + struct pollfd pfd; + struct iscsi_context *context; + + TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) { + context = req->context; + pfd.fd = iscsi_get_fd(context); + pfd.events = iscsi_which_events(context); + pfd.revents = 0; + if (poll(&pfd, 1, 0) < 0) { + SPDK_ERRLOG("poll failed\n"); + return -1; + } + + if (pfd.revents != 0) { + if (iscsi_service(context, pfd.revents) < 0) { + SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(context)); + } + } + } + + return -1; +} + +int +create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn, + spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg) +{ + struct bdev_iscsi_conn_req *req; + struct iscsi_url *iscsi_url = NULL; + int rc; + + if (!bdev_name || !url || !initiator_iqn || strlen(initiator_iqn) == 0 || !cb_fn) { + return -EINVAL; + } + + req = calloc(1, sizeof(struct bdev_iscsi_conn_req)); + if (!req) { + SPDK_ERRLOG("Cannot allocate pointer of struct bdev_iscsi_conn_req\n"); + return -ENOMEM; + } + + req->bdev_name = strdup(bdev_name); + req->url = strdup(url); + req->initiator_iqn = strdup(initiator_iqn); + req->context = iscsi_create_context(initiator_iqn); + if (!req->bdev_name || !req->url || !req->initiator_iqn || !req->context) { + SPDK_ERRLOG("Out of memory\n"); + rc = -ENOMEM; + goto err; + } + + req->create_cb = cb_fn; + req->create_cb_arg = cb_arg; + + iscsi_url = iscsi_parse_full_url(req->context, url); + if (iscsi_url == NULL) { + SPDK_ERRLOG("could not parse URL: %s\n", iscsi_get_error(req->context)); + rc = -EINVAL; + goto err; + } + + rc = iscsi_set_session_type(req->context, ISCSI_SESSION_NORMAL); + rc = rc ? rc : iscsi_set_header_digest(req->context, ISCSI_HEADER_DIGEST_NONE); + rc = rc ? rc : iscsi_set_targetname(req->context, iscsi_url->target); + rc = rc ? 
rc : iscsi_full_connect_async(req->context, iscsi_url->portal, iscsi_url->lun, + iscsi_connect_cb, req); + if (rc == 0 && iscsi_url->user[0] != '\0') { + rc = iscsi_set_initiator_username_pwd(req->context, iscsi_url->user, iscsi_url->passwd); + } + + if (rc < 0) { + SPDK_ERRLOG("Failed to connect provided URL=%s: %s\n", url, iscsi_get_error(req->context)); + goto err; + } + + iscsi_destroy_url(iscsi_url); + TAILQ_INSERT_TAIL(&g_iscsi_conn_req, req, link); + if (!g_conn_poller) { + g_conn_poller = spdk_poller_register(iscsi_bdev_conn_poll, NULL, BDEV_ISCSI_CONNECTION_POLL_US); + } + + return 0; + +err: + /* iscsi_destroy_url() is not NULL-proof */ + if (iscsi_url) { + iscsi_destroy_url(iscsi_url); + } + + if (req->context) { + iscsi_destroy_context(req->context); + } + + free(req->initiator_iqn); + free(req->bdev_name); + free(req->url); + free(req); + return rc; +} + +void +delete_iscsi_disk(struct spdk_bdev *bdev, spdk_delete_iscsi_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &g_iscsi_bdev_module) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +static void +bdev_iscsi_initialize_cb(void *cb_arg, struct spdk_bdev *bdev, int status) +{ + if (TAILQ_EMPTY(&g_iscsi_conn_req)) { + spdk_bdev_module_init_done(&g_iscsi_bdev_module); + } +} + +static int +bdev_iscsi_initialize(void) +{ + struct spdk_conf_section *sp; + + const char *url, *bdev_name, *initiator_iqn; + int i, rc; + + sp = spdk_conf_find_section(NULL, "iSCSI_Initiator"); + if (sp == NULL) { + spdk_bdev_module_init_done(&g_iscsi_bdev_module); + return 0; + } + + initiator_iqn = spdk_conf_section_get_val(sp, "initiator_name"); + if (!initiator_iqn) { + initiator_iqn = DEFAULT_INITIATOR_NAME; + } + + rc = 0; + for (i = 0; (url = spdk_conf_section_get_nmval(sp, "URL", i, 0)) != NULL; i++) { + bdev_name = spdk_conf_section_get_nmval(sp, "URL", i, 1); + if (bdev_name == NULL) { + SPDK_ERRLOG("no bdev name specified for URL %s\n", url); + rc = -EINVAL; + break; + } + + rc = create_iscsi_disk(bdev_name, url, initiator_iqn, bdev_iscsi_initialize_cb, NULL); + if (rc) { + break; + } + } + + if (i == 0) { + spdk_bdev_module_init_done(&g_iscsi_bdev_module); + } + + return rc; +} + +SPDK_LOG_REGISTER_COMPONENT("iscsi_init", SPDK_LOG_ISCSI_INIT) diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi.h b/src/spdk/lib/bdev/iscsi/bdev_iscsi.h new file mode 100644 index 00000000..b1d22fa8 --- /dev/null +++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_ISCSI_H
+#define SPDK_BDEV_ISCSI_H
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_iscsi_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * SPDK bdev iSCSI callback type.
+ *
+ * \param cb_arg Completion callback custom arguments
+ * \param bdev created bdev
+ * \param status operation status. Zero on success.
+ */
+typedef void (*spdk_bdev_iscsi_create_cb)(void *cb_arg, struct spdk_bdev *bdev, int status);
+
+/**
+ * Create new iSCSI bdev.
+ *
+ * \warning An iSCSI URL may include a login and password. Be careful because
+ * they will show up in the configuration dump.
+ *
+ * \param bdev_name name for the new bdev.
+ * \param url iSCSI URL string.
+ * \param initiator_iqn IQN the initiator identifies itself with to the target
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ * \return 0 on success or negative error code. On success, a bdev with the provided name is created.
+ */
+int create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
+		      spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Delete iSCSI bdev.
+ *
+ * \param bdev Pointer to iSCSI bdev.
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void delete_iscsi_disk(struct spdk_bdev *bdev, spdk_delete_iscsi_complete cb_fn, void *cb_arg);
+
+#endif // SPDK_BDEV_ISCSI_H
diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c b/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c
new file mode 100644
index 00000000..3682b612
--- /dev/null
+++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c
@@ -0,0 +1,173 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_iscsi.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +struct rpc_construct_iscsi_bdev { + char *name; + char *initiator_iqn; + char *url; +}; + +static const struct spdk_json_object_decoder rpc_construct_iscsi_bdev_decoders[] = { + {"name", offsetof(struct rpc_construct_iscsi_bdev, name), spdk_json_decode_string}, + {"initiator_iqn", offsetof(struct rpc_construct_iscsi_bdev, initiator_iqn), spdk_json_decode_string}, + {"url", offsetof(struct rpc_construct_iscsi_bdev, url), spdk_json_decode_string}, +}; + +static void +free_rpc_construct_iscsi_bdev(struct rpc_construct_iscsi_bdev *req) +{ + free(req->name); + free(req->initiator_iqn); + free(req->url); +} + +static void +construct_iscsi_bdev_cb(void *cb_arg, struct spdk_bdev *bdev, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + if (status > 0) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "iSCSI error (%d).", status); + } else if (status < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-status)); + } else { + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + } +} + +static void +spdk_rpc_construct_iscsi_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_iscsi_bdev req = {}; + int rc = 0; + + if (spdk_json_decode_object(params, rpc_construct_iscsi_bdev_decoders, + SPDK_COUNTOF(rpc_construct_iscsi_bdev_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = create_iscsi_disk(req.name, req.url, req.initiator_iqn, construct_iscsi_bdev_cb, request); + if (rc) { + goto invalid; + } + + free_rpc_construct_iscsi_bdev(&req); + return; + +invalid: + free_rpc_construct_iscsi_bdev(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("construct_iscsi_bdev", spdk_rpc_construct_iscsi_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_iscsi { + char *name; +}; + +static void +free_rpc_delete_iscsi(struct rpc_delete_iscsi *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_iscsi_decoders[] = { + {"name", offsetof(struct rpc_delete_iscsi, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_iscsi_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_iscsi_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_iscsi req = {NULL}; + 
struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_iscsi_decoders, + SPDK_COUNTOF(rpc_delete_iscsi_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + delete_iscsi_disk(bdev, _spdk_rpc_delete_iscsi_bdev_cb, request); + + free_rpc_delete_iscsi(&req); + + return; + +invalid: + free_rpc_delete_iscsi(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_iscsi_bdev", spdk_rpc_delete_iscsi_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/lvol/Makefile b/src/spdk/lib/bdev/lvol/Makefile new file mode 100644 index 00000000..569b14cf --- /dev/null +++ b/src/spdk/lib/bdev/lvol/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = vbdev_lvol.c vbdev_lvol_rpc.c +LIBNAME = vbdev_lvol +LOCAL_SYS_LIBS = -luuid + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol.c b/src/spdk/lib/bdev/lvol/vbdev_lvol.c new file mode 100644 index 00000000..74df81e4 --- /dev/null +++ b/src/spdk/lib/bdev/lvol/vbdev_lvol.c @@ -0,0 +1,1321 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/blob_bdev.h" +#include "spdk/rpc.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk/string.h" +#include "spdk/uuid.h" + +#include "vbdev_lvol.h" + +static TAILQ_HEAD(, lvol_store_bdev) g_spdk_lvol_pairs = TAILQ_HEAD_INITIALIZER( + g_spdk_lvol_pairs); + +static int vbdev_lvs_init(void); +static int vbdev_lvs_get_ctx_size(void); +static void vbdev_lvs_examine(struct spdk_bdev *bdev); + +static struct spdk_bdev_module g_lvol_if = { + .name = "lvol", + .module_init = vbdev_lvs_init, + .examine_disk = vbdev_lvs_examine, + .get_ctx_size = vbdev_lvs_get_ctx_size, + +}; + +SPDK_BDEV_MODULE_REGISTER(&g_lvol_if) + +struct lvol_store_bdev * +vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig) +{ + struct spdk_lvol_store *lvs = NULL; + struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + lvs = lvs_bdev->lvs; + if (lvs == lvs_orig) { + if (lvs_bdev->req != NULL) { + /* We do not allow access to lvs that are being destroyed */ + return NULL; + } else { + return lvs_bdev; + } + } + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + + return NULL; +} + +static int +_vbdev_lvol_change_bdev_alias(struct spdk_lvol *lvol, const char *new_lvol_name) +{ + struct spdk_bdev_alias *tmp; + char *old_alias; + char *alias; + int rc; + int alias_number = 0; + + /* bdev representing lvols have only one alias, + * while we changed lvs name earlier, we have to iterate alias list to get one, + * and check if there is only one alias */ + + TAILQ_FOREACH(tmp, &lvol->bdev->aliases, tailq) { + if (++alias_number > 1) { + SPDK_ERRLOG("There is more than 1 alias in bdev %s\n", lvol->bdev->name); + return -EINVAL; + } + + old_alias = tmp->alias; + } + + if (alias_number == 0) { + SPDK_ERRLOG("There are no aliases in bdev %s\n", lvol->bdev->name); + return -EINVAL; + } + + alias = spdk_sprintf_alloc("%s/%s", lvol->lvol_store->name, new_lvol_name); + if (alias == NULL) { + SPDK_ERRLOG("Cannot alloc memory for alias\n"); + return -ENOMEM; + } + + rc = spdk_bdev_alias_add(lvol->bdev, alias); + if (rc != 0) { + SPDK_ERRLOG("cannot add alias '%s'\n", alias); + free(alias); + return rc; + } + free(alias); + + rc = spdk_bdev_alias_del(lvol->bdev, old_alias); + if (rc != 0) { + SPDK_ERRLOG("cannot remove alias '%s'\n", old_alias); + return rc; + } + + return 0; +} + +static struct lvol_store_bdev * +vbdev_get_lvs_bdev_by_bdev(struct spdk_bdev *bdev_orig) +{ + struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + if 
(lvs_bdev->bdev == bdev_orig) { + if (lvs_bdev->req != NULL) { + /* We do not allow access to lvs that are being destroyed */ + return NULL; + } else { + return lvs_bdev; + } + } + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + + return NULL; +} + +static void +vbdev_lvs_hotremove_cb(void *ctx) +{ + struct spdk_bdev *bdev = ctx; + struct lvol_store_bdev *lvs_bdev; + + lvs_bdev = vbdev_get_lvs_bdev_by_bdev(bdev); + if (lvs_bdev != NULL) { + vbdev_lvs_unload(lvs_bdev->lvs, NULL, NULL); + } +} + +static void +_vbdev_lvs_create_cb(void *cb_arg, struct spdk_lvol_store *lvs, int lvserrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct lvol_store_bdev *lvs_bdev; + struct spdk_bdev *bdev = req->base_bdev; + struct spdk_bs_dev *bs_dev = req->bs_dev; + + if (lvserrno != 0) { + assert(lvs == NULL); + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Cannot create lvol store bdev\n"); + goto end; + } + + lvserrno = spdk_bs_bdev_claim(bs_dev, &g_lvol_if); + if (lvserrno != 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store base bdev already claimed by another bdev\n"); + req->bs_dev->destroy(req->bs_dev); + goto end; + } + + assert(lvs != NULL); + + lvs_bdev = calloc(1, sizeof(*lvs_bdev)); + if (!lvs_bdev) { + lvserrno = -ENOMEM; + goto end; + } + lvs_bdev->lvs = lvs; + lvs_bdev->bdev = bdev; + lvs_bdev->req = NULL; + + TAILQ_INSERT_TAIL(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores); + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store bdev inserted\n"); + +end: + req->cb_fn(req->cb_arg, lvs, lvserrno); + free(req); + + return; +} + +int +vbdev_lvs_create(struct spdk_bdev *base_bdev, const char *name, uint32_t cluster_sz, + spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_dev *bs_dev; + struct spdk_lvs_with_handle_req *lvs_req; + struct spdk_lvs_opts opts; + int rc; + int len; + + if (base_bdev == NULL) { + SPDK_ERRLOG("Bdev does not exist\n"); + return -ENODEV; + } + + spdk_lvs_opts_init(&opts); + if (cluster_sz != 0) { + opts.cluster_sz = cluster_sz; + } + + if (name == NULL) { + SPDK_ERRLOG("missing name param\n"); + return -EINVAL; + } + + len = strnlen(name, SPDK_LVS_NAME_MAX); + + if (len == 0 || len == SPDK_LVS_NAME_MAX) { + SPDK_ERRLOG("name must be between 1 and %d characters\n", SPDK_LVS_NAME_MAX - 1); + return -EINVAL; + } + snprintf(opts.name, sizeof(opts.name), "%s", name); + + lvs_req = calloc(1, sizeof(*lvs_req)); + if (!lvs_req) { + SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n"); + return -ENOMEM; + } + + bs_dev = spdk_bdev_create_bs_dev(base_bdev, vbdev_lvs_hotremove_cb, base_bdev); + if (!bs_dev) { + SPDK_ERRLOG("Cannot create blobstore device\n"); + free(lvs_req); + return -ENODEV; + } + + lvs_req->bs_dev = bs_dev; + lvs_req->base_bdev = base_bdev; + lvs_req->cb_fn = cb_fn; + lvs_req->cb_arg = cb_arg; + + rc = spdk_lvs_init(bs_dev, &opts, _vbdev_lvs_create_cb, lvs_req); + if (rc < 0) { + free(lvs_req); + bs_dev->destroy(bs_dev); + return rc; + } + + return 0; +} + +static void +_vbdev_lvs_rename_cb(void *cb_arg, int lvserrno) +{ + struct spdk_lvs_req *req = cb_arg; + struct spdk_lvol *tmp; + + if (lvserrno != 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store rename failed\n"); + } else { + TAILQ_FOREACH(tmp, &req->lvol_store->lvols, link) { + /* We have to pass current lvol name, since only lvs name changed */ + _vbdev_lvol_change_bdev_alias(tmp, tmp->name); + } + } + + req->cb_fn(req->cb_arg, lvserrno); + free(req); +} + +void +vbdev_lvs_rename(struct spdk_lvol_store *lvs, const char *new_lvs_name, + spdk_lvs_op_complete cb_fn, void 
*cb_arg) +{ + struct lvol_store_bdev *lvs_bdev; + + struct spdk_lvs_req *req; + + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs); + if (!lvs_bdev) { + SPDK_ERRLOG("No such lvol store found\n"); + cb_fn(cb_arg, -ENODEV); + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol_store = lvs; + + spdk_lvs_rename(lvs, new_lvs_name, _vbdev_lvs_rename_cb, req); +} + +static void +_vbdev_lvs_remove_cb(void *cb_arg, int lvserrno) +{ + struct lvol_store_bdev *lvs_bdev = cb_arg; + struct spdk_lvs_req *req = lvs_bdev->req; + + if (lvserrno != 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Could not remove lvol store bdev\n"); + } else { + TAILQ_REMOVE(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores); + free(lvs_bdev); + } + + if (req->cb_fn != NULL) { + req->cb_fn(req->cb_arg, lvserrno); + } + free(req); +} + +static void +_vbdev_lvs_remove_lvol_cb(void *cb_arg, int lvolerrno) +{ + struct lvol_store_bdev *lvs_bdev = cb_arg; + struct spdk_lvol_store *lvs = lvs_bdev->lvs; + struct spdk_lvol *lvol; + + if (lvolerrno != 0) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_LVOL, "Lvol removed with errno %d\n", lvolerrno); + } + + if (TAILQ_EMPTY(&lvs->lvols)) { + spdk_lvs_destroy(lvs, _vbdev_lvs_remove_cb, lvs_bdev); + return; + } + + lvol = TAILQ_FIRST(&lvs->lvols); + while (lvol != NULL) { + if (spdk_lvol_deletable(lvol)) { + vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev); + return; + } + lvol = TAILQ_NEXT(lvol, link); + } + + /* If no lvol is deletable, that means there is circular dependency. */ + SPDK_ERRLOG("Lvols left in lvs, but unable to delete.\n"); + assert(false); +} + +static void +_vbdev_lvs_remove_bdev_unregistered_cb(void *cb_arg, int bdeverrno) +{ + struct lvol_store_bdev *lvs_bdev = cb_arg; + struct spdk_lvol_store *lvs = lvs_bdev->lvs; + struct spdk_lvol *lvol, *tmp; + + if (bdeverrno != 0) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_LVOL, "Lvol unregistered with errno %d\n", bdeverrno); + } + + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + if (lvol->ref_count != 0) { + /* An lvol is still open, don't unload whole lvol store. 
*/ + return; + } + } + spdk_lvs_unload(lvs, _vbdev_lvs_remove_cb, lvs_bdev); +} + +static void +_vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg, + bool destroy) +{ + struct spdk_lvs_req *req; + struct lvol_store_bdev *lvs_bdev; + struct spdk_lvol *lvol, *tmp; + bool all_lvols_closed = true; + + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs); + if (!lvs_bdev) { + SPDK_ERRLOG("No such lvol store found\n"); + if (cb_fn != NULL) { + cb_fn(cb_arg, -ENODEV); + } + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n"); + if (cb_fn != NULL) { + cb_fn(cb_arg, -ENOMEM); + } + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + lvs_bdev->req = req; + + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + if (lvol->ref_count != 0) { + all_lvols_closed = false; + } + } + + if (all_lvols_closed == true) { + if (destroy) { + spdk_lvs_destroy(lvs, _vbdev_lvs_remove_cb, lvs_bdev); + } else { + spdk_lvs_unload(lvs, _vbdev_lvs_remove_cb, lvs_bdev); + } + } else { + lvs->destruct = destroy; + if (destroy) { + _vbdev_lvs_remove_lvol_cb(lvs_bdev, 0); + } else { + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + spdk_bdev_unregister(lvol->bdev, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev); + } + } + } +} + +void +vbdev_lvs_unload(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg) +{ + _vbdev_lvs_remove(lvs, cb_fn, cb_arg, false); +} + +void +vbdev_lvs_destruct(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg) +{ + _vbdev_lvs_remove(lvs, cb_fn, cb_arg, true); +} + +struct lvol_store_bdev * +vbdev_lvol_store_first(void) +{ + struct lvol_store_bdev *lvs_bdev; + + lvs_bdev = TAILQ_FIRST(&g_spdk_lvol_pairs); + if (lvs_bdev) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Starting lvolstore iteration at %p\n", lvs_bdev->lvs); + } + + return lvs_bdev; +} + +struct lvol_store_bdev * +vbdev_lvol_store_next(struct lvol_store_bdev *prev) +{ + struct lvol_store_bdev *lvs_bdev; + + if (prev == NULL) { + SPDK_ERRLOG("prev argument cannot be NULL\n"); + return NULL; + } + + lvs_bdev = TAILQ_NEXT(prev, lvol_stores); + if (lvs_bdev) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Continuing lvolstore iteration at %p\n", lvs_bdev->lvs); + } + + return lvs_bdev; +} + +static struct spdk_lvol_store * +_vbdev_get_lvol_store_by_uuid(const struct spdk_uuid *uuid) +{ + struct spdk_lvol_store *lvs = NULL; + struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + lvs = lvs_bdev->lvs; + if (spdk_uuid_compare(&lvs->uuid, uuid) == 0) { + return lvs; + } + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + return NULL; +} + +struct spdk_lvol_store * +vbdev_get_lvol_store_by_uuid(const char *uuid_str) +{ + struct spdk_uuid uuid; + + if (spdk_uuid_parse(&uuid, uuid_str)) { + return NULL; + } + + return _vbdev_get_lvol_store_by_uuid(&uuid); +} + +struct spdk_lvol_store * +vbdev_get_lvol_store_by_name(const char *name) +{ + struct spdk_lvol_store *lvs = NULL; + struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + lvs = lvs_bdev->lvs; + if (strncmp(lvs->name, name, sizeof(lvs->name)) == 0) { + return lvs; + } + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + return NULL; +} + +struct vbdev_lvol_destroy_ctx { + struct spdk_lvol *lvol; + spdk_lvol_op_complete cb_fn; + void *cb_arg; +}; + +static void +_vbdev_lvol_unregister_cb(void *ctx, int lvolerrno) +{ + struct spdk_bdev *bdev = ctx; + + 
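/* The lvol is now closed: complete the deferred destruct started in
+	 * vbdev_lvol_unregister() and release the spdk_bdev that was
+	 * allocated by _create_lvol_disk(). */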
spdk_bdev_destruct_done(bdev, lvolerrno); + free(bdev); +} + +static int +vbdev_lvol_unregister(void *ctx) +{ + struct spdk_lvol *lvol = ctx; + + assert(lvol != NULL); + + spdk_bdev_alias_del_all(lvol->bdev); + spdk_lvol_close(lvol, _vbdev_lvol_unregister_cb, lvol->bdev); + + /* return 1 to indicate we have an operation that must finish asynchronously before the + * lvol is closed + */ + return 1; +} + +static void +_vbdev_lvol_destroy_cb(void *cb_arg, int bdeverrno) +{ + struct vbdev_lvol_destroy_ctx *ctx = cb_arg; + struct spdk_lvol *lvol = ctx->lvol; + + if (bdeverrno < 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Could not unregister bdev during lvol (%s) destroy\n", + lvol->unique_id); + ctx->cb_fn(ctx->cb_arg, bdeverrno); + free(ctx); + return; + } + + spdk_lvol_destroy(lvol, ctx->cb_fn, ctx->cb_arg); + free(ctx); +} + +void +vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct vbdev_lvol_destroy_ctx *ctx; + + assert(lvol != NULL); + assert(cb_fn != NULL); + + /* Check if it is possible to delete lvol */ + if (spdk_lvol_deletable(lvol) == false) { + /* throw an error */ + SPDK_ERRLOG("Cannot delete lvol\n"); + cb_fn(cb_arg, -EPERM); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->lvol = lvol; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_bdev_unregister(lvol->bdev, _vbdev_lvol_destroy_cb, ctx); +} + +static char * +vbdev_lvol_find_name(struct spdk_lvol *lvol, spdk_blob_id blob_id) +{ + struct spdk_lvol_store *lvs; + struct spdk_lvol *_lvol; + + assert(lvol != NULL); + + lvs = lvol->lvol_store; + + assert(lvs); + + TAILQ_FOREACH(_lvol, &lvs->lvols, link) { + if (_lvol->blob_id == blob_id) { + return _lvol->name; + } + } + + return NULL; +} + +static int +vbdev_lvol_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct spdk_lvol *lvol = ctx; + struct lvol_store_bdev *lvs_bdev; + struct spdk_bdev *bdev; + struct spdk_blob *blob; + char lvol_store_uuid[SPDK_UUID_STRING_LEN]; + spdk_blob_id *ids = NULL; + size_t count, i; + char *name; + int rc = 0; + + spdk_json_write_name(w, "lvol"); + spdk_json_write_object_begin(w); + + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store); + if (!lvs_bdev) { + SPDK_ERRLOG("No such lvol store found\n"); + rc = -ENODEV; + goto end; + } + + bdev = lvs_bdev->bdev; + + spdk_uuid_fmt_lower(lvol_store_uuid, sizeof(lvol_store_uuid), &lvol->lvol_store->uuid); + spdk_json_write_name(w, "lvol_store_uuid"); + spdk_json_write_string(w, lvol_store_uuid); + + spdk_json_write_name(w, "base_bdev"); + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + + blob = lvol->blob; + + spdk_json_write_name(w, "thin_provision"); + spdk_json_write_bool(w, spdk_blob_is_thin_provisioned(blob)); + + spdk_json_write_name(w, "snapshot"); + spdk_json_write_bool(w, spdk_blob_is_snapshot(blob)); + + spdk_json_write_name(w, "clone"); + spdk_json_write_bool(w, spdk_blob_is_clone(blob)); + + if (spdk_blob_is_clone(blob)) { + spdk_blob_id snapshotid = spdk_blob_get_parent_snapshot(lvol->lvol_store->blobstore, lvol->blob_id); + if (snapshotid != SPDK_BLOBID_INVALID) { + name = vbdev_lvol_find_name(lvol, snapshotid); + if (name != NULL) { + spdk_json_write_name(w, "base_snapshot"); + spdk_json_write_string(w, name); + } else { + SPDK_ERRLOG("Cannot obtain snapshots name\n"); + } + } + } + + if (spdk_blob_is_snapshot(blob)) { + /* Take a number of clones */ + rc = spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count); + if (rc == -ENOMEM 
&& count > 0) { + ids = malloc(sizeof(spdk_blob_id) * count); + if (ids == NULL) { + SPDK_ERRLOG("Cannot allocate memory\n"); + rc = -ENOMEM; + goto end; + } + + rc = spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, ids, &count); + if (rc == 0) { + spdk_json_write_name(w, "clones"); + spdk_json_write_array_begin(w); + for (i = 0; i < count; i++) { + name = vbdev_lvol_find_name(lvol, ids[i]); + if (name != NULL) { + spdk_json_write_string(w, name); + } else { + SPDK_ERRLOG("Cannot obtain clone name\n"); + } + + } + spdk_json_write_array_end(w); + } + free(ids); + } + + } + +end: + spdk_json_write_object_end(w); + + return rc; +} + +static void +vbdev_lvol_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* Nothing to dump as lvol configuration is saved on physical device. */ +} + +static struct spdk_io_channel * +vbdev_lvol_get_io_channel(void *ctx) +{ + struct spdk_lvol *lvol = ctx; + + return spdk_lvol_get_io_channel(lvol); +} + +static bool +vbdev_lvol_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct spdk_lvol *lvol = ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return !spdk_blob_is_read_only(lvol->blob); + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_READ: + return true; + default: + return false; + } +} + +static void +lvol_op_comp(void *cb_arg, int bserrno) +{ + struct lvol_task *task = cb_arg; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task); + + if (bserrno != 0) { + if (bserrno == -ENOMEM) { + task->status = SPDK_BDEV_IO_STATUS_NOMEM; + } else { + task->status = SPDK_BDEV_IO_STATUS_FAILED; + } + } + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Vbdev processing callback on device %s with type %d\n", + bdev_io->bdev->name, bdev_io->type); + spdk_bdev_io_complete(bdev_io, task->status); +} + +static void +lvol_unmap(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + uint64_t start_page, num_pages; + struct spdk_blob *blob = lvol->blob; + struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx; + + start_page = bdev_io->u.bdev.offset_blocks; + num_pages = bdev_io->u.bdev.num_blocks; + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, + "Vbdev doing unmap at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page, + num_pages, bdev_io->bdev->name); + spdk_blob_io_unmap(blob, ch, start_page, num_pages, lvol_op_comp, task); +} + +static void +lvol_write_zeroes(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + uint64_t start_page, num_pages; + struct spdk_blob *blob = lvol->blob; + struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx; + + start_page = bdev_io->u.bdev.offset_blocks; + num_pages = bdev_io->u.bdev.num_blocks; + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, + "Vbdev doing write zeros at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page, + num_pages, bdev_io->bdev->name); + spdk_blob_io_write_zeroes(blob, ch, start_page, num_pages, lvol_op_comp, task); +} + +static void +lvol_read(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + uint64_t start_page, num_pages; + struct spdk_lvol *lvol = bdev_io->bdev->ctxt; + struct spdk_blob *blob = lvol->blob; + struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx; + + start_page = bdev_io->u.bdev.offset_blocks; + num_pages = 
bdev_io->u.bdev.num_blocks; + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, + "Vbdev doing read at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page, + num_pages, bdev_io->bdev->name); + spdk_blob_io_readv(blob, ch, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, start_page, + num_pages, lvol_op_comp, task); +} + +static void +lvol_write(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + uint64_t start_page, num_pages; + struct spdk_blob *blob = lvol->blob; + struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx; + + start_page = bdev_io->u.bdev.offset_blocks; + num_pages = bdev_io->u.bdev.num_blocks; + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, + "Vbdev doing write at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page, + num_pages, bdev_io->bdev->name); + spdk_blob_io_writev(blob, ch, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, start_page, + num_pages, lvol_op_comp, task); +} + +static int +lvol_reset(struct spdk_bdev_io *bdev_io) +{ + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + + return 0; +} + +static void +vbdev_lvol_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct spdk_lvol *lvol = bdev_io->bdev->ctxt; + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Vbdev request type %d submitted\n", bdev_io->type); + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, lvol_read, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + lvol_write(lvol, ch, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + lvol_reset(bdev_io); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + lvol_unmap(lvol, ch, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + lvol_write_zeroes(lvol, ch, bdev_io); + break; + default: + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "lvol: unsupported I/O type %d\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + return; +} + +static struct spdk_bdev_fn_table vbdev_lvol_fn_table = { + .destruct = vbdev_lvol_unregister, + .io_type_supported = vbdev_lvol_io_type_supported, + .submit_request = vbdev_lvol_submit_request, + .get_io_channel = vbdev_lvol_get_io_channel, + .dump_info_json = vbdev_lvol_dump_info_json, + .write_config_json = vbdev_lvol_write_config_json, +}; + +static void +_spdk_lvol_destroy_cb(void *cb_arg, int bdeverrno) +{ +} + +static void +_create_lvol_disk_destroy_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_lvol *lvol = cb_arg; + + if (bdeverrno < 0) { + SPDK_ERRLOG("Could not unregister bdev for lvol %s\n", + lvol->unique_id); + return; + } + + spdk_lvol_destroy(lvol, _spdk_lvol_destroy_cb, NULL); +} + +static void +_create_lvol_disk_unload_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_lvol *lvol = cb_arg; + + if (bdeverrno < 0) { + SPDK_ERRLOG("Could not unregister bdev for lvol %s\n", + lvol->unique_id); + return; + } + + TAILQ_REMOVE(&lvol->lvol_store->lvols, lvol, link); + free(lvol->unique_id); + free(lvol); +} + +static int +_create_lvol_disk(struct spdk_lvol *lvol, bool destroy) +{ + struct spdk_bdev *bdev; + struct lvol_store_bdev *lvs_bdev; + uint64_t total_size; + unsigned char *alias; + int rc; + + if (!lvol->unique_id) { + return -EINVAL; + } + + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store); + if (lvs_bdev == NULL) { + SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id); + return -ENODEV; + 
} + + bdev = calloc(1, sizeof(struct spdk_bdev)); + if (!bdev) { + SPDK_ERRLOG("Cannot alloc memory for lvol bdev\n"); + return -ENOMEM; + } + + bdev->name = lvol->unique_id; + bdev->product_name = "Logical Volume"; + bdev->blocklen = spdk_bs_get_io_unit_size(lvol->lvol_store->blobstore); + total_size = spdk_blob_get_num_clusters(lvol->blob) * + spdk_bs_get_cluster_size(lvol->lvol_store->blobstore); + assert((total_size % bdev->blocklen) == 0); + bdev->blockcnt = total_size / bdev->blocklen; + bdev->uuid = lvol->uuid; + bdev->need_aligned_buffer = lvs_bdev->bdev->need_aligned_buffer; + bdev->split_on_optimal_io_boundary = true; + bdev->optimal_io_boundary = spdk_bs_get_cluster_size(lvol->lvol_store->blobstore) / bdev->blocklen; + + bdev->ctxt = lvol; + bdev->fn_table = &vbdev_lvol_fn_table; + bdev->module = &g_lvol_if; + + rc = spdk_vbdev_register(bdev, &lvs_bdev->bdev, 1); + if (rc) { + free(bdev); + return rc; + } + lvol->bdev = bdev; + + alias = spdk_sprintf_alloc("%s/%s", lvs_bdev->lvs->name, lvol->name); + if (alias == NULL) { + SPDK_ERRLOG("Cannot alloc memory for alias\n"); + spdk_bdev_unregister(lvol->bdev, (destroy ? _create_lvol_disk_destroy_cb : + _create_lvol_disk_unload_cb), lvol); + return -ENOMEM; + } + + rc = spdk_bdev_alias_add(bdev, alias); + if (rc != 0) { + SPDK_ERRLOG("Cannot add alias to lvol bdev\n"); + spdk_bdev_unregister(lvol->bdev, (destroy ? _create_lvol_disk_destroy_cb : + _create_lvol_disk_unload_cb), lvol); + } + free(alias); + + return rc; +} + +static void +_vbdev_lvol_create_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno) +{ + struct spdk_lvol_with_handle_req *req = cb_arg; + + if (lvolerrno < 0) { + goto end; + } + + lvolerrno = _create_lvol_disk(lvol, true); + +end: + req->cb_fn(req->cb_arg, lvol, lvolerrno); + free(req); +} + +int +vbdev_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, + bool thin_provision, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_with_handle_req *req; + int rc; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + return -ENOMEM; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + rc = spdk_lvol_create(lvs, name, sz, thin_provision, _vbdev_lvol_create_cb, req); + if (rc != 0) { + free(req); + } + + return rc; +} + +void +vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_with_handle_req *req; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_lvol_create_snapshot(lvol, snapshot_name, _vbdev_lvol_create_cb, req); +} + +void +vbdev_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_with_handle_req *req; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_lvol_create_clone(lvol, clone_name, _vbdev_lvol_create_cb, req); +} + +static void +_vbdev_lvol_rename_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + + if (lvolerrno != 0) { + SPDK_ERRLOG("Renaming lvol failed\n"); + } + + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +void +vbdev_lvol_rename(struct spdk_lvol *lvol, const char *new_lvol_name, + spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + int rc; + + rc = _vbdev_lvol_change_bdev_alias(lvol, 
new_lvol_name); + if (rc != 0) { + SPDK_ERRLOG("renaming lvol to '%s' does not succeed\n", new_lvol_name); + cb_fn(cb_arg, rc); + return; + } + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_lvol_rename(lvol, new_lvol_name, _vbdev_lvol_rename_cb, req); +} + +static void +_vbdev_lvol_resize_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + uint64_t total_size; + + /* change bdev size */ + if (lvolerrno != 0) { + SPDK_ERRLOG("CB function for bdev lvol %s receive error no: %d.\n", lvol->name, lvolerrno); + goto finish; + } + + total_size = spdk_blob_get_num_clusters(lvol->blob) * + spdk_bs_get_cluster_size(lvol->lvol_store->blobstore); + assert((total_size % lvol->bdev->blocklen) == 0); + + lvolerrno = spdk_bdev_notify_blockcnt_change(lvol->bdev, total_size / lvol->bdev->blocklen); + if (lvolerrno != 0) { + SPDK_ERRLOG("Could not change num blocks for bdev lvol %s with error no: %d.\n", + lvol->name, lvolerrno); + } + +finish: + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +void +vbdev_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + cb_fn(cb_arg, -EINVAL); + return; + } + + assert(lvol->bdev != NULL); + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->sz = sz; + req->lvol = lvol; + + spdk_lvol_resize(req->lvol, req->sz, _vbdev_lvol_resize_cb, req); +} + +static int +vbdev_lvs_init(void) +{ + return 0; +} + +static int +vbdev_lvs_get_ctx_size(void) +{ + return sizeof(struct lvol_task); +} + +static void +_vbdev_lvs_examine_failed(void *cb_arg, int lvserrno) +{ + spdk_bdev_module_examine_done(&g_lvol_if); +} + +static void +_vbdev_lvol_examine_close_cb(struct spdk_lvol_store *lvs) +{ + if (lvs->lvols_opened >= lvs->lvol_count) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvols finished\n"); + spdk_bdev_module_examine_done(&g_lvol_if); + } +} + +static void +_vbdev_lvs_examine_finish(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno) +{ + struct spdk_lvol_store *lvs = cb_arg; + + if (lvolerrno != 0) { + SPDK_ERRLOG("Error opening lvol %s\n", lvol->unique_id); + TAILQ_REMOVE(&lvs->lvols, lvol, link); + lvs->lvol_count--; + free(lvol->unique_id); + free(lvol); + goto end; + } + + if (_create_lvol_disk(lvol, false)) { + SPDK_ERRLOG("Cannot create bdev for lvol %s\n", lvol->unique_id); + lvs->lvol_count--; + _vbdev_lvol_examine_close_cb(lvs); + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvol %s failed\n", lvol->unique_id); + return; + } + + lvs->lvols_opened++; + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvol %s succeeded\n", lvol->unique_id); + +end: + + if (lvs->lvols_opened >= lvs->lvol_count) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvols finished\n"); + spdk_bdev_module_examine_done(&g_lvol_if); + } +} + +static void +_vbdev_lvs_examine_cb(void *arg, struct spdk_lvol_store *lvol_store, int lvserrno) +{ + struct lvol_store_bdev *lvs_bdev; + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)arg; + struct spdk_lvol *lvol, *tmp; + + if (lvserrno == -EEXIST) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, + "Name for lvolstore on device %s conflicts with name for already loaded lvs\n", + req->base_bdev->name); + /* On error blobstore destroys bs_dev itself */ + 
spdk_bdev_module_examine_done(&g_lvol_if); + goto end; + } else if (lvserrno != 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store not found on %s\n", req->base_bdev->name); + /* On error blobstore destroys bs_dev itself */ + spdk_bdev_module_examine_done(&g_lvol_if); + goto end; + } + + lvserrno = spdk_bs_bdev_claim(lvol_store->bs_dev, &g_lvol_if); + if (lvserrno != 0) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store base bdev already claimed by another bdev\n"); + spdk_lvs_unload(lvol_store, _vbdev_lvs_examine_failed, NULL); + goto end; + } + + lvs_bdev = calloc(1, sizeof(*lvs_bdev)); + if (!lvs_bdev) { + SPDK_ERRLOG("Cannot alloc memory for lvs_bdev\n"); + spdk_lvs_unload(lvol_store, _vbdev_lvs_examine_failed, NULL); + goto end; + } + + lvs_bdev->lvs = lvol_store; + lvs_bdev->bdev = req->base_bdev; + + TAILQ_INSERT_TAIL(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores); + + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store found on %s - begin parsing\n", + req->base_bdev->name); + + lvol_store->lvols_opened = 0; + + if (TAILQ_EMPTY(&lvol_store->lvols)) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store examination done\n"); + spdk_bdev_module_examine_done(&g_lvol_if); + } else { + /* Open all lvols */ + TAILQ_FOREACH_SAFE(lvol, &lvol_store->lvols, link, tmp) { + spdk_lvol_open(lvol, _vbdev_lvs_examine_finish, lvol_store); + } + } + +end: + free(req); +} + +static void +vbdev_lvs_examine(struct spdk_bdev *bdev) +{ + struct spdk_bs_dev *bs_dev; + struct spdk_lvs_with_handle_req *req; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + spdk_bdev_module_examine_done(&g_lvol_if); + SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n"); + return; + } + + bs_dev = spdk_bdev_create_bs_dev(bdev, vbdev_lvs_hotremove_cb, bdev); + if (!bs_dev) { + SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Cannot create bs dev on %s\n", bdev->name); + spdk_bdev_module_examine_done(&g_lvol_if); + free(req); + return; + } + + req->base_bdev = bdev; + + spdk_lvs_load(bs_dev, _vbdev_lvs_examine_cb, req); +} + +struct spdk_lvol * +vbdev_lvol_get_from_bdev(struct spdk_bdev *bdev) +{ + if (!bdev || bdev->module != &g_lvol_if) { + return NULL; + } + + if (bdev->ctxt == NULL) { + SPDK_ERRLOG("No lvol ctx assigned to bdev %s\n", bdev->name); + return NULL; + } + + return (struct spdk_lvol *)bdev->ctxt; +} + +SPDK_LOG_REGISTER_COMPONENT("vbdev_lvol", SPDK_LOG_VBDEV_LVOL); diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol.h b/src/spdk/lib/bdev/lvol/vbdev_lvol.h new file mode 100644 index 00000000..93991d08 --- /dev/null +++ b/src/spdk/lib/bdev/lvol/vbdev_lvol.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VBDEV_LVOL_H +#define SPDK_VBDEV_LVOL_H + +#include "spdk/lvol.h" +#include "spdk/bdev_module.h" + +#include "spdk_internal/lvolstore.h" + +struct lvol_store_bdev { + struct spdk_lvol_store *lvs; + struct spdk_bdev *bdev; + struct spdk_lvs_req *req; + + TAILQ_ENTRY(lvol_store_bdev) lvol_stores; +}; + +int vbdev_lvs_create(struct spdk_bdev *base_bdev, const char *name, uint32_t cluster_sz, + spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg); +void vbdev_lvs_destruct(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg); +void vbdev_lvs_unload(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg); + +int vbdev_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, + bool thin_provisioned, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +void vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +void vbdev_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); + +/** + * \brief Change size of lvol + * \param lvol Handle to lvol + * \param sz New size of lvol + * \param cb_fn Completion callback + * \param cb_arg Completion callback custom arguments + */ +void vbdev_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, + void *cb_arg); + +void vbdev_lvol_rename(struct spdk_lvol *lvol, const char *new_lvol_name, + spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * Destroy a logical volume + * \param lvol Handle to lvol + * \param cb_fn Completion callback + * \param cb_arg Completion callback custom arguments + */ +void vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + +/** + * \brief Renames given lvolstore. + * + * \param lvs Pointer to lvolstore + * \param new_lvs_name New name of lvs + * \param cb_fn Completion callback + * \param cb_arg Completion callback custom arguments + */ +void vbdev_lvs_rename(struct spdk_lvol_store *lvs, const char *new_lvs_name, + spdk_lvs_op_complete cb_fn, void *cb_arg); + +/** + * \brief Search for handle to lvolstore + * \param uuid_str UUID of lvolstore + * \return Handle to spdk_lvol_store or NULL if not found. + */ +struct spdk_lvol_store *vbdev_get_lvol_store_by_uuid(const char *uuid_str); + +/** + * \brief Search for handle to lvolstore + * \param name Name of lvolstore + * \return Handle to spdk_lvol_store or NULL if not found. 
+ */ +struct spdk_lvol_store *vbdev_get_lvol_store_by_name(const char *name); + +/** + * \brief Search for handle to lvol_store_bdev + * \param lvs handle to lvolstore + * \return Handle to lvol_store_bdev or NULL if not found. + */ +struct lvol_store_bdev *vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs); + +struct spdk_lvol *vbdev_lvol_get_from_bdev(struct spdk_bdev *bdev); + +#endif /* SPDK_VBDEV_LVOL_H */ diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c b/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c new file mode 100644 index 00000000..30f67f35 --- /dev/null +++ b/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c @@ -0,0 +1,1089 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/rpc.h" +#include "spdk/bdev.h" +#include "spdk/util.h" +#include "vbdev_lvol.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +SPDK_LOG_REGISTER_COMPONENT("lvolrpc", SPDK_LOG_LVOL_RPC) + +struct rpc_construct_lvol_store { + char *lvs_name; + char *bdev_name; + uint32_t cluster_sz; +}; + +static int +vbdev_get_lvol_store_by_uuid_xor_name(const char *uuid, const char *lvs_name, + struct spdk_lvol_store **lvs) +{ + if ((uuid == NULL && lvs_name == NULL)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "lvs UUID nor lvs name specified\n"); + return -EINVAL; + } else if ((uuid && lvs_name)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "both lvs UUID '%s' and lvs name '%s' specified\n", uuid, + lvs_name); + return -EINVAL; + } else if (uuid) { + *lvs = vbdev_get_lvol_store_by_uuid(uuid); + + if (*lvs == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "blobstore with UUID '%s' not found\n", uuid); + return -ENODEV; + } + } else if (lvs_name) { + + *lvs = vbdev_get_lvol_store_by_name(lvs_name); + + if (*lvs == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "blobstore with name '%s' not found\n", lvs_name); + return -ENODEV; + } + } + return 0; +} + +static void +free_rpc_construct_lvol_store(struct rpc_construct_lvol_store *req) +{ + free(req->bdev_name); + free(req->lvs_name); +} + +static const struct spdk_json_object_decoder rpc_construct_lvol_store_decoders[] = { + {"bdev_name", offsetof(struct rpc_construct_lvol_store, bdev_name), spdk_json_decode_string}, + {"cluster_sz", offsetof(struct rpc_construct_lvol_store, cluster_sz), spdk_json_decode_uint32, true}, + {"lvs_name", offsetof(struct rpc_construct_lvol_store, lvs_name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_lvol_store_construct_cb(void *cb_arg, struct spdk_lvol_store *lvol_store, int lvserrno) +{ + struct spdk_json_write_ctx *w; + char lvol_store_uuid[SPDK_UUID_STRING_LEN]; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvserrno != 0) { + goto invalid; + } + + spdk_uuid_fmt_lower(lvol_store_uuid, sizeof(lvol_store_uuid), &lvol_store->uuid); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, lvol_store_uuid); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvserrno)); +} + +static void +spdk_rpc_construct_lvol_store(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_lvol_store req = {}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_lvol_store_decoders, + SPDK_COUNTOF(rpc_construct_lvol_store_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + if (req.bdev_name == NULL) { + SPDK_ERRLOG("missing bdev_name param\n"); + rc = -EINVAL; + goto invalid; + } + + if (req.lvs_name == NULL) { + SPDK_ERRLOG("missing lvs_name param\n"); + rc = -EINVAL; + goto invalid; + } + bdev = spdk_bdev_get_by_name(req.bdev_name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.bdev_name); + rc = -ENODEV; + goto invalid; + } + + rc = vbdev_lvs_create(bdev, req.lvs_name, req.cluster_sz, _spdk_rpc_lvol_store_construct_cb, + request); + if (rc < 0) { + goto invalid; + } + free_rpc_construct_lvol_store(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_construct_lvol_store(&req); +} 
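+/*
+ * Illustrative usage sketch: based on the rpc_construct_lvol_store_decoders and
+ * the completion callback above, a construct_lvol_store request is expected to
+ * look roughly like the JSON-RPC call below. The base bdev name "Malloc0", the
+ * lvs name "lvs0" and the 4 MiB cluster_sz are hypothetical example values
+ * (cluster_sz is optional); on success the handler replies with the UUID string
+ * of the newly created lvol store.
+ *
+ *   {
+ *     "jsonrpc": "2.0",
+ *     "id": 1,
+ *     "method": "construct_lvol_store",
+ *     "params": { "bdev_name": "Malloc0", "lvs_name": "lvs0", "cluster_sz": 4194304 }
+ *   }
+ */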
+SPDK_RPC_REGISTER("construct_lvol_store", spdk_rpc_construct_lvol_store, SPDK_RPC_RUNTIME) + +struct rpc_rename_lvol_store { + char *old_name; + char *new_name; +}; + +static void +free_rpc_rename_lvol_store(struct rpc_rename_lvol_store *req) +{ + free(req->old_name); + free(req->new_name); +} + +static const struct spdk_json_object_decoder rpc_rename_lvol_store_decoders[] = { + {"old_name", offsetof(struct rpc_rename_lvol_store, old_name), spdk_json_decode_string}, + {"new_name", offsetof(struct rpc_rename_lvol_store, new_name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_rename_lvol_store_cb(void *cb_arg, int lvserrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvserrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvserrno)); +} + +static void +spdk_rpc_rename_lvol_store(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_rename_lvol_store req = {}; + struct spdk_lvol_store *lvs; + int rc; + + if (spdk_json_decode_object(params, rpc_rename_lvol_store_decoders, + SPDK_COUNTOF(rpc_rename_lvol_store_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + lvs = vbdev_get_lvol_store_by_name(req.old_name); + if (lvs == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "no lvs existing for given name\n"); + rc = -ENOENT; + goto invalid; + } + + vbdev_lvs_rename(lvs, req.new_name, _spdk_rpc_rename_lvol_store_cb, request); + + free_rpc_rename_lvol_store(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_rename_lvol_store(&req); +} +SPDK_RPC_REGISTER("rename_lvol_store", spdk_rpc_rename_lvol_store, SPDK_RPC_RUNTIME) + +struct rpc_destroy_lvol_store { + char *uuid; + char *lvs_name; +}; + +static void +free_rpc_destroy_lvol_store(struct rpc_destroy_lvol_store *req) +{ + free(req->uuid); + free(req->lvs_name); +} + +static const struct spdk_json_object_decoder rpc_destroy_lvol_store_decoders[] = { + {"uuid", offsetof(struct rpc_destroy_lvol_store, uuid), spdk_json_decode_string, true}, + {"lvs_name", offsetof(struct rpc_destroy_lvol_store, lvs_name), spdk_json_decode_string, true}, +}; + +static void +_spdk_rpc_lvol_store_destroy_cb(void *cb_arg, int lvserrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvserrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvserrno)); +} + +static void +spdk_rpc_destroy_lvol_store(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_destroy_lvol_store req = {}; + struct spdk_lvol_store *lvs = NULL; + int rc; + + if (spdk_json_decode_object(params, rpc_destroy_lvol_store_decoders, + SPDK_COUNTOF(rpc_destroy_lvol_store_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs); + if 
(rc != 0) { + goto invalid; + } + + vbdev_lvs_destruct(lvs, _spdk_rpc_lvol_store_destroy_cb, request); + + free_rpc_destroy_lvol_store(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_destroy_lvol_store(&req); +} +SPDK_RPC_REGISTER("destroy_lvol_store", spdk_rpc_destroy_lvol_store, SPDK_RPC_RUNTIME) + +struct rpc_construct_lvol_bdev { + char *uuid; + char *lvs_name; + char *lvol_name; + uint64_t size; + bool thin_provision; +}; + +static void +free_rpc_construct_lvol_bdev(struct rpc_construct_lvol_bdev *req) +{ + free(req->uuid); + free(req->lvs_name); + free(req->lvol_name); +} + +static const struct spdk_json_object_decoder rpc_construct_lvol_bdev_decoders[] = { + {"uuid", offsetof(struct rpc_construct_lvol_bdev, uuid), spdk_json_decode_string, true}, + {"lvs_name", offsetof(struct rpc_construct_lvol_bdev, lvs_name), spdk_json_decode_string, true}, + {"lvol_name", offsetof(struct rpc_construct_lvol_bdev, lvol_name), spdk_json_decode_string, true}, + {"size", offsetof(struct rpc_construct_lvol_bdev, size), spdk_json_decode_uint64}, + {"thin_provision", offsetof(struct rpc_construct_lvol_bdev, thin_provision), spdk_json_decode_bool, true}, +}; + +static void +_spdk_rpc_construct_lvol_bdev_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, lvol->unique_id); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_construct_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_lvol_bdev req = {}; + int rc; + struct spdk_lvol_store *lvs = NULL; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Creating blob\n"); + + if (spdk_json_decode_object(params, rpc_construct_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_construct_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs); + if (rc != 0) { + goto invalid; + } + + if (req.lvol_name == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "no bdev name\n"); + rc = -EINVAL; + goto invalid; + } + + rc = vbdev_lvol_create(lvs, req.lvol_name, req.size, req.thin_provision, + _spdk_rpc_construct_lvol_bdev_cb, request); + if (rc < 0) { + goto invalid; + } + + free_rpc_construct_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_construct_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("construct_lvol_bdev", spdk_rpc_construct_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_snapshot_lvol_bdev { + char *lvol_name; + char *snapshot_name; +}; + +static void +free_rpc_snapshot_lvol_bdev(struct rpc_snapshot_lvol_bdev *req) +{ + free(req->lvol_name); + free(req->snapshot_name); +} + +static const struct spdk_json_object_decoder rpc_snapshot_lvol_bdev_decoders[] = { + {"lvol_name", offsetof(struct rpc_snapshot_lvol_bdev, lvol_name), spdk_json_decode_string}, + {"snapshot_name", offsetof(struct rpc_snapshot_lvol_bdev, snapshot_name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_snapshot_lvol_bdev_cb(void 
*cb_arg, struct spdk_lvol *lvol, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, lvol->unique_id); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_snapshot_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_snapshot_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Snapshotting blob\n"); + + if (spdk_json_decode_object(params, rpc_snapshot_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_snapshot_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.lvol_name); + if (bdev == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "bdev '%s' does not exist\n", req.lvol_name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + rc = -ENODEV; + goto invalid; + } + + vbdev_lvol_create_snapshot(lvol, req.snapshot_name, _spdk_rpc_snapshot_lvol_bdev_cb, request); + + free_rpc_snapshot_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_snapshot_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("snapshot_lvol_bdev", spdk_rpc_snapshot_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_clone_lvol_bdev { + char *snapshot_name; + char *clone_name; +}; + +static void +free_rpc_clone_lvol_bdev(struct rpc_clone_lvol_bdev *req) +{ + free(req->snapshot_name); + free(req->clone_name); +} + +static const struct spdk_json_object_decoder rpc_clone_lvol_bdev_decoders[] = { + {"snapshot_name", offsetof(struct rpc_clone_lvol_bdev, snapshot_name), spdk_json_decode_string}, + {"clone_name", offsetof(struct rpc_clone_lvol_bdev, clone_name), spdk_json_decode_string, true}, +}; + +static void +_spdk_rpc_clone_lvol_bdev_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, lvol->unique_id); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_clone_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_clone_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Cloning blob\n"); + + if (spdk_json_decode_object(params, rpc_clone_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_clone_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.snapshot_name); + if (bdev == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "bdev '%s' does not exist\n", req.snapshot_name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not 
exist\n"); + rc = -ENODEV; + goto invalid; + } + + vbdev_lvol_create_clone(lvol, req.clone_name, _spdk_rpc_clone_lvol_bdev_cb, request); + + free_rpc_clone_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_clone_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("clone_lvol_bdev", spdk_rpc_clone_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_rename_lvol_bdev { + char *old_name; + char *new_name; +}; + +static void +free_rpc_rename_lvol_bdev(struct rpc_rename_lvol_bdev *req) +{ + free(req->old_name); + free(req->new_name); +} + +static const struct spdk_json_object_decoder rpc_rename_lvol_bdev_decoders[] = { + {"old_name", offsetof(struct rpc_rename_lvol_bdev, old_name), spdk_json_decode_string}, + {"new_name", offsetof(struct rpc_rename_lvol_bdev, new_name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_rename_lvol_bdev_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_rename_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_rename_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc = 0; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Renaming lvol\n"); + + if (spdk_json_decode_object(params, rpc_rename_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_rename_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.old_name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.old_name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + rc = -ENODEV; + goto invalid; + } + + vbdev_lvol_rename(lvol, req.new_name, _spdk_rpc_rename_lvol_bdev_cb, request); + + free_rpc_rename_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_rename_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("rename_lvol_bdev", spdk_rpc_rename_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_inflate_lvol_bdev { + char *name; +}; + +static void +free_rpc_inflate_lvol_bdev(struct rpc_inflate_lvol_bdev *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_inflate_lvol_bdev_decoders[] = { + {"name", offsetof(struct rpc_inflate_lvol_bdev, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_inflate_lvol_bdev_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_inflate_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_inflate_lvol_bdev req = {}; + 
struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc = 0; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Inflating lvol\n"); + + if (spdk_json_decode_object(params, rpc_inflate_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_inflate_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + rc = -ENODEV; + goto invalid; + } + + spdk_lvol_inflate(lvol, _spdk_rpc_inflate_lvol_bdev_cb, request); + + free_rpc_inflate_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_inflate_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("inflate_lvol_bdev", spdk_rpc_inflate_lvol_bdev, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_decouple_parent_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_inflate_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc = 0; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Decoupling parent of lvol\n"); + + if (spdk_json_decode_object(params, rpc_inflate_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_inflate_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + rc = -ENODEV; + goto invalid; + } + + spdk_lvol_decouple_parent(lvol, _spdk_rpc_inflate_lvol_bdev_cb, request); + + free_rpc_inflate_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + free_rpc_inflate_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("decouple_parent_lvol_bdev", spdk_rpc_decouple_parent_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_resize_lvol_bdev { + char *name; + uint64_t size; +}; + +static void +free_rpc_resize_lvol_bdev(struct rpc_resize_lvol_bdev *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_resize_lvol_bdev_decoders[] = { + {"name", offsetof(struct rpc_resize_lvol_bdev, name), spdk_json_decode_string}, + {"size", offsetof(struct rpc_resize_lvol_bdev, size), spdk_json_decode_uint64}, +}; + +static void +_spdk_rpc_resize_lvol_bdev_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_resize_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_resize_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc = 0; + + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Resizing lvol\n"); + + if (spdk_json_decode_object(params, rpc_resize_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_resize_lvol_bdev_decoders), + 
&req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + if (req.name == NULL) { + SPDK_ERRLOG("missing name param\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("no bdev for provided name %s\n", req.name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + rc = -ENODEV; + goto invalid; + } + + vbdev_lvol_resize(lvol, req.size, _spdk_rpc_resize_lvol_bdev_cb, request); + + free_rpc_resize_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_resize_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("resize_lvol_bdev", spdk_rpc_resize_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_destroy_lvol_bdev { + char *name; +}; + +static void +free_rpc_destroy_lvol_bdev(struct rpc_destroy_lvol_bdev *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_destroy_lvol_bdev_decoders[] = { + {"name", offsetof(struct rpc_destroy_lvol_bdev, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_destroy_lvol_bdev_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = cb_arg; + + if (lvolerrno != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + spdk_strerror(-lvolerrno)); +} + +static void +spdk_rpc_destroy_lvol_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_destroy_lvol_bdev req = {}; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + int rc; + + if (spdk_json_decode_object(params, rpc_destroy_lvol_bdev_decoders, + SPDK_COUNTOF(rpc_destroy_lvol_bdev_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("no bdev for provided name %s\n", req.name); + rc = -ENODEV; + goto invalid; + } + + lvol = vbdev_lvol_get_from_bdev(bdev); + if (lvol == NULL) { + rc = -ENODEV; + goto invalid; + } + + vbdev_lvol_destroy(lvol, _spdk_rpc_destroy_lvol_bdev_cb, request); + + free_rpc_destroy_lvol_bdev(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_destroy_lvol_bdev(&req); +} + +SPDK_RPC_REGISTER("destroy_lvol_bdev", spdk_rpc_destroy_lvol_bdev, SPDK_RPC_RUNTIME) + +struct rpc_get_lvol_stores { + char *uuid; + char *lvs_name; +}; + +static void +free_rpc_get_lvol_stores(struct rpc_get_lvol_stores *req) +{ + free(req->uuid); + free(req->lvs_name); +} + +static const struct spdk_json_object_decoder rpc_get_lvol_stores_decoders[] = { + {"uuid", offsetof(struct rpc_get_lvol_stores, uuid), spdk_json_decode_string, true}, + {"lvs_name", offsetof(struct rpc_get_lvol_stores, lvs_name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_dump_lvol_store_info(struct spdk_json_write_ctx *w, struct lvol_store_bdev *lvs_bdev) +{ + struct spdk_blob_store *bs; + uint64_t cluster_size, block_size; + char uuid[SPDK_UUID_STRING_LEN]; + + bs = lvs_bdev->lvs->blobstore; + cluster_size = spdk_bs_get_cluster_size(bs); + /* Block size of lvols is always size of blob 
store page */ + block_size = spdk_bs_get_page_size(bs); + + spdk_json_write_object_begin(w); + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), &lvs_bdev->lvs->uuid); + spdk_json_write_name(w, "uuid"); + spdk_json_write_string(w, uuid); + + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, lvs_bdev->lvs->name); + + spdk_json_write_name(w, "base_bdev"); + spdk_json_write_string(w, spdk_bdev_get_name(lvs_bdev->bdev)); + + spdk_json_write_name(w, "total_data_clusters"); + spdk_json_write_uint64(w, spdk_bs_total_data_cluster_count(bs)); + + spdk_json_write_name(w, "free_clusters"); + spdk_json_write_uint64(w, spdk_bs_free_cluster_count(bs)); + + spdk_json_write_name(w, "block_size"); + spdk_json_write_uint64(w, block_size); + + spdk_json_write_name(w, "cluster_size"); + spdk_json_write_uint64(w, cluster_size); + + spdk_json_write_object_end(w); +} + +static void +spdk_rpc_get_lvol_stores(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_lvol_stores req = {}; + struct spdk_json_write_ctx *w; + struct lvol_store_bdev *lvs_bdev = NULL; + struct spdk_lvol_store *lvs = NULL; + int rc; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_get_lvol_stores_decoders, + SPDK_COUNTOF(rpc_get_lvol_stores_decoders), + &req)) { + SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs); + if (rc != 0) { + goto invalid; + } + + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs); + if (lvs_bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_get_lvol_stores(&req); + return; + } + + spdk_json_write_array_begin(w); + + if (lvs_bdev != NULL) { + spdk_rpc_dump_lvol_store_info(w, lvs_bdev); + } else { + for (lvs_bdev = vbdev_lvol_store_first(); lvs_bdev != NULL; + lvs_bdev = vbdev_lvol_store_next(lvs_bdev)) { + spdk_rpc_dump_lvol_store_info(w, lvs_bdev); + } + } + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); + + free_rpc_get_lvol_stores(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_get_lvol_stores(&req); +} + +SPDK_RPC_REGISTER("get_lvol_stores", spdk_rpc_get_lvol_stores, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/malloc/Makefile b/src/spdk/lib/bdev/malloc/Makefile new file mode 100644 index 00000000..f4eb9aaa --- /dev/null +++ b/src/spdk/lib/bdev/malloc/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_malloc.c bdev_malloc_rpc.c +LIBNAME = bdev_malloc +LOCAL_SYS_LIBS = -luuid + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc.c b/src/spdk/lib/bdev/malloc/bdev_malloc.c new file mode 100644 index 00000000..eb4b2b9c --- /dev/null +++ b/src/spdk/lib/bdev/malloc/bdev_malloc.c @@ -0,0 +1,524 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "bdev_malloc.h" +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/copy_engine.h" +#include "spdk/json.h" +#include "spdk/thread.h" +#include "spdk/queue.h" +#include "spdk/string.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +struct malloc_disk { + struct spdk_bdev disk; + void *malloc_buf; + TAILQ_ENTRY(malloc_disk) link; +}; + +struct malloc_task { + int num_outstanding; + enum spdk_bdev_io_status status; +}; + +static struct malloc_task * +__malloc_task_from_copy_task(struct spdk_copy_task *ct) +{ + return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task)); +} + +static struct spdk_copy_task * +__copy_task_from_malloc_task(struct malloc_task *mt) +{ + return (struct spdk_copy_task *)((uintptr_t)mt + sizeof(struct malloc_task)); +} + +static void +malloc_done(void *ref, int status) +{ + struct malloc_task *task = __malloc_task_from_copy_task(ref); + + if (status != 0) { + if (status == -ENOMEM) { + task->status = SPDK_BDEV_IO_STATUS_NOMEM; + } else { + task->status = SPDK_BDEV_IO_STATUS_FAILED; + } + } + + if (--task->num_outstanding == 0) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); + } +} + +static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); + +int malloc_disk_count = 0; + +static int bdev_malloc_initialize(void); +static void bdev_malloc_get_spdk_running_config(FILE *fp); + +static int +bdev_malloc_get_ctx_size(void) +{ + return sizeof(struct malloc_task) + spdk_copy_task_size(); +} + +static struct spdk_bdev_module malloc_if = { + .name = "malloc", + .module_init = bdev_malloc_initialize, + .config_text = bdev_malloc_get_spdk_running_config, + .get_ctx_size = bdev_malloc_get_ctx_size, + +}; + +SPDK_BDEV_MODULE_REGISTER(&malloc_if) + +static void +malloc_disk_free(struct malloc_disk *malloc_disk) +{ + if (!malloc_disk) { + return; + } + + free(malloc_disk->disk.name); + spdk_dma_free(malloc_disk->malloc_buf); + spdk_dma_free(malloc_disk); +} + +static int +bdev_malloc_destruct(void *ctx) +{ + struct malloc_disk *malloc_disk = ctx; + + TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); + malloc_disk_free(malloc_disk); + return 0; +} + +static int +bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + if (nbytes < iovs[i].iov_len) { + return 0; + } + + nbytes -= iovs[i].iov_len; + } + + return nbytes != 0; +} + +static void +bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, + struct malloc_task *task, + struct iovec *iov, int iovcnt, size_t len, uint64_t offset) +{ + int64_t res = 0; + void *src = mdisk->malloc_buf + offset; + int i; + + if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n", + len, offset); + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + task->num_outstanding = iovcnt; + + for (i = 0; i < iovcnt; i++) { + res = spdk_copy_submit(__copy_task_from_malloc_task(task), + ch, iov[i].iov_base, + src, iov[i].iov_len, malloc_done); + + if (res != 0) { + malloc_done(__copy_task_from_malloc_task(task), res); + } + + src += iov[i].iov_len; + len -= iov[i].iov_len; + } +} + +static void +bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, + struct malloc_task *task, + struct iovec *iov, int 
iovcnt, size_t len, uint64_t offset) +{ + int64_t res = 0; + void *dst = mdisk->malloc_buf + offset; + int i; + + if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), + SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n", + len, offset); + + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + task->num_outstanding = iovcnt; + + for (i = 0; i < iovcnt; i++) { + res = spdk_copy_submit(__copy_task_from_malloc_task(task), + ch, dst, iov[i].iov_base, + iov[i].iov_len, malloc_done); + + if (res != 0) { + malloc_done(__copy_task_from_malloc_task(task), res); + } + + dst += iov[i].iov_len; + } +} + +static int +bdev_malloc_unmap(struct malloc_disk *mdisk, + struct spdk_io_channel *ch, + struct malloc_task *task, + uint64_t offset, + uint64_t byte_count) +{ + task->status = SPDK_BDEV_IO_STATUS_SUCCESS; + task->num_outstanding = 1; + + return spdk_copy_submit_fill(__copy_task_from_malloc_task(task), ch, + mdisk->malloc_buf + offset, 0, byte_count, malloc_done); +} + +static int64_t +bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task, + uint64_t offset, uint64_t nbytes) +{ + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); + + return 0; +} + +static int +bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task) +{ + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); + + return 0; +} + +static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + uint32_t block_size = bdev_io->bdev->blocklen; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { + assert(bdev_io->u.bdev.iovcnt == 1); + bdev_io->u.bdev.iovs[0].iov_base = + ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + + bdev_io->u.bdev.offset_blocks * block_size; + bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx), + SPDK_BDEV_IO_STATUS_SUCCESS); + return 0; + } + + bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, + ch, + (struct malloc_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * block_size, + bdev_io->u.bdev.offset_blocks * block_size); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, + ch, + (struct malloc_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * block_size, + bdev_io->u.bdev.offset_blocks * block_size); + return 0; + + case SPDK_BDEV_IO_TYPE_RESET: + return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt, + (struct malloc_task *)bdev_io->driver_ctx); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt, + (struct malloc_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks * block_size, + bdev_io->u.bdev.num_blocks * block_size); + + case SPDK_BDEV_IO_TYPE_UNMAP: + return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, + ch, + (struct malloc_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks * block_size, + bdev_io->u.bdev.num_blocks * block_size); + + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. 
*/ + return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, + ch, + (struct malloc_task *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks * block_size, + bdev_io->u.bdev.num_blocks * block_size); + + default: + return -1; + } + return 0; +} + +static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_malloc_submit_request(ch, bdev_io) != 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return true; + + default: + return false; + } +} + +static struct spdk_io_channel * +bdev_malloc_get_io_channel(void *ctx) +{ + return spdk_copy_engine_get_io_channel(); +} + +static void +bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + char uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_malloc_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); + spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table malloc_fn_table = { + .destruct = bdev_malloc_destruct, + .submit_request = bdev_malloc_submit_request, + .io_type_supported = bdev_malloc_io_type_supported, + .get_io_channel = bdev_malloc_get_io_channel, + .write_config_json = bdev_malloc_write_json_config, +}; + +struct spdk_bdev *create_malloc_disk(const char *name, const struct spdk_uuid *uuid, + uint64_t num_blocks, uint32_t block_size) +{ + struct malloc_disk *mdisk; + int rc; + + if (num_blocks == 0) { + SPDK_ERRLOG("Disk must be more than 0 blocks\n"); + return NULL; + } + + mdisk = spdk_dma_zmalloc(sizeof(*mdisk), 0, NULL); + if (!mdisk) { + SPDK_ERRLOG("mdisk spdk_dma_zmalloc() failed\n"); + return NULL; + } + + /* + * Allocate the large backend memory buffer from pinned memory. + * + * TODO: need to pass a hint so we know which socket to allocate + * from on multi-socket systems. 
+ */ + mdisk->malloc_buf = spdk_dma_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL); + if (!mdisk->malloc_buf) { + SPDK_ERRLOG("malloc_buf spdk_dma_zmalloc() failed\n"); + malloc_disk_free(mdisk); + return NULL; + } + + if (name) { + mdisk->disk.name = strdup(name); + } else { + /* Auto-generate a name */ + mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); + malloc_disk_count++; + } + if (!mdisk->disk.name) { + malloc_disk_free(mdisk); + return NULL; + } + mdisk->disk.product_name = "Malloc disk"; + + mdisk->disk.write_cache = 1; + mdisk->disk.blocklen = block_size; + mdisk->disk.blockcnt = num_blocks; + if (uuid) { + mdisk->disk.uuid = *uuid; + } else { + spdk_uuid_generate(&mdisk->disk.uuid); + } + + mdisk->disk.ctxt = mdisk; + mdisk->disk.fn_table = &malloc_fn_table; + mdisk->disk.module = &malloc_if; + + rc = spdk_bdev_register(&mdisk->disk); + if (rc) { + malloc_disk_free(mdisk); + return NULL; + } + + TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); + + return &mdisk->disk; +} + +void +delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &malloc_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +static int bdev_malloc_initialize(void) +{ + struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc"); + int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0; + uint64_t size; + struct spdk_bdev *bdev; + + if (sp != NULL) { + NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns"); + LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); + BlockSize = spdk_conf_section_get_intval(sp, "BlockSize"); + if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) { + SPDK_ERRLOG("Malloc section present, but no devices specified\n"); + goto end; + } + if (BlockSize < 1) { + /* Default is 512 bytes */ + BlockSize = 512; + } + size = (uint64_t)LunSizeInMB * 1024 * 1024; + for (i = 0; i < NumberOfLuns; i++) { + bdev = create_malloc_disk(NULL, NULL, size / BlockSize, BlockSize); + if (bdev == NULL) { + SPDK_ERRLOG("Could not create malloc disk\n"); + rc = EINVAL; + goto end; + } + } + } + +end: + return rc; +} + +static void +bdev_malloc_get_spdk_running_config(FILE *fp) +{ + int num_malloc_luns = 0; + uint64_t malloc_lun_size = 0; + struct malloc_disk *mdisk; + + /* count number of malloc LUNs, get LUN size */ + TAILQ_FOREACH(mdisk, &g_malloc_disks, link) { + if (0 == malloc_lun_size) { + /* assume all malloc luns the same size */ + malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt; + malloc_lun_size /= (1024 * 1024); + } + num_malloc_luns++; + } + + if (num_malloc_luns > 0) { + fprintf(fp, + "\n" + "# Users may change this section to create a different number or size of\n" + "# malloc LUNs.\n" + "# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n" + "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n" + "# Not all LUNs defined here are necessarily used below.\n" + "[Malloc]\n" + " NumberOfLuns %d\n" + " LunSizeInMB %" PRIu64 "\n", + num_malloc_luns, malloc_lun_size, + num_malloc_luns - 1, num_malloc_luns, + malloc_lun_size); + } +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC) diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc.h b/src/spdk/lib/bdev/malloc/bdev_malloc.h new file mode 100644 index 00000000..8ebdba78 --- /dev/null +++ b/src/spdk/lib/bdev/malloc/bdev_malloc.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_MALLOC_H +#define SPDK_BDEV_MALLOC_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" + +typedef void (*spdk_delete_malloc_complete)(void *cb_arg, int bdeverrno); + +struct spdk_bdev *create_malloc_disk(const char *name, const struct spdk_uuid *uuid, + uint64_t num_blocks, uint32_t block_size); + +void delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg); + +#endif /* SPDK_BDEV_MALLOC_H */ diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c b/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c new file mode 100644 index 00000000..4066cf2f --- /dev/null +++ b/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c @@ -0,0 +1,170 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_malloc.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/uuid.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +struct rpc_construct_malloc { + char *name; + char *uuid; + uint64_t num_blocks; + uint32_t block_size; +}; + +static void +free_rpc_construct_malloc(struct rpc_construct_malloc *r) +{ + free(r->name); + free(r->uuid); +} + +static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = { + {"name", offsetof(struct rpc_construct_malloc, name), spdk_json_decode_string, true}, + {"uuid", offsetof(struct rpc_construct_malloc, uuid), spdk_json_decode_string, true}, + {"num_blocks", offsetof(struct rpc_construct_malloc, num_blocks), spdk_json_decode_uint64}, + {"block_size", offsetof(struct rpc_construct_malloc, block_size), spdk_json_decode_uint32}, +}; + +static void +spdk_rpc_construct_malloc_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_malloc req = {NULL}; + struct spdk_json_write_ctx *w; + struct spdk_uuid *uuid = NULL; + struct spdk_uuid decoded_uuid; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_construct_malloc_decoders, + SPDK_COUNTOF(rpc_construct_malloc_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.uuid) { + if (spdk_uuid_parse(&decoded_uuid, req.uuid)) { + goto invalid; + } + uuid = &decoded_uuid; + } + + bdev = create_malloc_disk(req.name, uuid, req.num_blocks, req.block_size); + if (bdev == NULL) { + goto invalid; + } + + free_rpc_construct_malloc(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + free_rpc_construct_malloc(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} +SPDK_RPC_REGISTER("construct_malloc_bdev", spdk_rpc_construct_malloc_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_malloc { + char *name; +}; + +static void +free_rpc_delete_malloc(struct rpc_delete_malloc *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_malloc_decoders[] = { + {"name", offsetof(struct rpc_delete_malloc, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_malloc_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_malloc_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_malloc req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_malloc_decoders, + SPDK_COUNTOF(rpc_delete_malloc_decoders), + &req)) 
{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_INFOLOG(SPDK_LOG_BDEV_MALLOC, "bdev '%s' does not exist\n", req.name); + rc = -ENODEV; + goto invalid; + } + + delete_malloc_disk(bdev, _spdk_rpc_delete_malloc_bdev_cb, request); + + free_rpc_delete_malloc(&req); + + return; + +invalid: + free_rpc_delete_malloc(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_malloc_bdev", spdk_rpc_delete_malloc_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/null/Makefile b/src/spdk/lib/bdev/null/Makefile new file mode 100644 index 00000000..24962e58 --- /dev/null +++ b/src/spdk/lib/bdev/null/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_null.c bdev_null_rpc.c +LIBNAME = bdev_null + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/null/bdev_null.c b/src/spdk/lib/bdev/null/bdev_null.c new file mode 100644 index 00000000..9ff64725 --- /dev/null +++ b/src/spdk/lib/bdev/null/bdev_null.c @@ -0,0 +1,384 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/json.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "bdev_null.h" + +struct null_bdev { + struct spdk_bdev bdev; + TAILQ_ENTRY(null_bdev) tailq; +}; + +struct null_io_channel { + struct spdk_poller *poller; + TAILQ_HEAD(, spdk_bdev_io) io; +}; + +static TAILQ_HEAD(, null_bdev) g_null_bdev_head; +static void *g_null_read_buf; + +static int bdev_null_initialize(void); +static void bdev_null_finish(void); +static void bdev_null_get_spdk_running_config(FILE *fp); + +static struct spdk_bdev_module null_if = { + .name = "null", + .module_init = bdev_null_initialize, + .module_fini = bdev_null_finish, + .config_text = bdev_null_get_spdk_running_config, + .async_fini = true, +}; + +SPDK_BDEV_MODULE_REGISTER(&null_if) + +static int +bdev_null_destruct(void *ctx) +{ + struct null_bdev *bdev = ctx; + + TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq); + free(bdev->bdev.name); + spdk_dma_free(bdev); + + return 0; +} + +static void +bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch); + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { + assert(bdev_io->u.bdev.iovcnt == 1); + bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf; + bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; + } + TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + case SPDK_BDEV_IO_TYPE_RESET: + TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_UNMAP: + default: + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } +} + +static bool +bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_UNMAP: + default: + return false; + } +} + +static struct spdk_io_channel * +bdev_null_get_io_channel(void *ctx) +{ + return spdk_get_io_channel(&g_null_bdev_head); +} + +static void +bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + char 
uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_null_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); + spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table null_fn_table = { + .destruct = bdev_null_destruct, + .submit_request = bdev_null_submit_request, + .io_type_supported = bdev_null_io_type_supported, + .get_io_channel = bdev_null_get_io_channel, + .write_config_json = bdev_null_write_config_json, +}; + +struct spdk_bdev * +create_null_bdev(const char *name, const struct spdk_uuid *uuid, + uint64_t num_blocks, uint32_t block_size) +{ + struct null_bdev *bdev; + int rc; + + if (block_size % 512 != 0) { + SPDK_ERRLOG("Block size %u is not a multiple of 512.\n", block_size); + return NULL; + } + + if (num_blocks == 0) { + SPDK_ERRLOG("Disk must be more than 0 blocks\n"); + return NULL; + } + + bdev = spdk_dma_zmalloc(sizeof(*bdev), 0, NULL); + if (!bdev) { + SPDK_ERRLOG("could not allocate null_bdev\n"); + return NULL; + } + + bdev->bdev.name = strdup(name); + if (!bdev->bdev.name) { + spdk_dma_free(bdev); + return NULL; + } + bdev->bdev.product_name = "Null disk"; + + bdev->bdev.write_cache = 0; + bdev->bdev.blocklen = block_size; + bdev->bdev.blockcnt = num_blocks; + if (uuid) { + bdev->bdev.uuid = *uuid; + } else { + spdk_uuid_generate(&bdev->bdev.uuid); + } + + bdev->bdev.ctxt = bdev; + bdev->bdev.fn_table = &null_fn_table; + bdev->bdev.module = &null_if; + + rc = spdk_bdev_register(&bdev->bdev); + if (rc) { + free(bdev->bdev.name); + spdk_dma_free(bdev); + return NULL; + } + + TAILQ_INSERT_TAIL(&g_null_bdev_head, bdev, tailq); + + return &bdev->bdev; +} + +void +delete_null_bdev(struct spdk_bdev *bdev, spdk_delete_null_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &null_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +static int +null_io_poll(void *arg) +{ + struct null_io_channel *ch = arg; + TAILQ_HEAD(, spdk_bdev_io) io; + struct spdk_bdev_io *bdev_io; + + TAILQ_INIT(&io); + TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link); + + if (TAILQ_EMPTY(&io)) { + return 0; + } + + while (!TAILQ_EMPTY(&io)) { + bdev_io = TAILQ_FIRST(&io); + TAILQ_REMOVE(&io, bdev_io, module_link); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } + + return 1; +} + +static int +null_bdev_create_cb(void *io_device, void *ctx_buf) +{ + struct null_io_channel *ch = ctx_buf; + + TAILQ_INIT(&ch->io); + ch->poller = spdk_poller_register(null_io_poll, ch, 0); + + return 0; +} + +static void +null_bdev_destroy_cb(void *io_device, void *ctx_buf) +{ + struct null_io_channel *ch = ctx_buf; + + spdk_poller_unregister(&ch->poller); +} + +static int +bdev_null_initialize(void) +{ + struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Null"); + uint64_t size_in_mb, num_blocks; + int block_size, i, rc = 0; + struct spdk_bdev *bdev; + const char *name, *val; + + TAILQ_INIT(&g_null_bdev_head); + + /* + * This will be used if upper layer expects us to allocate the read buffer. + * Instead of using a real rbuf from the bdev pool, just always point to + * this same zeroed buffer. 
+ */ + g_null_read_buf = spdk_dma_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL); + + /* + * We need to pick some unique address as our "io device" - so just use the + * address of the global tailq. + */ + spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb, + sizeof(struct null_io_channel), + "null_bdev"); + + if (sp == NULL) { + goto end; + } + + i = 0; + while (true) { + val = spdk_conf_section_get_nval(sp, "Dev", i); + if (val == NULL) { + break; + } + + name = spdk_conf_section_get_nmval(sp, "Dev", i, 0); + if (name == NULL) { + SPDK_ERRLOG("Null entry %d: Name must be provided\n", i); + continue; + } + + val = spdk_conf_section_get_nmval(sp, "Dev", i, 1); + if (val == NULL) { + SPDK_ERRLOG("Null entry %d: Size in MB must be provided\n", i); + continue; + } + + errno = 0; + size_in_mb = strtoull(val, NULL, 10); + if (errno) { + SPDK_ERRLOG("Null entry %d: Invalid size in MB %s\n", i, val); + continue; + } + + val = spdk_conf_section_get_nmval(sp, "Dev", i, 2); + if (val == NULL) { + block_size = 512; + } else { + errno = 0; + block_size = (int)strtol(val, NULL, 10); + if (errno) { + SPDK_ERRLOG("Null entry %d: Invalid block size %s\n", i, val); + continue; + } + } + + num_blocks = size_in_mb * (1024 * 1024) / block_size; + + bdev = create_null_bdev(name, NULL, num_blocks, block_size); + if (bdev == NULL) { + SPDK_ERRLOG("Could not create null bdev\n"); + rc = EINVAL; + goto end; + } + + i++; + } + +end: + return rc; +} + +static void +_bdev_null_finish_cb(void *arg) +{ + spdk_dma_free(g_null_read_buf); + spdk_bdev_module_finish_done(); +} + +static void +bdev_null_finish(void) +{ + spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb); +} + +static void +bdev_null_get_spdk_running_config(FILE *fp) +{ + struct null_bdev *bdev; + uint64_t null_bdev_size; + + fprintf(fp, "\n[Null]\n"); + + TAILQ_FOREACH(bdev, &g_null_bdev_head, tailq) { + null_bdev_size = bdev->bdev.blocklen * bdev->bdev.blockcnt; + null_bdev_size /= (1024 * 1024); + fprintf(fp, " %s %" PRIu64 " %d\n", + bdev->bdev.name, null_bdev_size, bdev->bdev.blocklen); + } +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_null", SPDK_LOG_BDEV_NULL) diff --git a/src/spdk/lib/bdev/null/bdev_null.h b/src/spdk/lib/bdev/null/bdev_null.h new file mode 100644 index 00000000..fa0123e3 --- /dev/null +++ b/src/spdk/lib/bdev/null/bdev_null.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_NULL_H +#define SPDK_BDEV_NULL_H + +#include "spdk/stdinc.h" + +typedef void (*spdk_delete_null_complete)(void *cb_arg, int bdeverrno); + +struct spdk_bdev; +struct spdk_uuid; + +struct spdk_bdev *create_null_bdev(const char *name, const struct spdk_uuid *uuid, + uint64_t num_blocks, uint32_t block_size); + +/** + * Delete null bdev. + * + * \param bdev Pointer to null bdev. + * \param cb_fn Function to call after deletion. + * \param cb_arg Argument to pass to cb_fn. + */ +void delete_null_bdev(struct spdk_bdev *bdev, spdk_delete_null_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_BDEV_NULL_H */ diff --git a/src/spdk/lib/bdev/null/bdev_null_rpc.c b/src/spdk/lib/bdev/null/bdev_null_rpc.c new file mode 100644 index 00000000..9410b7ad --- /dev/null +++ b/src/spdk/lib/bdev/null/bdev_null_rpc.c @@ -0,0 +1,169 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "bdev_null.h" + +struct rpc_construct_null { + char *name; + char *uuid; + uint64_t num_blocks; + uint32_t block_size; +}; + +static void +free_rpc_construct_null(struct rpc_construct_null *req) +{ + free(req->name); + free(req->uuid); +} + +static const struct spdk_json_object_decoder rpc_construct_null_decoders[] = { + {"name", offsetof(struct rpc_construct_null, name), spdk_json_decode_string}, + {"uuid", offsetof(struct rpc_construct_null, uuid), spdk_json_decode_string, true}, + {"num_blocks", offsetof(struct rpc_construct_null, num_blocks), spdk_json_decode_uint64}, + {"block_size", offsetof(struct rpc_construct_null, block_size), spdk_json_decode_uint32}, +}; + +static void +spdk_rpc_construct_null_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_null req = {}; + struct spdk_json_write_ctx *w; + struct spdk_uuid *uuid = NULL; + struct spdk_uuid decoded_uuid; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_construct_null_decoders, + SPDK_COUNTOF(rpc_construct_null_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NULL, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.uuid) { + if (spdk_uuid_parse(&decoded_uuid, req.uuid)) { + goto invalid; + } + uuid = &decoded_uuid; + } + + bdev = create_null_bdev(req.name, uuid, req.num_blocks, req.block_size); + if (bdev == NULL) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_null(&req); + return; + } + + spdk_json_write_string(w, bdev->name); + spdk_jsonrpc_end_result(request, w); + free_rpc_construct_null(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_null(&req); +} +SPDK_RPC_REGISTER("construct_null_bdev", spdk_rpc_construct_null_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_null { + char *name; +}; + +static void +free_rpc_delete_null(struct rpc_delete_null *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_null_decoders[] = { + {"name", offsetof(struct rpc_delete_null, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_null_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_null_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_null req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_null_decoders, + SPDK_COUNTOF(rpc_delete_null_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + delete_null_bdev(bdev, _spdk_rpc_delete_null_bdev_cb, request); + + free_rpc_delete_null(&req); + + return; + +invalid: + free_rpc_delete_null(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_null_bdev", spdk_rpc_delete_null_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/nvme/Makefile b/src/spdk/lib/bdev/nvme/Makefile new file mode 
100644 index 00000000..c5a40c74 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c +LIBNAME = bdev_nvme + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.c b/src/spdk/lib/bdev/nvme/bdev_nvme.c new file mode 100644 index 00000000..07c3b6ce --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme.c @@ -0,0 +1,1856 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "bdev_nvme.h" + +#include "spdk/config.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/bdev.h" +#include "spdk/json.h" +#include "spdk/nvme.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/likely.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +static void bdev_nvme_get_spdk_running_config(FILE *fp); +static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); + +struct nvme_io_channel { + struct spdk_nvme_qpair *qpair; + struct spdk_poller *poller; + + bool collect_spin_stat; + uint64_t spin_ticks; + uint64_t start_ticks; + uint64_t end_ticks; +}; + +struct nvme_bdev_io { + /** array of iovecs to transfer. */ + struct iovec *iovs; + + /** Number of iovecs in iovs array. */ + int iovcnt; + + /** Current iovec position. */ + int iovpos; + + /** Offset in current iovec. */ + uint32_t iov_offset; + + /** Saved status for admin passthru completion event. */ + struct spdk_nvme_cpl cpl; + + /** Originating thread */ + struct spdk_thread *orig_thread; +}; + +enum data_direction { + BDEV_DISK_READ = 0, + BDEV_DISK_WRITE = 1 +}; + +struct nvme_probe_ctx { + size_t count; + struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; + const char *names[NVME_MAX_CONTROLLERS]; + const char *hostnqn; +}; + +static struct spdk_bdev_nvme_opts g_opts = { + .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE, + .timeout_us = 0, + .retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT, + .nvme_adminq_poll_period_us = 1000000ULL, +}; + +#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL +#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL + +static int g_hot_insert_nvme_controller_index = 0; +static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT; +static bool g_nvme_hotplug_enabled = false; +static struct spdk_thread *g_bdev_nvme_init_thread; +static struct spdk_poller *g_hotplug_poller; +static char *g_nvme_hostnqn = NULL; +static pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER; + +static TAILQ_HEAD(, nvme_ctrlr) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs); + +static int nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr); +static int bdev_nvme_library_init(void); +static void bdev_nvme_library_fini(void); +static int bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, + struct nvme_bdev_io *bio, + int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, + uint64_t lba); +static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); +static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); +static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, 
size_t nbytes, void *md_buf, size_t md_len); +static int nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid); + +struct spdk_nvme_qpair * +spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) +{ + struct nvme_io_channel *nvme_ch; + + nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch); + + return nvme_ch->qpair; +} + +struct nvme_ctrlr * +spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name) +{ + struct nvme_ctrlr *_nvme_ctrlr; + + TAILQ_FOREACH(_nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (strcmp(ctrlr_name, _nvme_ctrlr->name) == 0) { + return _nvme_ctrlr; + } + } + + return NULL; +} + +struct nvme_ctrlr * +spdk_bdev_nvme_first_ctrlr(void) +{ + return TAILQ_FIRST(&g_nvme_ctrlrs); +} + +struct nvme_ctrlr * +spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev) +{ + return TAILQ_NEXT(prev, tailq); +} + +static int +bdev_nvme_get_ctx_size(void) +{ + return sizeof(struct nvme_bdev_io); +} + +static struct spdk_bdev_module nvme_if = { + .name = "nvme", + .module_init = bdev_nvme_library_init, + .module_fini = bdev_nvme_library_fini, + .config_text = bdev_nvme_get_spdk_running_config, + .config_json = bdev_nvme_config_json, + .get_ctx_size = bdev_nvme_get_ctx_size, + +}; +SPDK_BDEV_MODULE_REGISTER(&nvme_if) + +static int +bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n", + lba_count, lba); + + return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_READ, + iov, iovcnt, lba_count, lba); +} + +static int +bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n", + lba_count, lba); + + return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_WRITE, + iov, iovcnt, lba_count, lba); +} + +static int +bdev_nvme_poll(void *arg) +{ + struct nvme_io_channel *ch = arg; + int32_t num_completions; + + if (ch->qpair == NULL) { + return -1; + } + + if (ch->collect_spin_stat && ch->start_ticks == 0) { + ch->start_ticks = spdk_get_ticks(); + } + + num_completions = spdk_nvme_qpair_process_completions(ch->qpair, 0); + + if (ch->collect_spin_stat) { + if (num_completions > 0) { + if (ch->end_ticks != 0) { + ch->spin_ticks += (ch->end_ticks - ch->start_ticks); + ch->end_ticks = 0; + } + ch->start_ticks = 0; + } else { + ch->end_ticks = spdk_get_ticks(); + } + } + + return num_completions; +} + +static int +bdev_nvme_poll_adminq(void *arg) +{ + struct spdk_nvme_ctrlr *ctrlr = arg; + + return spdk_nvme_ctrlr_process_admin_completions(ctrlr); +} + +static void +bdev_nvme_unregister_cb(void *io_device) +{ + struct spdk_nvme_ctrlr *ctrlr = io_device; + + spdk_nvme_detach(ctrlr); +} + +static int +bdev_nvme_destruct(void *ctx) +{ + struct nvme_bdev *nvme_disk = ctx; + struct nvme_ctrlr *nvme_ctrlr = nvme_disk->nvme_ctrlr; + + pthread_mutex_lock(&g_bdev_nvme_mutex); + nvme_ctrlr->ref--; + free(nvme_disk->disk.name); + memset(nvme_disk, 0, sizeof(*nvme_disk)); + if (nvme_ctrlr->ref == 0) { + TAILQ_REMOVE(&g_nvme_ctrlrs, nvme_ctrlr, tailq); + pthread_mutex_unlock(&g_bdev_nvme_mutex); + spdk_io_device_unregister(nvme_ctrlr->ctrlr, bdev_nvme_unregister_cb); + 
spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller); + free(nvme_ctrlr->name); + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr); + return 0; + } + + pthread_mutex_unlock(&g_bdev_nvme_mutex); + return 0; + +} + +static int +bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio, + uint64_t offset, uint64_t nbytes) +{ + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); + + return 0; +} + +static void +_bdev_nvme_reset_done(struct spdk_io_channel_iter *i, int status) +{ + void *ctx = spdk_io_channel_iter_get_ctx(i); + int rc = SPDK_BDEV_IO_STATUS_SUCCESS; + + if (status) { + rc = SPDK_BDEV_IO_STATUS_FAILED; + } + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc); +} + +static void +_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i); + struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); + + nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (!nvme_ch->qpair) { + spdk_for_each_channel_continue(i, -1); + return; + } + + spdk_for_each_channel_continue(i, 0); +} + +static void +_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i); + struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i); + int rc; + + if (status) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc != 0) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + /* Recreate all of the I/O queue pairs */ + spdk_for_each_channel(ctrlr, + _bdev_nvme_reset_create_qpair, + bio, + _bdev_nvme_reset_done); + + +} + +static void +_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + int rc; + + rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); + if (!rc) { + nvme_ch->qpair = NULL; + } + + spdk_for_each_channel_continue(i, rc); +} + +static int +bdev_nvme_reset(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio) +{ + /* First, delete all NVMe I/O queue pairs. 
*/ + spdk_for_each_channel(nbdev->nvme_ctrlr->ctrlr, + _bdev_nvme_reset_destroy_qpair, + bio, + _bdev_nvme_reset); + + return 0; +} + +static int +bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + uint64_t offset_blocks, + uint64_t num_blocks); + +static void +bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + int ret; + + ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + + if (spdk_likely(ret == 0)) { + return; + } else if (ret == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + } else { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int +_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + if (nvme_ch->qpair == NULL) { + /* The device is currently resetting */ + return -1; + } + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + return bdev_nvme_writev((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + + case SPDK_BDEV_IO_TYPE_UNMAP: + return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + + case SPDK_BDEV_IO_TYPE_RESET: + return bdev_nvme_reset((struct nvme_bdev *)bdev_io->bdev->ctxt, + (struct nvme_bdev_io *)bdev_io->driver_ctx); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return bdev_nvme_flush((struct nvme_bdev *)bdev_io->bdev->ctxt, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + return bdev_nvme_admin_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes); + + case SPDK_BDEV_IO_TYPE_NVME_IO: + return bdev_nvme_io_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes); + + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return bdev_nvme_io_passthru_md((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes, + bdev_io->u.nvme_passthru.md_buf, + bdev_io->u.nvme_passthru.md_len); + + default: + return -EINVAL; + } + return 0; +} + +static void +bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + int rc = _bdev_nvme_submit_request(ch, bdev_io); + + if (spdk_unlikely(rc != 0)) { + if (rc == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + } else { + 
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +static bool +bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct nvme_bdev *nbdev = ctx; + const struct spdk_nvme_ctrlr_data *cdata; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + case SPDK_BDEV_IO_TYPE_NVME_IO: + return true; + + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return spdk_nvme_ns_get_md_size(nbdev->ns) ? true : false; + + case SPDK_BDEV_IO_TYPE_UNMAP: + cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr); + return cdata->oncs.dsm; + + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr); + /* + * If an NVMe controller guarantees reading unallocated blocks returns zero, + * we can implement WRITE_ZEROES as an NVMe deallocate command. + */ + if (cdata->oncs.dsm && + spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->ns) == SPDK_NVME_DEALLOC_READ_00) { + return true; + } + /* + * The NVMe controller write_zeroes function is currently not used by our driver. + * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail. + * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read. + */ + return false; + + default: + return false; + } +} + +static int +bdev_nvme_create_cb(void *io_device, void *ctx_buf) +{ + struct spdk_nvme_ctrlr *ctrlr = io_device; + struct nvme_io_channel *ch = ctx_buf; + +#ifdef SPDK_CONFIG_VTUNE + ch->collect_spin_stat = true; +#else + ch->collect_spin_stat = false; +#endif + + ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + + if (ch->qpair == NULL) { + return -1; + } + + ch->poller = spdk_poller_register(bdev_nvme_poll, ch, 0); + return 0; +} + +static void +bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) +{ + struct nvme_io_channel *ch = ctx_buf; + + spdk_nvme_ctrlr_free_io_qpair(ch->qpair); + spdk_poller_unregister(&ch->poller); +} + +static struct spdk_io_channel * +bdev_nvme_get_io_channel(void *ctx) +{ + struct nvme_bdev *nvme_bdev = ctx; + + return spdk_get_io_channel(nvme_bdev->nvme_ctrlr->ctrlr); +} + +void +spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w) +{ + const char *trtype_str; + const char *adrfam_str; + + trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype); + if (trtype_str) { + spdk_json_write_named_string(w, "trtype", trtype_str); + } + + adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam); + if (adrfam_str) { + spdk_json_write_named_string(w, "adrfam", adrfam_str); + } + + if (trid->traddr[0] != '\0') { + spdk_json_write_named_string(w, "traddr", trid->traddr); + } + + if (trid->trsvcid[0] != '\0') { + spdk_json_write_named_string(w, "trsvcid", trid->trsvcid); + } + + if (trid->subnqn[0] != '\0') { + spdk_json_write_named_string(w, "subnqn", trid->subnqn); + } +} + +static int +bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct nvme_bdev *nvme_bdev = ctx; + struct nvme_ctrlr *nvme_ctrlr = nvme_bdev->nvme_ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + union spdk_nvme_vs_register vs; + union spdk_nvme_csts_register csts; + char buf[128]; + + cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_ctrlr->ctrlr); + vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_ctrlr->ctrlr); + csts = 
spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_ctrlr->ctrlr); + ns = nvme_bdev->ns; + + spdk_json_write_named_object_begin(w, "nvme"); + + if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + spdk_json_write_named_string(w, "pci_address", nvme_ctrlr->trid.traddr); + } + + spdk_json_write_named_object_begin(w, "trid"); + + spdk_bdev_nvme_dump_trid_json(&nvme_ctrlr->trid, w); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "ctrlr_data"); + + spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid); + + snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn); + spdk_str_trim(buf); + spdk_json_write_named_string(w, "model_number", buf); + + snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn); + spdk_str_trim(buf); + spdk_json_write_named_string(w, "serial_number", buf); + + snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr); + spdk_str_trim(buf); + spdk_json_write_named_string(w, "firmware_revision", buf); + + spdk_json_write_named_object_begin(w, "oacs"); + + spdk_json_write_named_uint32(w, "security", cdata->oacs.security); + spdk_json_write_named_uint32(w, "format", cdata->oacs.format); + spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware); + spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "vs"); + + spdk_json_write_name(w, "nvme_version"); + if (vs.bits.ter) { + spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter); + } else { + spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr); + } + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "csts"); + + spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy); + spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "ns_data"); + + spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns)); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev needed */ +} + +static uint64_t +bdev_nvme_get_spin_time(struct spdk_io_channel *ch) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint64_t spin_time; + + if (!nvme_ch->collect_spin_stat) { + return 0; + } + + if (nvme_ch->end_ticks != 0) { + nvme_ch->spin_ticks += (nvme_ch->end_ticks - nvme_ch->start_ticks); + nvme_ch->end_ticks = 0; + } + + spin_time = (nvme_ch->spin_ticks * 1000000ULL) / spdk_get_ticks_hz(); + nvme_ch->start_ticks = 0; + nvme_ch->spin_ticks = 0; + + return spin_time; +} + +static const struct spdk_bdev_fn_table nvmelib_fn_table = { + .destruct = bdev_nvme_destruct, + .submit_request = bdev_nvme_submit_request, + .io_type_supported = bdev_nvme_io_type_supported, + .get_io_channel = bdev_nvme_get_io_channel, + .dump_info_json = bdev_nvme_dump_info_json, + .write_config_json = bdev_nvme_write_config_json, + .get_spin_time = bdev_nvme_get_spin_time, +}; + +static int +nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid) +{ + struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; + struct nvme_bdev *bdev; + struct spdk_nvme_ns *ns; + const struct spdk_uuid *uuid; + const struct spdk_nvme_ctrlr_data *cdata; + int rc; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (!ns) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid 
NS %d\n", nsid); + return -EINVAL; + } + + bdev = &nvme_ctrlr->bdevs[nsid - 1]; + bdev->id = nsid; + + bdev->nvme_ctrlr = nvme_ctrlr; + bdev->ns = ns; + nvme_ctrlr->ref++; + + bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_ctrlr->name, spdk_nvme_ns_get_id(ns)); + if (!bdev->disk.name) { + nvme_ctrlr->ref--; + memset(bdev, 0, sizeof(*bdev)); + return -ENOMEM; + } + bdev->disk.product_name = "NVMe disk"; + + bdev->disk.write_cache = 0; + if (cdata->vwc.present) { + /* Enable if the Volatile Write Cache exists */ + bdev->disk.write_cache = 1; + } + bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns); + bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns); + bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); + + uuid = spdk_nvme_ns_get_uuid(ns); + if (uuid != NULL) { + bdev->disk.uuid = *uuid; + } + + bdev->disk.ctxt = bdev; + bdev->disk.fn_table = &nvmelib_fn_table; + bdev->disk.module = &nvme_if; + rc = spdk_bdev_register(&bdev->disk); + if (rc) { + free(bdev->disk.name); + nvme_ctrlr->ref--; + memset(bdev, 0, sizeof(*bdev)); + return rc; + } + bdev->active = true; + + return 0; +} + + +static bool +hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr); + + return true; +} + +static struct nvme_ctrlr * +nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid) +{ + struct nvme_ctrlr *nvme_ctrlr; + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->trid) == 0) { + return nvme_ctrlr; + } + } + + return NULL; +} + +static struct nvme_ctrlr * +nvme_ctrlr_get_by_name(const char *name) +{ + struct nvme_ctrlr *nvme_ctrlr; + + if (name == NULL) { + return NULL; + } + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (strcmp(name, nvme_ctrlr->name) == 0) { + return nvme_ctrlr; + } + } + + return NULL; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr); + + if (nvme_ctrlr_get(trid)) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", + trid->traddr); + return false; + } + + if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { + bool claim_device = false; + size_t i; + + for (i = 0; i < ctx->count; i++) { + if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { + claim_device = true; + break; + } + } + + if (!claim_device) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr); + return false; + } + } + + if (ctx->hostnqn) { + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); + } + + return true; +} + +static void +spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = ctx; + int rc; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("Abort failed. Resetting controller.\n"); + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + } +} + +static void +timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, uint16_t cid) +{ + int rc; + union spdk_nvme_csts_register csts; + + SPDK_WARNLOG("Warning: Detected a timeout. 
ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); + + csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); + if (csts.bits.cfs) { + SPDK_ERRLOG("Controller Fatal Status, reset required\n"); + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + return; + } + + switch (g_opts.action_on_timeout) { + case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: + if (qpair) { + rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, + spdk_nvme_abort_cpl, ctrlr); + if (rc == 0) { + return; + } + + SPDK_ERRLOG("Unable to send abort. Resetting.\n"); + } + + /* FALLTHROUGH */ + case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: + break; + } +} + +static void +nvme_ctrlr_deactivate_bdev(struct nvme_bdev *bdev) +{ + spdk_bdev_unregister(&bdev->disk, NULL, NULL); + bdev->active = false; +} + +static void +nvme_ctrlr_update_ns_bdevs(struct nvme_ctrlr *nvme_ctrlr) +{ + struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; + uint32_t i; + struct nvme_bdev *bdev; + + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + uint32_t nsid = i + 1; + + bdev = &nvme_ctrlr->bdevs[i]; + if (!bdev->active && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + SPDK_NOTICELOG("NSID %u to be added\n", nsid); + nvme_ctrlr_create_bdev(nvme_ctrlr, nsid); + } + + if (bdev->active && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + SPDK_NOTICELOG("NSID %u Bdev %s is removed\n", nsid, bdev->disk.name); + nvme_ctrlr_deactivate_bdev(bdev); + } + } + +} + +static void +aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_ctrlr *nvme_ctrlr = arg; + union spdk_nvme_async_event_completion event; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("AER request execute failed"); + return; + } + + event.raw = cpl->cdw0; + if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && + (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { + nvme_ctrlr_update_ns_bdevs(nvme_ctrlr); + } +} + +static int +create_ctrlr(struct spdk_nvme_ctrlr *ctrlr, + const char *name, + const struct spdk_nvme_transport_id *trid) +{ + struct nvme_ctrlr *nvme_ctrlr; + + nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr)); + if (nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed to allocate device struct\n"); + return -ENOMEM; + } + nvme_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + nvme_ctrlr->bdevs = calloc(nvme_ctrlr->num_ns, sizeof(struct nvme_bdev)); + if (!nvme_ctrlr->bdevs) { + SPDK_ERRLOG("Failed to allocate block devices struct\n"); + free(nvme_ctrlr); + return -ENOMEM; + } + + nvme_ctrlr->adminq_timer_poller = NULL; + nvme_ctrlr->ctrlr = ctrlr; + nvme_ctrlr->ref = 0; + nvme_ctrlr->trid = *trid; + nvme_ctrlr->name = strdup(name); + if (nvme_ctrlr->name == NULL) { + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr); + return -ENOMEM; + } + + spdk_io_device_register(ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb, + sizeof(struct nvme_io_channel), + name); + + if (nvme_ctrlr_create_bdevs(nvme_ctrlr) != 0) { + spdk_io_device_unregister(ctrlr, bdev_nvme_unregister_cb); + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr->name); + free(nvme_ctrlr); + return -1; + } + + nvme_ctrlr->adminq_timer_poller = spdk_poller_register(bdev_nvme_poll_adminq, ctrlr, + g_opts.nvme_adminq_poll_period_us); + + TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq); + + if (g_opts.timeout_us > 0 && g_opts.action_on_timeout != SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE) { + 
spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, + timeout_cb, NULL); + } + + spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr); + + return 0; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + char *name = NULL; + size_t i; + + if (ctx) { + for (i = 0; i < ctx->count; i++) { + if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { + name = strdup(ctx->names[i]); + break; + } + } + } else { + name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); + } + if (!name) { + SPDK_ERRLOG("Failed to assign name to NVMe device\n"); + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name); + + create_ctrlr(ctrlr, name, trid); + + free(name); +} + +static void +remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t i; + struct nvme_ctrlr *nvme_ctrlr; + struct nvme_bdev *nvme_bdev; + + pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (nvme_ctrlr->ctrlr == ctrlr) { + pthread_mutex_unlock(&g_bdev_nvme_mutex); + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + uint32_t nsid = i + 1; + + nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1]; + assert(nvme_bdev->id == nsid); + if (nvme_bdev->active) { + spdk_bdev_unregister(&nvme_bdev->disk, NULL, NULL); + } + } + return; + } + } + pthread_mutex_unlock(&g_bdev_nvme_mutex); +} + +static int +bdev_nvme_hotplug(void *arg) +{ + if (spdk_nvme_probe(NULL, NULL, hotplug_probe_cb, attach_cb, remove_cb) != 0) { + SPDK_ERRLOG("spdk_nvme_probe() failed\n"); + } + + return -1; +} + +void +spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) +{ + *opts = g_opts; +} + +int +spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) +{ + if (g_bdev_nvme_init_thread != NULL) { + return -EPERM; + } + + g_opts = *opts; + + return 0; +} +struct set_nvme_hotplug_ctx { + uint64_t period_us; + bool enabled; + spdk_thread_fn fn; + void *fn_ctx; +}; + +static void +set_nvme_hotplug_period_cb(void *_ctx) +{ + struct set_nvme_hotplug_ctx *ctx = _ctx; + + spdk_poller_unregister(&g_hotplug_poller); + if (ctx->enabled) { + g_hotplug_poller = spdk_poller_register(bdev_nvme_hotplug, NULL, ctx->period_us); + } + + g_nvme_hotplug_poll_period_us = ctx->period_us; + g_nvme_hotplug_enabled = ctx->enabled; + if (ctx->fn) { + ctx->fn(ctx->fn_ctx); + } + + free(ctx); +} + +int +spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx) +{ + struct set_nvme_hotplug_ctx *ctx; + + if (enabled == true && !spdk_process_is_primary()) { + return -EPERM; + } + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + return -ENOMEM; + } + + period_us = period_us == 0 ? 
NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; + ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); + ctx->enabled = enabled; + ctx->fn = cb; + ctx->fn_ctx = cb_ctx; + + spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); + return 0; +} + +int +spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid, + const char *base_name, + const char **names, size_t *count, + const char *hostnqn) +{ + struct nvme_probe_ctx *probe_ctx; + struct nvme_ctrlr *nvme_ctrlr; + struct nvme_bdev *nvme_bdev; + uint32_t i, nsid; + size_t j; + + if (nvme_ctrlr_get(trid) != NULL) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr); + return -1; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (probe_ctx == NULL) { + SPDK_ERRLOG("Failed to allocate probe_ctx\n"); + return -1; + } + + probe_ctx->count = 1; + probe_ctx->trids[0] = *trid; + probe_ctx->names[0] = base_name; + probe_ctx->hostnqn = hostnqn; + if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { + SPDK_ERRLOG("Failed to probe for new devices\n"); + free(probe_ctx); + return -1; + } + + nvme_ctrlr = nvme_ctrlr_get(trid); + if (!nvme_ctrlr) { + SPDK_ERRLOG("Failed to find new NVMe controller\n"); + free(probe_ctx); + return -1; + } + + /* + * Report the new bdevs that were created in this call. + * There can be more than one bdev per NVMe controller since one bdev is created per namespace. + */ + j = 0; + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + nsid = i + 1; + nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1]; + if (!nvme_bdev->active) { + continue; + } + assert(nvme_bdev->id == nsid); + if (j < *count) { + names[j] = nvme_bdev->disk.name; + j++; + } else { + SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %zu. 
Unable to return all names of created bdevs\n", + *count); + free(probe_ctx); + return -1; + } + } + + *count = j; + + free(probe_ctx); + return 0; +} + +int +spdk_bdev_nvme_delete(const char *name) +{ + struct nvme_ctrlr *nvme_ctrlr = NULL; + + if (name == NULL) { + return -EINVAL; + } + + nvme_ctrlr = nvme_ctrlr_get_by_name(name); + if (nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed to find NVMe controller\n"); + return -ENODEV; + } + + remove_cb(NULL, nvme_ctrlr->ctrlr); + return 0; +} + +static int +bdev_nvme_library_init(void) +{ + struct spdk_conf_section *sp; + const char *val; + int rc = 0; + int64_t intval = 0; + size_t i; + struct nvme_probe_ctx *probe_ctx = NULL; + int retry_count; + uint32_t local_nvme_num = 0; + int64_t hotplug_period; + bool hotplug_enabled = g_nvme_hotplug_enabled; + + g_bdev_nvme_init_thread = spdk_get_thread(); + + sp = spdk_conf_find_section(NULL, "Nvme"); + if (sp == NULL) { + goto end; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (probe_ctx == NULL) { + SPDK_ERRLOG("Failed to allocate probe_ctx\n"); + rc = -1; + goto end; + } + + if ((retry_count = spdk_conf_section_get_intval(sp, "RetryCount")) < 0) { + if ((retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount")) < 0) { + retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT; + } else { + SPDK_WARNLOG("NvmeRetryCount was renamed to RetryCount\n"); + SPDK_WARNLOG("Please update your configuration file\n"); + } + } + + g_opts.retry_count = retry_count; + + val = spdk_conf_section_get_val(sp, "TimeoutUsec"); + if (val != NULL) { + intval = strtoll(val, NULL, 10); + if (intval == LLONG_MIN || intval == LLONG_MAX) { + SPDK_ERRLOG("Invalid TimeoutUsec value\n"); + rc = -1; + goto end; + } else if (intval < 0) { + intval = 0; + } + } + + g_opts.timeout_us = intval; + + if (g_opts.timeout_us > 0) { + val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); + if (val != NULL) { + if (!strcasecmp(val, "Reset")) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } else if (!strcasecmp(val, "Abort")) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; + } + } else { + /* Handle old name for backward compatibility */ + val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout"); + if (val) { + SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n"); + SPDK_WARNLOG("Please update your configuration file\n"); + + if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } + } + } + } + + intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); + if (intval > 0) { + g_opts.nvme_adminq_poll_period_us = intval; + } + + if (spdk_process_is_primary()) { + hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false); + } + + hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate"); + + g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN"); + probe_ctx->hostnqn = g_nvme_hostnqn; + + for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { + val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); + if (val == NULL) { + break; + } + + rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); + if (rc < 0) { + SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); + rc = -1; + goto end; + } + + val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); + if (val == NULL) { + SPDK_ERRLOG("No name provided for TransportID\n"); + rc = -1; + goto end; + } + + probe_ctx->names[i] = val; + probe_ctx->count++; + + if (probe_ctx->trids[i].trtype != 
SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ctrlr_opts opts; + + if (nvme_ctrlr_get(&probe_ctx->trids[i])) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", + probe_ctx->trids[i].traddr); + rc = -1; + goto end; + } + + if (probe_ctx->trids[i].subnqn[0] == '\0') { + SPDK_ERRLOG("Need to provide subsystem nqn\n"); + rc = -1; + goto end; + } + + spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); + + if (probe_ctx->hostnqn != NULL) { + snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn); + } + + ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts)); + if (ctrlr == NULL) { + SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n", + probe_ctx->trids[i].traddr); + rc = -1; + goto end; + } + + rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i]); + if (rc) { + goto end; + } + } else { + local_nvme_num++; + } + } + + if (local_nvme_num > 0) { + /* used to probe local NVMe device */ + if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, NULL)) { + rc = -1; + goto end; + } + + for (i = 0; i < probe_ctx->count; i++) { + if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { + continue; + } + + if (!nvme_ctrlr_get(&probe_ctx->trids[i])) { + SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); + SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); + } + } + } + + rc = spdk_bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL); + if (rc) { + SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc)); + rc = -1; + } +end: + spdk_nvme_retry_count = g_opts.retry_count; + + free(probe_ctx); + return rc; +} + +static void +bdev_nvme_library_fini(void) +{ + spdk_poller_unregister(&g_hotplug_poller); +} + +static int +nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr) +{ + int rc; + int bdev_created = 0; + uint32_t nsid; + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(nvme_ctrlr->ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(nvme_ctrlr->ctrlr, nsid)) { + rc = nvme_ctrlr_create_bdev(nvme_ctrlr, nsid); + if (rc == 0) { + bdev_created++; + } + } + + return (bdev_created > 0) ? 
0 : -1; +} + +static void +bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); + + spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc); +} + +static void +bdev_nvme_admin_passthru_completion(void *ctx) +{ + struct nvme_bdev_io *bio = ctx; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); + + spdk_bdev_io_complete_nvme_status(bdev_io, + bio->cpl.status.sct, bio->cpl.status.sc); +} + +static void +bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_bdev_io *bio = ref; + + bio->cpl = *cpl; + spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); +} + +static void +bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) +{ + struct nvme_bdev_io *bio = ref; + struct iovec *iov; + + bio->iov_offset = sgl_offset; + for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { + iov = &bio->iovs[bio->iovpos]; + if (bio->iov_offset < iov->iov_len) { + break; + } + + bio->iov_offset -= iov->iov_len; + } +} + +static int +bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) +{ + struct nvme_bdev_io *bio = ref; + struct iovec *iov; + + assert(bio->iovpos < bio->iovcnt); + + iov = &bio->iovs[bio->iovpos]; + + *address = iov->iov_base; + *length = iov->iov_len; + + if (bio->iov_offset) { + assert(bio->iov_offset <= iov->iov_len); + *address += bio->iov_offset; + *length -= bio->iov_offset; + } + + bio->iov_offset += *length; + if (bio->iov_offset == iov->iov_len) { + bio->iovpos++; + bio->iov_offset = 0; + } + + return 0; +} + +static int +bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, + struct nvme_bdev_io *bio, + int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, + uint64_t lba) +{ + int rc; + + bio->iovs = iov; + bio->iovcnt = iovcnt; + bio->iovpos = 0; + bio->iov_offset = 0; + + if (direction == BDEV_DISK_READ) { + rc = spdk_nvme_ns_cmd_readv(bdev->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, 0, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } else { + rc = spdk_nvme_ns_cmd_writev(bdev->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, 0, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } + + if (rc != 0 && rc != -ENOMEM) { + SPDK_ERRLOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? 
"readv" : "writev", rc); + } + return rc; +} + +static int +bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + uint64_t offset_blocks, + uint64_t num_blocks) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; + struct spdk_nvme_dsm_range *range; + uint64_t offset, remaining; + uint64_t num_ranges_u64; + uint16_t num_ranges; + int rc; + + num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { + SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); + return -EINVAL; + } + num_ranges = (uint16_t)num_ranges_u64; + + offset = offset_blocks; + remaining = num_blocks; + range = &dsm_ranges[0]; + + /* Fill max-size ranges until the remaining blocks fit into one range */ + while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { + range->attributes.raw = 0; + range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + range->starting_lba = offset; + + offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + range++; + } + + /* Final range describes the remaining blocks */ + range->attributes.raw = 0; + range->length = remaining; + range->starting_lba = offset; + + rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair, + SPDK_NVME_DSM_ATTR_DEALLOCATE, + dsm_ranges, num_ranges, + bdev_nvme_queued_done, bio); + + return rc; +} + +static int +bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) +{ + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + bio->orig_thread = spdk_io_channel_get_thread(ch); + + return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_ctrlr->ctrlr, cmd, buf, + (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); +} + +static int +bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + /* + * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, + * so fill it out automatically. 
+ */ + cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns); + + return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, + (uint32_t)nbytes, bdev_nvme_queued_done, bio); +} + +static int +bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->ns); + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->ns)) { + SPDK_ERRLOG("invalid meta data buffer size\n"); + return -EINVAL; + } + + /* + * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, + * so fill it out automatically. + */ + cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns); + + return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, + (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio); +} + +static void +bdev_nvme_get_spdk_running_config(FILE *fp) +{ + struct nvme_ctrlr *nvme_ctrlr; + + fprintf(fp, "\n[Nvme]"); + fprintf(fp, "\n" + "# NVMe Device Whitelist\n" + "# Users may specify which NVMe devices to claim by their transport id.\n" + "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n" + "# The second argument is the assigned name, which can be referenced from\n" + "# other sections in the configuration file. For NVMe devices, a namespace\n" + "# is automatically appended to each name in the format nY, where\n" + "# Y is the NSID (starts at 1).\n"); + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + const char *trtype; + + trtype = spdk_nvme_transport_id_trtype_str(nvme_ctrlr->trid.trtype); + if (!trtype) { + continue; + } + + if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n", + trtype, + nvme_ctrlr->trid.traddr, nvme_ctrlr->name); + } else { + const char *adrfam; + + adrfam = spdk_nvme_transport_id_adrfam_str(nvme_ctrlr->trid.adrfam); + + if (adrfam) { + fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n", + trtype, adrfam, + nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid, + nvme_ctrlr->trid.subnqn, nvme_ctrlr->name); + } else { + fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n", + trtype, + nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid, + nvme_ctrlr->trid.subnqn, nvme_ctrlr->name); + } + + } + } + + fprintf(fp, "\n" + "# The number of attempts per I/O when an I/O fails. Do not include\n" + "# this key to get the default behavior.\n"); + fprintf(fp, "RetryCount %d\n", spdk_nvme_retry_count); + fprintf(fp, "\n" + "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n"); + fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us); + + fprintf(fp, "\n" + "# Action to take on command time out. Only valid when Timeout is greater\n" + "# than 0. 
This may be 'Reset' to reset the controller, 'Abort' to abort\n" + "# the command, or 'None' to just print a message but do nothing.\n" + "# Admin command timeouts will always result in a reset.\n"); + switch (g_opts.action_on_timeout) { + case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: + fprintf(fp, "ActionOnTimeout None\n"); + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: + fprintf(fp, "ActionOnTimeout Reset\n"); + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: + fprintf(fp, "ActionOnTimeout Abort\n"); + break; + } + + fprintf(fp, "\n" + "# Set how often the admin queue is polled for asynchronous events.\n" + "# Units in microseconds.\n"); + fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us); + fprintf(fp, "\n" + "# Disable handling of hotplug (runtime insert and remove) events,\n" + "# users can set to Yes if want to enable it.\n" + "# Default: No\n"); + fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No"); + fprintf(fp, "\n" + "# Set how often the hotplug is processed for insert and remove events." + "# Units in microseconds.\n"); + fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us); + if (g_nvme_hostnqn) { + fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn); + } + + fprintf(fp, "\n"); +} + +static int +bdev_nvme_config_json(struct spdk_json_write_ctx *w) +{ + struct nvme_ctrlr *nvme_ctrlr; + struct spdk_nvme_transport_id *trid; + const char *action; + + if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) { + action = "reset"; + } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) { + action = "abort"; + } else { + action = "none"; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "set_bdev_nvme_options"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "action_on_timeout", action); + spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us); + spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count); + spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + trid = &nvme_ctrlr->trid; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_nvme_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", nvme_ctrlr->name); + spdk_bdev_nvme_dump_trid_json(trid, w); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + /* Dump as last parameter to give all NVMe bdevs chance to be constructed + * before enabling hotplug poller. 
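+ * The object written below has the following shape (values shown are illustrative; + * they mirror g_nvme_hotplug_poll_period_us and g_nvme_hotplug_enabled): + *   { "method": "set_bdev_nvme_hotplug", + *     "params": { "period_us": 100000, "enable": false } }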
+ */ + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "set_bdev_nvme_hotplug"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us); + spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + pthread_mutex_unlock(&g_bdev_nvme_mutex); + return 0; +} + +struct spdk_nvme_ctrlr * +spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) +{ + if (!bdev || bdev->module != &nvme_if) { + return NULL; + } + + return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ctrlr->ctrlr; +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME) diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.h b/src/spdk/lib/bdev/nvme/bdev_nvme.h new file mode 100644 index 00000000..b8c458e8 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme.h @@ -0,0 +1,112 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BDEV_NVME_H +#define SPDK_BDEV_NVME_H + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" +#include "spdk/nvme.h" +#include "spdk/bdev_module.h" + +#define NVME_MAX_CONTROLLERS 1024 + +enum spdk_bdev_timeout_action { + SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, + SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, + SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT, +}; + +struct spdk_bdev_nvme_opts { + enum spdk_bdev_timeout_action action_on_timeout; + uint64_t timeout_us; + uint32_t retry_count; + uint64_t nvme_adminq_poll_period_us; +}; + +struct nvme_ctrlr { + /** + * points to pinned, physically contiguous memory region; + * contains 4KB IDENTIFY structure for controller which is + * target for CONTROLLER IDENTIFY command during initialization + */ + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id trid; + char *name; + int ref; + uint32_t num_ns; + /** Array of bdevs indexed by nsid - 1 */ + struct nvme_bdev *bdevs; + + struct spdk_poller *adminq_timer_poller; + + /** linked list pointer for device list */ + TAILQ_ENTRY(nvme_ctrlr) tailq; +}; + +struct nvme_bdev { + struct spdk_bdev disk; + struct nvme_ctrlr *nvme_ctrlr; + uint32_t id; + bool active; + struct spdk_nvme_ns *ns; +}; + +void spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid, + struct spdk_json_write_ctx *w); + +struct spdk_nvme_qpair *spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); +struct nvme_ctrlr *spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name); +struct nvme_ctrlr *spdk_bdev_nvme_first_ctrlr(void); +struct nvme_ctrlr *spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev); +void spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts); +int spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts); +int spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx); + +int spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid, + const char *base_name, + const char **names, size_t *count, + const char *hostnqn); +struct spdk_nvme_ctrlr *spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev); + +/** + * Delete NVMe controller with all bdevs on top of it. + * Requires to pass name of NVMe controller. + * + * \param name NVMe controller name + * \return zero on success, -EINVAL on wrong parameters or -ENODEV if controller is not found + */ +int spdk_bdev_nvme_delete(const char *name); + +#endif // SPDK_BDEV_NVME_H diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c new file mode 100644 index 00000000..0312a756 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c @@ -0,0 +1,740 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "bdev_nvme.h" + +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" +#include "spdk/bdev_module.h" + +struct open_descriptors { + void *desc; + struct spdk_bdev *bdev; + TAILQ_ENTRY(open_descriptors) tqlst; +}; +typedef TAILQ_HEAD(, open_descriptors) open_descriptors_t; + +static int +rpc_decode_action_on_timeout(const struct spdk_json_val *val, void *out) +{ + enum spdk_bdev_timeout_action *action = out; + + if (spdk_json_strequal(val, "none") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE; + } else if (spdk_json_strequal(val, "abort") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; + } else if (spdk_json_strequal(val, "reset") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } else { + SPDK_NOTICELOG("Invalid parameter value: action_on_timeout\n"); + return -EINVAL; + } + + return 0; +} + +static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = { + {"action_on_timeout", offsetof(struct spdk_bdev_nvme_opts, action_on_timeout), rpc_decode_action_on_timeout, true}, + {"timeout_us", offsetof(struct spdk_bdev_nvme_opts, timeout_us), spdk_json_decode_uint64, true}, + {"retry_count", offsetof(struct spdk_bdev_nvme_opts, retry_count), spdk_json_decode_uint32, true}, + {"nvme_adminq_poll_period_us", offsetof(struct spdk_bdev_nvme_opts, nvme_adminq_poll_period_us), spdk_json_decode_uint64, true}, +}; + +static void +spdk_rpc_set_bdev_nvme_options(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_bdev_nvme_opts opts; + struct spdk_json_write_ctx *w; + int rc; + + spdk_bdev_nvme_get_opts(&opts); + if (params && spdk_json_decode_object(params, rpc_bdev_nvme_options_decoders, + SPDK_COUNTOF(rpc_bdev_nvme_options_decoders), + &opts)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_set_opts(&opts); + if (rc) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } + + return; +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("set_bdev_nvme_options", spdk_rpc_set_bdev_nvme_options, SPDK_RPC_STARTUP) + +struct rpc_bdev_nvme_hotplug { + bool enabled; + uint64_t period_us; +}; + +static const struct spdk_json_object_decoder rpc_bdev_nvme_hotplug_decoders[] = { + {"enable", offsetof(struct rpc_bdev_nvme_hotplug, enabled), spdk_json_decode_bool, false}, + {"period_us", offsetof(struct rpc_bdev_nvme_hotplug, period_us), spdk_json_decode_uint64, true}, +}; + +static void 
+rpc_set_bdev_nvme_hotplug_done(void *ctx) +{ + struct spdk_jsonrpc_request *request = ctx; + struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request); + + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } +} + +static void +spdk_rpc_set_bdev_nvme_hotplug(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_nvme_hotplug req = {false, 0}; + int rc; + + if (spdk_json_decode_object(params, rpc_bdev_nvme_hotplug_decoders, + SPDK_COUNTOF(rpc_bdev_nvme_hotplug_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_set_hotplug(req.enabled, req.period_us, rpc_set_bdev_nvme_hotplug_done, + request); + if (rc) { + goto invalid; + } + + return; +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("set_bdev_nvme_hotplug", spdk_rpc_set_bdev_nvme_hotplug, SPDK_RPC_RUNTIME) + +struct rpc_construct_nvme { + char *name; + char *trtype; + char *adrfam; + char *traddr; + char *trsvcid; + char *subnqn; + char *hostnqn; +}; + +static void +free_rpc_construct_nvme(struct rpc_construct_nvme *req) +{ + free(req->name); + free(req->trtype); + free(req->adrfam); + free(req->traddr); + free(req->trsvcid); + free(req->subnqn); + free(req->hostnqn); +} + +static const struct spdk_json_object_decoder rpc_construct_nvme_decoders[] = { + {"name", offsetof(struct rpc_construct_nvme, name), spdk_json_decode_string}, + {"trtype", offsetof(struct rpc_construct_nvme, trtype), spdk_json_decode_string}, + {"traddr", offsetof(struct rpc_construct_nvme, traddr), spdk_json_decode_string}, + + {"adrfam", offsetof(struct rpc_construct_nvme, adrfam), spdk_json_decode_string, true}, + {"trsvcid", offsetof(struct rpc_construct_nvme, trsvcid), spdk_json_decode_string, true}, + {"subnqn", offsetof(struct rpc_construct_nvme, subnqn), spdk_json_decode_string, true}, + {"hostnqn", offsetof(struct rpc_construct_nvme, hostnqn), spdk_json_decode_string, true} +}; + +#define NVME_MAX_BDEVS_PER_RPC 128 + +static void +spdk_rpc_construct_nvme_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_nvme req = {}; + struct spdk_json_write_ctx *w; + struct spdk_nvme_transport_id trid = {}; + const char *names[NVME_MAX_BDEVS_PER_RPC]; + size_t count; + size_t i; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_nvme_decoders, + SPDK_COUNTOF(rpc_construct_nvme_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + /* Parse trtype */ + rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, req.trtype); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse trtype: %s\n", req.trtype); + goto invalid; + } + + /* Parse traddr */ + snprintf(trid.traddr, sizeof(trid.traddr), "%s", req.traddr); + + /* Parse adrfam */ + if (req.adrfam) { + rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, req.adrfam); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse adrfam: %s\n", req.adrfam); + goto invalid; + } + } + + /* Parse trsvcid */ + if (req.trsvcid) { + snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", req.trsvcid); + } + + /* Parse subnqn */ + if (req.subnqn) { + snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", req.subnqn); + } + + count = NVME_MAX_BDEVS_PER_RPC; + if (spdk_bdev_nvme_create(&trid, req.name, names, &count, req.hostnqn)) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) 
{ + free_rpc_construct_nvme(&req); + return; + } + + spdk_json_write_array_begin(w); + for (i = 0; i < count; i++) { + spdk_json_write_string(w, names[i]); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + + free_rpc_construct_nvme(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_nvme(&req); +} +SPDK_RPC_REGISTER("construct_nvme_bdev", spdk_rpc_construct_nvme_bdev, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_dump_nvme_controller_info(struct spdk_json_write_ctx *w, + struct nvme_ctrlr *nvme_ctrlr) +{ + struct spdk_nvme_transport_id *trid; + + trid = &nvme_ctrlr->trid; + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "name", nvme_ctrlr->name); + + spdk_json_write_named_object_begin(w, "trid"); + spdk_bdev_nvme_dump_trid_json(trid, w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +struct rpc_get_nvme_controllers { + char *name; +}; + +static void +free_rpc_get_nvme_controllers(struct rpc_get_nvme_controllers *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_get_nvme_controllers_decoders[] = { + {"name", offsetof(struct rpc_get_nvme_controllers, name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_nvme_controllers(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_nvme_controllers req = {}; + struct spdk_json_write_ctx *w; + struct nvme_ctrlr *ctrlr = NULL; + + if (params && spdk_json_decode_object(params, rpc_get_nvme_controllers_decoders, + SPDK_COUNTOF(rpc_get_nvme_controllers_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.name) { + ctrlr = spdk_bdev_nvme_lookup_ctrlr(req.name); + if (ctrlr == NULL) { + SPDK_ERRLOG("ctrlr '%s' does not exist\n", req.name); + goto invalid; + } + } + + free_rpc_get_nvme_controllers(&req); + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + if (ctrlr != NULL) { + spdk_rpc_dump_nvme_controller_info(w, ctrlr); + } else { + for (ctrlr = spdk_bdev_nvme_first_ctrlr(); ctrlr; ctrlr = spdk_bdev_nvme_next_ctrlr(ctrlr)) { + spdk_rpc_dump_nvme_controller_info(w, ctrlr); + } + } + + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + free_rpc_get_nvme_controllers(&req); +} +SPDK_RPC_REGISTER("get_nvme_controllers", spdk_rpc_get_nvme_controllers, SPDK_RPC_RUNTIME) + +struct rpc_delete_nvme { + char *name; +}; + +static void +free_rpc_delete_nvme(struct rpc_delete_nvme *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_nvme_decoders[] = { + {"name", offsetof(struct rpc_delete_nvme, name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_delete_nvme_ctrlr(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_nvme req = {NULL}; + struct spdk_json_write_ctx *w; + int rc = 0; + + if (spdk_json_decode_object(params, rpc_delete_nvme_decoders, + SPDK_COUNTOF(rpc_delete_nvme_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_delete(req.name); + if (rc != 0) { + goto invalid; + } + + free_rpc_delete_nvme(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + 
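/* Illustrative request (controller name is hypothetical): {"method": "delete_nvme_controller", + * "params": {"name": "Nvme0"}}, wrapped in the usual JSON-RPC envelope; on success the + * result written here is a bare boolean true. */ +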
spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_delete_nvme(&req); +} +SPDK_RPC_REGISTER("delete_nvme_controller", spdk_rpc_delete_nvme_ctrlr, SPDK_RPC_RUNTIME) + +struct rpc_apply_firmware { + char *filename; + char *bdev_name; +}; + +static void +free_rpc_apply_firmware(struct rpc_apply_firmware *req) +{ + free(req->filename); + free(req->bdev_name); +} + +static const struct spdk_json_object_decoder rpc_apply_firmware_decoders[] = { + {"filename", offsetof(struct rpc_apply_firmware, filename), spdk_json_decode_string}, + {"bdev_name", offsetof(struct rpc_apply_firmware, bdev_name), spdk_json_decode_string}, +}; + +struct firmware_update_info { + void *fw_image; + void *p; + unsigned int size; + unsigned int size_remaining; + unsigned int offset; + unsigned int transfer; + + void *desc; + struct spdk_io_channel *ch; + struct spdk_jsonrpc_request *request; + struct spdk_nvme_ctrlr *ctrlr; + open_descriptors_t desc_head; + struct rpc_apply_firmware *req; +}; + +static void +apply_firmware_cleanup(void *cb_arg) +{ + struct open_descriptors *opt, *tmp; + struct firmware_update_info *firm_ctx = cb_arg; + + if (!firm_ctx) { + return; + } + + if (firm_ctx->fw_image) { + spdk_dma_free(firm_ctx->fw_image); + } + + if (firm_ctx->req) { + free_rpc_apply_firmware(firm_ctx->req); + free(firm_ctx->req); + } + TAILQ_FOREACH_SAFE(opt, &firm_ctx->desc_head, tqlst, tmp) { + TAILQ_REMOVE(&firm_ctx->desc_head, opt, tqlst); + spdk_bdev_close(opt->desc); + free(opt); + } + free(firm_ctx); +} + +static void +apply_firmware_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + int rc; + struct spdk_json_write_ctx *w; + struct firmware_update_info *firm_ctx = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware commit failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((rc = spdk_nvme_ctrlr_reset(firm_ctx->ctrlr)) != 0) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Controller reset failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + if (!(w = spdk_jsonrpc_begin_result(firm_ctx->request))) { + apply_firmware_cleanup(firm_ctx); + return; + } + + spdk_json_write_string(w, "firmware commit succeeded. Controller reset in progress."); + spdk_jsonrpc_end_result(firm_ctx->request, w); + apply_firmware_cleanup(firm_ctx); +} + +static void +apply_firmware_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_nvme_cmd cmd = {}; + struct spdk_nvme_fw_commit fw_commit; + int slot = 0; + int rc; + struct firmware_update_info *firm_ctx = cb_arg; + enum spdk_nvme_fw_commit_action commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG; + + if (!success) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware download failed ."); + spdk_bdev_free_io(bdev_io); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->p += firm_ctx->transfer; + firm_ctx->offset += firm_ctx->transfer; + firm_ctx->size_remaining -= firm_ctx->transfer; + + switch (firm_ctx->size_remaining) { + case 0: + /* firmware download completed. 
Commit firmware */ + memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); + fw_commit.fs = slot; + fw_commit.ca = commit_action; + + cmd.opc = SPDK_NVME_OPC_FIRMWARE_COMMIT; + memcpy(&cmd.cdw10, &fw_commit, sizeof(uint32_t)); + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, NULL, 0, + apply_firmware_complete_reset, firm_ctx); + if (rc) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware commit failed."); + spdk_bdev_free_io(bdev_io); + apply_firmware_cleanup(firm_ctx); + return; + } + break; + default: + firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096); + cmd.opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + + cmd.cdw10 = (firm_ctx->transfer >> 2) - 1; + cmd.cdw11 = firm_ctx->offset >> 2; + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, firm_ctx->p, + firm_ctx->transfer, apply_firmware_complete, firm_ctx); + if (rc) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware download failed."); + spdk_bdev_free_io(bdev_io); + apply_firmware_cleanup(firm_ctx); + return; + } + break; + } +} + +static void +spdk_rpc_apply_nvme_firmware(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + int rc; + int fd = -1; + struct stat fw_stat; + struct spdk_nvme_ctrlr *ctrlr; + char msg[1024]; + struct spdk_bdev *bdev; + struct spdk_bdev *bdev2; + struct open_descriptors *opt; + struct spdk_bdev_desc *desc; + struct spdk_nvme_cmd *cmd; + struct firmware_update_info *firm_ctx; + + firm_ctx = malloc(sizeof(struct firmware_update_info)); + if (!firm_ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + return; + } + firm_ctx->fw_image = NULL; + TAILQ_INIT(&firm_ctx->desc_head); + firm_ctx->request = request; + + firm_ctx->req = malloc(sizeof(struct rpc_apply_firmware)); + if (!firm_ctx->req) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + free(firm_ctx); + return; + } + + if (spdk_json_decode_object(params, rpc_apply_firmware_decoders, + SPDK_COUNTOF(rpc_apply_firmware_decoders), firm_ctx->req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed."); + free(firm_ctx->req); + free(firm_ctx); + return; + } + + if ((bdev = spdk_bdev_get_by_name(firm_ctx->req->bdev_name)) == NULL) { + snprintf(msg, sizeof(msg), "bdev %s were not found", firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((ctrlr = spdk_bdev_nvme_get_ctrlr(bdev)) == NULL) { + snprintf(msg, sizeof(msg), "Controller information for %s were not found.", + firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + firm_ctx->ctrlr = ctrlr; + + for (bdev2 = spdk_bdev_first(); bdev2; bdev2 = spdk_bdev_next(bdev2)) { + + if (spdk_bdev_nvme_get_ctrlr(bdev2) != ctrlr) { + continue; + } + + if (!(opt = malloc(sizeof(struct open_descriptors)))) { + snprintf(msg, sizeof(msg), "Memory allocation error."); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((rc = spdk_bdev_open(bdev2, true, NULL, NULL, &desc)) != 0) { + snprintf(msg, sizeof(msg), "Device %s is in use.", 
firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + free(opt); + apply_firmware_cleanup(firm_ctx); + return; + } + + opt->desc = desc; + opt->bdev = bdev; + TAILQ_INSERT_TAIL(&firm_ctx->desc_head, opt, tqlst); + } + + /* + * find a descriptor associated with our bdev + */ + firm_ctx->desc = NULL; + TAILQ_FOREACH(opt, &firm_ctx->desc_head, tqlst) { + if (opt->bdev == bdev) { + firm_ctx->desc = opt->desc; + break; + } + } + + if (!firm_ctx->desc) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "No descriptor were found."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->ch = spdk_bdev_get_io_channel(firm_ctx->desc); + if (!firm_ctx->ch) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "No channels were found."); + apply_firmware_cleanup(firm_ctx); + return; + } + + fd = open(firm_ctx->req->filename, O_RDONLY); + if (fd < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "open file failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + rc = fstat(fd, &fw_stat); + if (rc < 0) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "fstat failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->size = fw_stat.st_size; + if (fw_stat.st_size % 4) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Firmware image size is not multiple of 4."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->fw_image = spdk_dma_zmalloc(firm_ctx->size, 4096, NULL); + if (!firm_ctx->fw_image) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + apply_firmware_cleanup(firm_ctx); + return; + } + firm_ctx->p = firm_ctx->fw_image; + + if (read(fd, firm_ctx->p, firm_ctx->size) != ((ssize_t)(firm_ctx->size))) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Read firmware image failed!"); + apply_firmware_cleanup(firm_ctx); + return; + } + close(fd); + + firm_ctx->offset = 0; + firm_ctx->size_remaining = firm_ctx->size; + firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096); + + cmd = malloc(sizeof(struct spdk_nvme_cmd)); + if (!cmd) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + apply_firmware_cleanup(firm_ctx); + return; + } + memset(cmd, 0, sizeof(struct spdk_nvme_cmd)); + cmd->opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + + cmd->cdw10 = (firm_ctx->transfer >> 2) - 1; + cmd->cdw11 = firm_ctx->offset >> 2; + + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, cmd, firm_ctx->p, + firm_ctx->transfer, apply_firmware_complete, firm_ctx); + if (rc) { + free(cmd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Read firmware image failed!"); + apply_firmware_cleanup(firm_ctx); + return; + } +} +SPDK_RPC_REGISTER("apply_nvme_firmware", spdk_rpc_apply_nvme_firmware, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/nvme/nvme_rpc.c b/src/spdk/lib/bdev/nvme/nvme_rpc.c new file mode 100644 index 00000000..b49a7d42 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/nvme_rpc.c @@ -0,0 +1,487 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "bdev_nvme.h" +#include "spdk/base64.h" + +enum spdk_nvme_rpc_type { + NVME_ADMIN_CMD = 1, + NVME_IO_CMD, +}; + +struct rpc_send_nvme_cmd_req { + char *name; + int cmd_type; + int data_direction; + uint32_t timeout_ms; + uint32_t data_len; + uint32_t md_len; + + struct spdk_nvme_cmd *cmdbuf; + char *data; + char *md; +}; + +struct rpc_send_nvme_cmd_resp { + char *cpl_text; + char *data_text; + char *md_text; +}; + +struct rpc_send_nvme_cmd_ctx { + struct spdk_jsonrpc_request *jsonrpc_request; + struct rpc_send_nvme_cmd_req req; + struct rpc_send_nvme_cmd_resp resp; + struct nvme_ctrlr *nvme_ctrlr; + struct spdk_io_channel *ctrlr_io_ch; +}; + +static void +free_rpc_send_nvme_cmd_ctx(struct rpc_send_nvme_cmd_ctx *ctx) +{ + assert(ctx != NULL); + + free(ctx->req.name); + free(ctx->req.cmdbuf); + spdk_dma_free(ctx->req.data); + spdk_dma_free(ctx->req.md); + free(ctx->resp.cpl_text); + free(ctx->resp.data_text); + free(ctx->resp.md_text); + free(ctx); +} + +static int +rpc_send_nvme_cmd_resp_construct(struct rpc_send_nvme_cmd_resp *resp, + struct rpc_send_nvme_cmd_req *req, + const struct spdk_nvme_cpl *cpl) +{ + resp->cpl_text = malloc(spdk_base64_get_encoded_strlen(sizeof(*cpl)) + 1); + if (!resp->cpl_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->cpl_text, cpl, sizeof(*cpl)); + + if (req->data_direction == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + if (req->data_len) { + resp->data_text = malloc(spdk_base64_get_encoded_strlen(req->data_len) + 1); + if (!resp->data_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->data_text, req->data, req->data_len); + } + if (req->md_len) { + resp->md_text = malloc(spdk_base64_get_encoded_strlen(req->md_len) + 1); + if (!resp->md_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->md_text, req->md, req->md_len); + } + } + + return 0; +} + +static 
void +spdk_rpc_send_nvme_cmd_complete(struct rpc_send_nvme_cmd_ctx *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_jsonrpc_request *request = ctx->jsonrpc_request; + struct spdk_json_write_ctx *w; + int ret; + + ret = rpc_send_nvme_cmd_resp_construct(&ctx->resp, &ctx->req, cpl); + if (ret) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + spdk_strerror(-ret)); + goto out; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + goto out; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "cpl", ctx->resp.cpl_text); + + if (ctx->resp.data_text) { + spdk_json_write_named_string(w, "data", ctx->resp.data_text); + } + + if (ctx->resp.md_text) { + spdk_json_write_named_string(w, "metadata", ctx->resp.md_text); + } + + spdk_json_write_object_end(w); + spdk_jsonrpc_end_result(request, w); + +out: + free_rpc_send_nvme_cmd_ctx(ctx); + return; +} + +static void +nvme_rpc_bdev_nvme_cb(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct rpc_send_nvme_cmd_ctx *ctx = (struct rpc_send_nvme_cmd_ctx *)ref; + + if (ctx->ctrlr_io_ch) { + spdk_put_io_channel(ctx->ctrlr_io_ch); + ctx->ctrlr_io_ch = NULL; + } + + spdk_rpc_send_nvme_cmd_complete(ctx, cpl); +} + +static int +nvme_rpc_admin_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd, + void *buf, uint32_t nbytes, uint32_t timeout_ms) +{ + struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr; + int ret; + + ret = spdk_nvme_ctrlr_cmd_admin_raw(_nvme_ctrlr->ctrlr, cmd, buf, + nbytes, nvme_rpc_bdev_nvme_cb, ctx); + + return ret; +} + +static int +nvme_rpc_io_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd, + void *buf, uint32_t nbytes, void *md_buf, uint32_t md_len, + uint32_t timeout_ms) +{ + struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr; + struct spdk_nvme_qpair *io_qpair; + int ret; + + ctx->ctrlr_io_ch = spdk_get_io_channel(_nvme_ctrlr->ctrlr); + io_qpair = spdk_bdev_nvme_get_io_qpair(ctx->ctrlr_io_ch); + + ret = spdk_nvme_ctrlr_cmd_io_raw_with_md(_nvme_ctrlr->ctrlr, io_qpair, + cmd, buf, nbytes, md_buf, nvme_rpc_bdev_nvme_cb, ctx); + if (ret) { + spdk_put_io_channel(ctx->ctrlr_io_ch); + } + + return ret; + +} + +static int +rpc_send_nvme_cmd_exec(struct rpc_send_nvme_cmd_ctx *ctx) +{ + struct rpc_send_nvme_cmd_req *req = &ctx->req; + int ret = -EINVAL; + + switch (req->cmd_type) { + case NVME_ADMIN_CMD: + ret = nvme_rpc_admin_cmd_bdev_nvme(ctx, req->cmdbuf, req->data, + req->data_len, req->timeout_ms); + break; + case NVME_IO_CMD: + ret = nvme_rpc_io_cmd_bdev_nvme(ctx, req->cmdbuf, req->data, + req->data_len, req->md, req->md_len, req->timeout_ms); + break; + } + + return ret; +} + +static int +rpc_decode_cmd_type(const struct spdk_json_val *val, void *out) +{ + int *cmd_type = out; + + if (spdk_json_strequal(val, "admin") == true) { + *cmd_type = NVME_ADMIN_CMD; + } else if (spdk_json_strequal(val, "io") == true) { + *cmd_type = NVME_IO_CMD; + } else { + SPDK_NOTICELOG("Invalid parameter value: cmd_type\n"); + return -EINVAL; + } + + return 0; +} + +static int +rpc_decode_data_direction(const struct spdk_json_val *val, void *out) +{ + int *data_direction = out; + + if (spdk_json_strequal(val, "h2c") == true) { + *data_direction = SPDK_NVME_DATA_HOST_TO_CONTROLLER; + } else if (spdk_json_strequal(val, "c2h") == true) { + *data_direction = SPDK_NVME_DATA_CONTROLLER_TO_HOST; + } else { + SPDK_NOTICELOG("Invalid parameter value: data_direction\n"); + return -EINVAL; + } + + return 0; +} + +static int +rpc_decode_cmdbuf(const 
struct spdk_json_val *val, void *out) +{ + char *text = NULL; + size_t text_strlen, raw_len; + struct spdk_nvme_cmd *cmdbuf, **_cmdbuf = out; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL; + } + + text_strlen = strlen(text); + raw_len = spdk_base64_get_decoded_len(text_strlen); + cmdbuf = malloc(raw_len); + if (!cmdbuf) { + rc = -ENOMEM; + goto out; + } + + rc = spdk_base64_urlsafe_decode(cmdbuf, &raw_len, text); + if (rc) { + goto out; + } + if (raw_len != sizeof(*cmdbuf)) { + rc = -EINVAL; + goto out; + } + + *_cmdbuf = cmdbuf; + +out: + free(text); + return rc; +} + +static int +rpc_decode_data(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + char *text = NULL; + size_t text_strlen; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL; + } + text_strlen = strlen(text); + + if (req->data_len) { + /* data_len is decoded by param "data_len" */ + if (req->data_len != spdk_base64_get_decoded_len(text_strlen)) { + rc = -EINVAL; + goto out; + } + } else { + req->data_len = spdk_base64_get_decoded_len(text_strlen); + req->data = spdk_dma_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000, NULL); + if (!req->data) { + rc = -ENOMEM; + goto out; + } + } + + rc = spdk_base64_urlsafe_decode(req->data, (size_t *)&req->data_len, text); + +out: + free(text); + return rc; +} + +static int +rpc_decode_data_len(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + uint32_t data_len; + int rc; + + rc = spdk_json_decode_uint32(val, &data_len); + if (rc) { + return rc; + } + + if (req->data_len) { + /* data_len is decoded by param "data" */ + if (req->data_len != data_len) { + rc = -EINVAL; + } + } else { + req->data_len = data_len; + req->data = spdk_dma_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000, NULL); + if (!req->data) { + rc = -ENOMEM; + } + } + + return rc; +} + +static int +rpc_decode_metadata(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + char *text = NULL; + size_t text_strlen; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return rc = val->type == SPDK_JSON_VAL_STRING ? 
-ENOMEM : -EINVAL; + } + text_strlen = strlen(text); + + if (req->md_len) { + /* md_len is decoded by param "metadata_len" */ + if (req->md_len != spdk_base64_get_decoded_len(text_strlen)) { + rc = -EINVAL; + goto out; + } + } else { + req->md_len = spdk_base64_get_decoded_len(text_strlen); + req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL); + if (!req->md) { + rc = -ENOMEM; + goto out; + } + } + + rc = spdk_base64_urlsafe_decode(req->md, (size_t *)&req->md_len, text); + +out: + free(text); + return rc; +} + +static int +rpc_decode_metadata_len(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + uint32_t md_len; + int rc; + + rc = spdk_json_decode_uint32(val, &md_len); + if (rc) { + return rc; + } + + if (req->md_len) { + /* md_len is decoded by param "metadata" */ + if (req->md_len != md_len) { + rc = -EINVAL; + } + } else { + req->md_len = md_len; + req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL); + if (!req->md) { + rc = -ENOMEM; + } + } + + return rc; +} + +static const struct spdk_json_object_decoder rpc_send_nvme_cmd_req_decoders[] = { + {"name", offsetof(struct rpc_send_nvme_cmd_req, name), spdk_json_decode_string}, + {"cmd_type", offsetof(struct rpc_send_nvme_cmd_req, cmd_type), rpc_decode_cmd_type}, + {"data_direction", offsetof(struct rpc_send_nvme_cmd_req, data_direction), rpc_decode_data_direction}, + {"cmdbuf", offsetof(struct rpc_send_nvme_cmd_req, cmdbuf), rpc_decode_cmdbuf}, + {"timeout_ms", offsetof(struct rpc_send_nvme_cmd_req, timeout_ms), spdk_json_decode_uint32, true}, + {"data_len", 0, rpc_decode_data_len, true}, + {"metadata_len", 0, rpc_decode_metadata_len, true}, + {"data", 0, rpc_decode_data, true}, + {"metadata", 0, rpc_decode_metadata, true}, +}; + +static void +spdk_rpc_send_nvme_cmd(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_send_nvme_cmd_ctx *ctx; + int ret, error_code; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + SPDK_ERRLOG("Failed at Malloc ctx\n"); + error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR; + ret = -ENOMEM; + goto invalid; + } + + if (spdk_json_decode_object(params, rpc_send_nvme_cmd_req_decoders, + SPDK_COUNTOF(rpc_send_nvme_cmd_req_decoders), + &ctx->req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS; + ret = -EINVAL; + goto invalid; + } + + ctx->nvme_ctrlr = spdk_bdev_nvme_lookup_ctrlr(ctx->req.name); + if (ctx->nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed at device lookup\n"); + error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS; + ret = -EINVAL; + goto invalid; + } + + ctx->jsonrpc_request = request; + + ret = rpc_send_nvme_cmd_exec(ctx); + if (ret < 0) { + SPDK_NOTICELOG("Failed at rpc_send_nvme_cmd_exec\n"); + error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR; + goto invalid; + } + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, error_code, spdk_strerror(-ret)); + free_rpc_send_nvme_cmd_ctx(ctx); + return; +} +SPDK_RPC_REGISTER("send_nvme_cmd", spdk_rpc_send_nvme_cmd, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/part.c b/src/spdk/lib/bdev/part.c new file mode 100644 index 00000000..0cb4759b --- /dev/null +++ b/src/spdk/lib/bdev/part.c @@ -0,0 +1,373 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Common code for partition-like virtual bdevs. + */ + +#include "spdk/bdev.h" +#include "spdk/log.h" +#include "spdk/string.h" + +#include "spdk/bdev_module.h" + +struct spdk_bdev_part_base { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + uint32_t ref; + uint32_t channel_size; + spdk_bdev_part_base_free_fn base_free_fn; + void *ctx; + bool claimed; + struct spdk_bdev_module *module; + struct spdk_bdev_fn_table *fn_table; + struct bdev_part_tailq *tailq; + spdk_io_channel_create_cb ch_create_cb; + spdk_io_channel_destroy_cb ch_destroy_cb; +}; + +struct spdk_bdev * +spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base) +{ + return part_base->bdev; +} + +struct spdk_bdev_desc * +spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base) +{ + return part_base->desc; +} + +struct bdev_part_tailq * +spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base) +{ + return part_base->tailq; +} + +void * +spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base) +{ + return part_base->ctx; +} + +void +spdk_bdev_part_base_free(struct spdk_bdev_part_base *base) +{ + if (base->desc) { + spdk_bdev_close(base->desc); + base->desc = NULL; + } + + if (base->base_free_fn != NULL) { + base->base_free_fn(base->ctx); + } + + free(base); +} + +static void +spdk_bdev_part_free_cb(void *io_device) +{ + struct spdk_bdev_part *part = io_device; + struct spdk_bdev_part_base *base; + + assert(part); + assert(part->internal.base); + + base = part->internal.base; + + TAILQ_REMOVE(base->tailq, part, tailq); + + if (__sync_sub_and_fetch(&base->ref, 1) == 0) { + spdk_bdev_module_release_bdev(base->bdev); + spdk_bdev_part_base_free(base); + } + + spdk_bdev_destruct_done(&part->internal.bdev, 0); + free(part->internal.bdev.name); + free(part->internal.bdev.product_name); + free(part); +} + +int +spdk_bdev_part_free(struct spdk_bdev_part *part) +{ + spdk_io_device_unregister(part, spdk_bdev_part_free_cb); + + /* Return 1 to indicate that this is an asynchronous 
operation that isn't complete + * until spdk_bdev_destruct_done is called */ + return 1; +} + +void +spdk_bdev_part_base_hotremove(struct spdk_bdev *base_bdev, struct bdev_part_tailq *tailq) +{ + struct spdk_bdev_part *part, *tmp; + + TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { + if (part->internal.base->bdev == base_bdev) { + spdk_bdev_unregister(&part->internal.bdev, NULL, NULL); + } + } +} + +static bool +spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type) +{ + struct spdk_bdev_part *part = _part; + + return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt, + io_type); +} + +static struct spdk_io_channel * +spdk_bdev_part_get_io_channel(void *_part) +{ + struct spdk_bdev_part *part = _part; + + return spdk_get_io_channel(part); +} + +struct spdk_bdev * +spdk_bdev_part_get_bdev(struct spdk_bdev_part *part) +{ + return &part->internal.bdev; +} + +struct spdk_bdev_part_base * +spdk_bdev_part_get_base(struct spdk_bdev_part *part) +{ + return part->internal.base; +} + +struct spdk_bdev * +spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part) +{ + return part->internal.base->bdev; +} + +uint64_t +spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part) +{ + return part->internal.offset_blocks; +} + +static void +spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *part_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + spdk_bdev_io_complete(part_io, status); + spdk_bdev_free_io(bdev_io); +} + +int +spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev_part *part = ch->part; + struct spdk_io_channel *base_ch = ch->base_ch; + struct spdk_bdev_desc *base_desc = part->internal.base->desc; + uint64_t offset; + int rc = 0; + + /* Modify the I/O to adjust for the offset within the base bdev. 
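+ * Block-addressed I/O types (read, write, write zeroes, unmap, flush) have the partition's + * offset_blocks added to their offset; resets are passed through to the base bdev unchanged.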
*/ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks; + rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, offset, + bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks; + rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, offset, + bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks; + rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, + spdk_bdev_part_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks; + rc = spdk_bdev_unmap_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, + spdk_bdev_part_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks; + rc = spdk_bdev_flush_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, + spdk_bdev_part_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + rc = spdk_bdev_reset(base_desc, base_ch, + spdk_bdev_part_complete_io, bdev_io); + break; + default: + SPDK_ERRLOG("split: unknown I/O type %d\n", bdev_io->type); + return SPDK_BDEV_IO_STATUS_FAILED; + } + + return rc; +} + +static int +spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; + struct spdk_bdev_part_channel *ch = ctx_buf; + + ch->part = part; + ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc); + if (ch->base_ch == NULL) { + return -1; + } + + if (part->internal.base->ch_create_cb) { + return part->internal.base->ch_create_cb(io_device, ctx_buf); + } else { + return 0; + } +} + +static void +spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; + struct spdk_bdev_part_channel *ch = ctx_buf; + + if (part->internal.base->ch_destroy_cb) { + part->internal.base->ch_destroy_cb(io_device, ctx_buf); + } + spdk_put_io_channel(ch->base_ch); +} + +struct spdk_bdev_part_base * + spdk_bdev_part_base_construct(struct spdk_bdev *bdev, + spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module, + struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq, + spdk_bdev_part_base_free_fn free_fn, void *ctx, + uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb, + spdk_io_channel_destroy_cb ch_destroy_cb) +{ + int rc; + struct spdk_bdev_part_base *base; + + base = calloc(1, sizeof(*base)); + if (!base) { + SPDK_ERRLOG("Memory allocation failure\n"); + return NULL; + } + fn_table->get_io_channel = spdk_bdev_part_get_io_channel; + fn_table->io_type_supported = spdk_bdev_part_io_type_supported; + + base->bdev = bdev; + base->desc = NULL; + base->ref = 0; + base->module = module; + base->fn_table = fn_table; + base->tailq = tailq; + base->base_free_fn = free_fn; + base->ctx = ctx; + base->claimed = false; + base->channel_size = channel_size; + base->ch_create_cb = ch_create_cb; + base->ch_destroy_cb = ch_destroy_cb; + + rc = spdk_bdev_open(bdev, false, remove_cb, bdev, &base->desc); + if (rc) { + spdk_bdev_part_base_free(base); + 
SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); + return NULL; + } + + return base; +} + +int +spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, + char *name, uint64_t offset_blocks, uint64_t num_blocks, + char *product_name) +{ + part->internal.bdev.blocklen = base->bdev->blocklen; + part->internal.bdev.blockcnt = num_blocks; + part->internal.offset_blocks = offset_blocks; + + part->internal.bdev.write_cache = base->bdev->write_cache; + part->internal.bdev.need_aligned_buffer = base->bdev->need_aligned_buffer; + part->internal.bdev.ctxt = part; + part->internal.bdev.module = base->module; + part->internal.bdev.fn_table = base->fn_table; + + part->internal.bdev.name = strdup(name); + part->internal.bdev.product_name = strdup(product_name); + + if (part->internal.bdev.name == NULL) { + SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev)); + return -1; + } else if (part->internal.bdev.product_name == NULL) { + free(part->internal.bdev.name); + SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n", + spdk_bdev_get_name(base->bdev)); + return -1; + } + + __sync_fetch_and_add(&base->ref, 1); + part->internal.base = base; + + if (!base->claimed) { + int rc; + + rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module); + if (rc) { + SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev)); + free(part->internal.bdev.name); + free(part->internal.bdev.product_name); + return -1; + } + base->claimed = true; + } + + spdk_io_device_register(part, spdk_bdev_part_channel_create_cb, + spdk_bdev_part_channel_destroy_cb, + base->channel_size, + name); + + spdk_vbdev_register(&part->internal.bdev, &base->bdev, 1); + TAILQ_INSERT_TAIL(base->tailq, part, tailq); + + return 0; +} diff --git a/src/spdk/lib/bdev/passthru/Makefile b/src/spdk/lib/bdev/passthru/Makefile new file mode 100644 index 00000000..5a2a383a --- /dev/null +++ b/src/spdk/lib/bdev/passthru/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ + +C_SRCS = vbdev_passthru.c vbdev_passthru_rpc.c +LIBNAME = vbdev_passthru + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru.c b/src/spdk/lib/bdev/passthru/vbdev_passthru.c new file mode 100644 index 00000000..4e3dacfc --- /dev/null +++ b/src/spdk/lib/bdev/passthru/vbdev_passthru.c @@ -0,0 +1,671 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is a simple example of a virtual block device module that passes IO + * down to a bdev (or bdevs) that it's configured to attach to.
+ */ + +#include "spdk/stdinc.h" + +#include "vbdev_passthru.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + + +static int vbdev_passthru_init(void); +static void vbdev_passthru_get_spdk_running_config(FILE *fp); +static int vbdev_passthru_get_ctx_size(void); +static void vbdev_passthru_examine(struct spdk_bdev *bdev); +static void vbdev_passthru_finish(void); + +static struct spdk_bdev_module passthru_if = { + .name = "passthru", + .module_init = vbdev_passthru_init, + .config_text = vbdev_passthru_get_spdk_running_config, + .get_ctx_size = vbdev_passthru_get_ctx_size, + .examine_config = vbdev_passthru_examine, + .module_fini = vbdev_passthru_finish +}; + +SPDK_BDEV_MODULE_REGISTER(&passthru_if) + +/* List of pt_bdev names and their base bdevs via configuration file. + * Used so we can parse the conf once at init and use this list in examine(). + */ +struct bdev_names { + char *vbdev_name; + char *bdev_name; + TAILQ_ENTRY(bdev_names) link; +}; +static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names); + +/* List of virtual bdevs and associated info for each. */ +struct vbdev_passthru { + struct spdk_bdev *base_bdev; /* the thing we're attaching to */ + struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ + struct spdk_bdev pt_bdev; /* the PT virtual bdev */ + TAILQ_ENTRY(vbdev_passthru) link; +}; +static TAILQ_HEAD(, vbdev_passthru) g_pt_nodes = TAILQ_HEAD_INITIALIZER(g_pt_nodes); + +/* The pt vbdev channel struct. It is allocated and freed on my behalf by the io channel code. + * If this vbdev needed to implement a poller or a queue for IO, this is where those things + * would be defined. This passthru bdev doesn't actually need to allocate a channel, it could + * simply pass back the channel of the bdev underneath it but for example purposes we will + * present its own to the upper layers. + */ +struct pt_io_channel { + struct spdk_io_channel *base_ch; /* IO channel of base device */ +}; + +/* Just for fun, this pt_bdev module doesn't need it but this is essentially a per IO + * context that we get handed by the bdev layer. + */ +struct passthru_bdev_io { + uint8_t test; + + /* bdev related */ + struct spdk_io_channel *ch; + + /* for bdev_io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +static void +vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); + +/* Called after we've unregistered following a hot remove callback. + * Our finish entry point will be called next. + */ +static int +vbdev_passthru_destruct(void *ctx) +{ + struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx; + + /* Unclaim the underlying bdev. */ + spdk_bdev_module_release_bdev(pt_node->base_bdev); + + /* Close the underlying bdev. */ + spdk_bdev_close(pt_node->base_desc); + + /* Done with this pt_node. */ + TAILQ_REMOVE(&g_pt_nodes, pt_node, link); + free(pt_node->pt_bdev.name); + free(pt_node); + return 0; +} + +/* Completion callback for IO that were issued from this bdev. The original bdev_io + * is passed in as an arg so we'll complete that one with the appropriate status + * and then free the one that this module issued. + */ +static void +_pt_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + int status = success ? 
SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)orig_io->driver_ctx; + + /* We setup this value in the submission routine, just showing here that it is + * passed back to us. + */ + if (io_ctx->test != 0x5a) { + SPDK_ERRLOG("Error, original IO device_ctx is wrong! 0x%x\n", + io_ctx->test); + } + + /* Complete the original IO and then free the one that we created here + * as a result of issuing an IO via submit_request. + */ + spdk_bdev_io_complete(orig_io, status); + spdk_bdev_free_io(bdev_io); +} + +static void +vbdev_passthru_resubmit_io(void *arg) +{ + struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; + struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx; + + vbdev_passthru_submit_request(io_ctx->ch, bdev_io); +} + +static void +vbdev_passthru_queue_io(struct spdk_bdev_io *bdev_io) +{ + struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx; + int rc; + + io_ctx->bdev_io_wait.bdev = bdev_io->bdev; + io_ctx->bdev_io_wait.cb_fn = vbdev_passthru_resubmit_io; + io_ctx->bdev_io_wait.cb_arg = bdev_io; + + rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->ch, &io_ctx->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vbdev_passthru_queue_io, rc=%d.\n", rc); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +/* Callback for getting a buf from the bdev pool in the event that the caller passed + * in NULL; we need to own the buffer so it doesn't get freed by another vbdev module + * beneath us before we're done with it. That won't happen in this example but it could + * if this example were used as a template for something more complex. + */ +static void +pt_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru, + pt_bdev); + struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch); + + spdk_bdev_readv_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, _pt_complete_io, + bdev_io); +} + +/* Called when someone above submits IO to this pt vbdev. We're simply passing it on here + * via SPDK IO calls which in turn allocate another bdev IO and call our cpl callback provided + * below along with the original bdev_io so that we can complete it once this IO completes. + */ +static void +vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru, pt_bdev); + struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch); + struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx; + int rc = 0; + + /* Setup a per IO context value; we don't do anything with it in the vbdev other + * than confirm we get the same thing back in the completion callback just to + * demonstrate.
+ */ + io_ctx->test = 0x5a; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, pt_read_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + rc = spdk_bdev_writev_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, _pt_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + rc = spdk_bdev_write_zeroes_blocks(pt_node->base_desc, pt_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _pt_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(pt_node->base_desc, pt_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _pt_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(pt_node->base_desc, pt_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _pt_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + rc = spdk_bdev_reset(pt_node->base_desc, pt_ch->base_ch, + _pt_complete_io, bdev_io); + break; + default: + SPDK_ERRLOG("passthru: unknown I/O type %d\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + if (rc != 0) { + if (rc == -ENOMEM) { + SPDK_ERRLOG("No memory, start to queue io for passthru.\n"); + io_ctx->ch = ch; + vbdev_passthru_queue_io(bdev_io); + } else { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +/* We'll just call the base bdev and let it answer however if we were more + * restrictive for some reason (or less) we could get the response back + * and modify according to our purposes. + */ +static bool +vbdev_passthru_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx; + + return spdk_bdev_io_type_supported(pt_node->base_bdev, io_type); +} + +/* We supplied this as an entry point for upper layers who want to communicate to this + * bdev. This is how they get a channel. We are passed the same context we provided when + * we created our PT vbdev in examine() which, for this bdev, is the address of one of + * our context nodes. From here we'll ask the SPDK channel code to fill out our channel + * struct and we'll keep it in our PT node. + */ +static struct spdk_io_channel * +vbdev_passthru_get_io_channel(void *ctx) +{ + struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx; + struct spdk_io_channel *pt_ch = NULL; + + /* The IO channel code will allocate a channel for us which consists of + * the SPDK channel structure plus the size of our pt_io_channel struct + * that we passed in when we registered our IO device. It will then call + * our channel create callback to populate any elements that we need to + * update. 
+ */ + pt_ch = spdk_get_io_channel(pt_node); + + return pt_ch; +} + +static int +vbdev_passthru_info_config_json(void *ctx, struct spdk_json_write_ctx *write_ctx) +{ + struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx; + + /* This is the output for get_bdevs() for this vbdev */ + spdk_json_write_name(write_ctx, "passthru"); + spdk_json_write_object_begin(write_ctx); + + spdk_json_write_name(write_ctx, "pt_bdev_name"); + spdk_json_write_string(write_ctx, spdk_bdev_get_name(&pt_node->pt_bdev)); + + spdk_json_write_name(write_ctx, "base_bdev_name"); + spdk_json_write_string(write_ctx, spdk_bdev_get_name(pt_node->base_bdev)); + + spdk_json_write_object_end(write_ctx); + + return 0; +} + +/* We provide this callback for the SPDK channel code to create a channel using + * the channel struct we provided in our module get_io_channel() entry point. Here + * we get and save off an underlying base channel of the device below us so that + * we can communicate with the base bdev on a per channel basis. If we needed + * our own poller for this vbdev, we'd register it here. + */ +static int +pt_bdev_ch_create_cb(void *io_device, void *ctx_buf) +{ + struct pt_io_channel *pt_ch = ctx_buf; + struct vbdev_passthru *pt_node = io_device; + + pt_ch->base_ch = spdk_bdev_get_io_channel(pt_node->base_desc); + + return 0; +} + +/* We provide this callback for the SPDK channel code to destroy a channel + * created with our create callback. We just need to undo anything we did + * when we created it. If this bdev used its own poller, we'd unregister it here. + */ +static void +pt_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) +{ + struct pt_io_channel *pt_ch = ctx_buf; + + spdk_put_io_channel(pt_ch->base_ch); +} + +/* Create the passthru association from the bdev and vbdev name and insert it + * on the global list. */ +static int +vbdev_passthru_insert_name(const char *bdev_name, const char *vbdev_name) +{ + struct bdev_names *name; + + name = calloc(1, sizeof(struct bdev_names)); + if (!name) { + SPDK_ERRLOG("could not allocate bdev_names\n"); + return -ENOMEM; + } + + name->bdev_name = strdup(bdev_name); + if (!name->bdev_name) { + SPDK_ERRLOG("could not allocate name->bdev_name\n"); + free(name); + return -ENOMEM; + } + + name->vbdev_name = strdup(vbdev_name); + if (!name->vbdev_name) { + SPDK_ERRLOG("could not allocate name->vbdev_name\n"); + free(name->bdev_name); + free(name); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&g_bdev_names, name, link); + + return 0; +} + +/* On init, just parse config file and build list of pt vbdevs and bdev name pairs.
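+ * The vbdevs themselves are created later, from examine() or the construct RPC, once the + * base bdevs they refer to actually exist.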
*/ +static int +vbdev_passthru_init(void) +{ + struct spdk_conf_section *sp = NULL; + const char *conf_bdev_name = NULL; + const char *conf_vbdev_name = NULL; + struct bdev_names *name; + int i, rc; + + sp = spdk_conf_find_section(NULL, "Passthru"); + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + if (!spdk_conf_section_get_nval(sp, "PT", i)) { + break; + } + + conf_bdev_name = spdk_conf_section_get_nmval(sp, "PT", i, 0); + if (!conf_bdev_name) { + SPDK_ERRLOG("Passthru configuration missing bdev name\n"); + break; + } + + conf_vbdev_name = spdk_conf_section_get_nmval(sp, "PT", i, 1); + if (!conf_vbdev_name) { + SPDK_ERRLOG("Passthru configuration missing pt_bdev name\n"); + break; + } + + rc = vbdev_passthru_insert_name(conf_bdev_name, conf_vbdev_name); + if (rc != 0) { + return rc; + } + } + TAILQ_FOREACH(name, &g_bdev_names, link) { + SPDK_NOTICELOG("conf parse matched: %s\n", name->bdev_name); + } + return 0; +} + +/* Called when the entire module is being torn down. */ +static void +vbdev_passthru_finish(void) +{ + struct bdev_names *name; + + while ((name = TAILQ_FIRST(&g_bdev_names))) { + TAILQ_REMOVE(&g_bdev_names, name, link); + free(name->bdev_name); + free(name->vbdev_name); + free(name); + } +} + +/* During init we'll be asked how much memory we'd like passed to us + * in bdev_io structures as context. Here's where we specify how + * much context we want per IO. + */ +static int +vbdev_passthru_get_ctx_size(void) +{ + return sizeof(struct passthru_bdev_io); +} + +/* Called when SPDK wants to save the current config of this vbdev module to + * a file. + */ +static void +vbdev_passthru_get_spdk_running_config(FILE *fp) +{ + struct bdev_names *names = NULL; + + fprintf(fp, "\n[Passthru]\n"); + TAILQ_FOREACH(names, &g_bdev_names, link) { + fprintf(fp, " PT %s %s\n", names->bdev_name, names->vbdev_name); + } + fprintf(fp, "\n"); +} + +/* Called when SPDK wants to output the bdev specific methods. */ +static void +vbdev_passthru_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev, struct vbdev_passthru, pt_bdev); + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_passthru_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(pt_node->base_bdev)); + spdk_json_write_named_string(w, "passthru_bdev_name", spdk_bdev_get_name(bdev)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +/* When we register our bdev this is how we specify our entry points. */ +static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = { + .destruct = vbdev_passthru_destruct, + .submit_request = vbdev_passthru_submit_request, + .io_type_supported = vbdev_passthru_io_type_supported, + .get_io_channel = vbdev_passthru_get_io_channel, + .dump_info_json = vbdev_passthru_info_config_json, + .write_config_json = vbdev_passthru_write_json_config, +}; + +/* Called when the underlying base bdev goes away. */ +static void +vbdev_passthru_base_bdev_hotremove_cb(void *ctx) +{ + struct vbdev_passthru *pt_node, *tmp; + struct spdk_bdev *bdev_find = ctx; + + TAILQ_FOREACH_SAFE(pt_node, &g_pt_nodes, link, tmp) { + if (bdev_find == pt_node->base_bdev) { + spdk_bdev_unregister(&pt_node->pt_bdev, NULL, NULL); + } + } +} + +/* Create and register the passthru vbdev if we find it in our list of bdev names. + * This can be called either by the examine path or RPC method.
+ */ +static void +vbdev_passthru_register(struct spdk_bdev *bdev) +{ + struct bdev_names *name; + struct vbdev_passthru *pt_node; + int rc; + + /* Check our list of names from config versus this bdev and if + * there's a match, create the pt_node & bdev accordingly. + */ + TAILQ_FOREACH(name, &g_bdev_names, link) { + if (strcmp(name->bdev_name, bdev->name) != 0) { + continue; + } + + SPDK_NOTICELOG("Match on %s\n", bdev->name); + pt_node = calloc(1, sizeof(struct vbdev_passthru)); + if (!pt_node) { + SPDK_ERRLOG("could not allocate pt_node\n"); + break; + } + + /* The base bdev that we're attaching to. */ + pt_node->base_bdev = bdev; + pt_node->pt_bdev.name = strdup(name->vbdev_name); + if (!pt_node->pt_bdev.name) { + SPDK_ERRLOG("could not allocate pt_bdev name\n"); + free(pt_node); + break; + } + pt_node->pt_bdev.product_name = "passthru"; + + /* Copy some properties from the underlying base bdev. */ + pt_node->pt_bdev.write_cache = bdev->write_cache; + pt_node->pt_bdev.need_aligned_buffer = bdev->need_aligned_buffer; + pt_node->pt_bdev.optimal_io_boundary = bdev->optimal_io_boundary; + pt_node->pt_bdev.blocklen = bdev->blocklen; + pt_node->pt_bdev.blockcnt = bdev->blockcnt; + + /* This is the context that is passed to us when the bdev + * layer calls in so we'll save our pt_bdev node here. + */ + pt_node->pt_bdev.ctxt = pt_node; + pt_node->pt_bdev.fn_table = &vbdev_passthru_fn_table; + pt_node->pt_bdev.module = &passthru_if; + TAILQ_INSERT_TAIL(&g_pt_nodes, pt_node, link); + + spdk_io_device_register(pt_node, pt_bdev_ch_create_cb, pt_bdev_ch_destroy_cb, + sizeof(struct pt_io_channel), + name->bdev_name); + SPDK_NOTICELOG("io_device created at: 0x%p\n", pt_node); + + rc = spdk_bdev_open(bdev, true, vbdev_passthru_base_bdev_hotremove_cb, + bdev, &pt_node->base_desc); + if (rc) { + SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); + TAILQ_REMOVE(&g_pt_nodes, pt_node, link); + free(pt_node->pt_bdev.name); + free(pt_node); + break; + } + SPDK_NOTICELOG("bdev opened\n"); + + rc = spdk_bdev_module_claim_bdev(bdev, pt_node->base_desc, pt_node->pt_bdev.module); + if (rc) { + SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev)); + spdk_bdev_close(pt_node->base_desc); + TAILQ_REMOVE(&g_pt_nodes, pt_node, link); + free(pt_node->pt_bdev.name); + free(pt_node); + break; + } + SPDK_NOTICELOG("bdev claimed\n"); + + rc = spdk_vbdev_register(&pt_node->pt_bdev, &bdev, 1); + if (rc) { + SPDK_ERRLOG("could not register pt_bdev\n"); + spdk_bdev_close(pt_node->base_desc); + TAILQ_REMOVE(&g_pt_nodes, pt_node, link); + free(pt_node->pt_bdev.name); + free(pt_node); + break; + } + SPDK_NOTICELOG("pt_bdev registered\n"); + SPDK_NOTICELOG("created pt_bdev for: %s\n", name->vbdev_name); + } +} + +/* Create the passthru disk from the given bdev and vbdev name. */ +int +create_passthru_disk(const char *bdev_name, const char *vbdev_name) +{ + struct spdk_bdev *bdev = NULL; + int rc = 0; + + bdev = spdk_bdev_get_by_name(bdev_name); + if (!bdev) { + return -1; + } + + rc = vbdev_passthru_insert_name(bdev_name, vbdev_name); + if (rc != 0) { + return rc; + } + + vbdev_passthru_register(bdev); + + return 0; +} + +void +delete_passthru_disk(struct spdk_bdev *bdev, spdk_delete_passthru_complete cb_fn, void *cb_arg) +{ + struct bdev_names *name; + + if (!bdev || bdev->module != &passthru_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + /* Remove the association (vbdev, bdev) from g_bdev_names. 
This is required so that the + * vbdev does not get re-created if the same bdev is constructed at some other time, + * unless the underlying bdev was hot-removed. + */ + TAILQ_FOREACH(name, &g_bdev_names, link) { + if (strcmp(name->vbdev_name, bdev->name) == 0) { + TAILQ_REMOVE(&g_bdev_names, name, link); + free(name->bdev_name); + free(name->vbdev_name); + free(name); + break; + } + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +/* Because we specified this function in our pt bdev function table when we + * registered our pt bdev, we'll get this call anytime a new bdev shows up. + * Here we need to decide if we care about it and if so what to do. We + * parsed the config file at init so we check the new bdev against the list + * we built up at that time and if the user configured us to attach to this + * bdev, here's where we do it. + */ +static void +vbdev_passthru_examine(struct spdk_bdev *bdev) +{ + vbdev_passthru_register(bdev); + + spdk_bdev_module_examine_done(&passthru_if); +} + +SPDK_LOG_REGISTER_COMPONENT("vbdev_passthru", SPDK_LOG_VBDEV_PASSTHRU) diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru.h b/src/spdk/lib/bdev/passthru/vbdev_passthru.h new file mode 100644 index 00000000..5705c4ed --- /dev/null +++ b/src/spdk/lib/bdev/passthru/vbdev_passthru.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VBDEV_PASSTHRU_H +#define SPDK_VBDEV_PASSTHRU_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" + +typedef void (*spdk_delete_passthru_complete)(void *cb_arg, int bdeverrno); + +/** + * Create new pass through bdev. + * + * \param bdev_name Bdev on which pass through vbdev will be created. + * \param vbdev_name Vbdev name. + * \return 0 on success, other on failure. + */ +int create_passthru_disk(const char *bdev_name, const char *vbdev_name); + +/** + * Delete passthru bdev. + * + * \param bdev Pointer to pass through bdev. + * \param cb_fn Function to call after deletion. 
+ * \param cb_arg Argument to pass to cb_fn. + */ +void delete_passthru_disk(struct spdk_bdev *bdev, spdk_delete_passthru_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_VBDEV_PASSTHRU_H */ diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c b/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c new file mode 100644 index 00000000..9f0f9521 --- /dev/null +++ b/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c @@ -0,0 +1,160 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "vbdev_passthru.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +/* Structure to hold the parameters for this RPC method. */ +struct rpc_construct_passthru { + char *base_bdev_name; + char *passthru_bdev_name; +}; + +/* Free the allocated memory resource after the RPC handling. */ +static void +free_rpc_construct_passthru(struct rpc_construct_passthru *r) +{ + free(r->base_bdev_name); + free(r->passthru_bdev_name); +} + +/* Structure to decode the input parameters for this RPC method. */ +static const struct spdk_json_object_decoder rpc_construct_passthru_decoders[] = { + {"base_bdev_name", offsetof(struct rpc_construct_passthru, base_bdev_name), spdk_json_decode_string}, + {"passthru_bdev_name", offsetof(struct rpc_construct_passthru, passthru_bdev_name), spdk_json_decode_string}, +}; + +/* Decode the parameters for this RPC method and properly construct the passthru + * device. Error status returned in the failed cases. 
+ */ +static void +spdk_rpc_construct_passthru_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_passthru req = {NULL}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_passthru_decoders, + SPDK_COUNTOF(rpc_construct_passthru_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_PASSTHRU, "spdk_json_decode_object failed\n"); + goto invalid; + } + + rc = create_passthru_disk(req.base_bdev_name, req.passthru_bdev_name); + if (rc != 0) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_passthru(&req); + return; + } + + spdk_json_write_string(w, req.passthru_bdev_name); + spdk_jsonrpc_end_result(request, w); + free_rpc_construct_passthru(&req); + return; + +invalid: + free_rpc_construct_passthru(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} +SPDK_RPC_REGISTER("construct_passthru_bdev", spdk_rpc_construct_passthru_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_passthru { + char *name; +}; + +static void +free_rpc_delete_passthru(struct rpc_delete_passthru *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_passthru_decoders[] = { + {"name", offsetof(struct rpc_delete_passthru, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_passthru_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_passthru_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_passthru req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_passthru_decoders, + SPDK_COUNTOF(rpc_delete_passthru_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + delete_passthru_disk(bdev, _spdk_rpc_delete_passthru_bdev_cb, request); + + free_rpc_delete_passthru(&req); + + return; + +invalid: + free_rpc_delete_passthru(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_passthru_bdev", spdk_rpc_delete_passthru_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/pmem/Makefile b/src/spdk/lib/bdev/pmem/Makefile new file mode 100644 index 00000000..19f0da8c --- /dev/null +++ b/src/spdk/lib/bdev/pmem/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_pmem.c bdev_pmem_rpc.c +LIBNAME = bdev_pmem + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem.c b/src/spdk/lib/bdev/pmem/bdev_pmem.c new file mode 100644 index 00000000..9238e085 --- /dev/null +++ b/src/spdk/lib/bdev/pmem/bdev_pmem.c @@ -0,0 +1,465 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/conf.h" +#include "spdk/string.h" +#include "spdk/likely.h" +#include "spdk/util.h" +#include "spdk/rpc.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "bdev_pmem.h" +#include "libpmemblk.h" + +struct pmem_disk { + struct spdk_bdev disk; + PMEMblkpool *pool; + char pmem_file[NAME_MAX]; + TAILQ_ENTRY(pmem_disk) tailq; +}; + +static TAILQ_HEAD(, pmem_disk) g_pmem_disks = TAILQ_HEAD_INITIALIZER(g_pmem_disks); + +static int bdev_pmem_initialize(void); +static void bdev_pmem_finish(void); + +static struct spdk_bdev_module pmem_if = { + .name = "pmem", + .module_init = bdev_pmem_initialize, + .module_fini = bdev_pmem_finish, + .async_fini = true, + +}; + +SPDK_BDEV_MODULE_REGISTER(&pmem_if) + +typedef int(*spdk_bdev_pmem_io_request)(PMEMblkpool *pbp, void *buf, long long blockno); + +static int +_bdev_pmem_submit_io_read(PMEMblkpool *pbp, void *buf, long long blockno) +{ + return pmemblk_read(pbp, buf, blockno); +} + +static int +_bdev_pmem_submit_io_write(PMEMblkpool *pbp, void *buf, long long blockno) +{ + return pmemblk_write(pbp, buf, blockno); +} + +static int +bdev_pmem_destruct(void *ctx) +{ + struct pmem_disk *pdisk = ctx; + + TAILQ_REMOVE(&g_pmem_disks, pdisk, tailq); + free(pdisk->disk.name); + pmemblk_close(pdisk->pool); + free(pdisk); + + return 0; +} + +static int +bdev_pmem_check_iov_len(struct iovec *iovs, int iovcnt, size_t num_blocks, uint32_t block_size) +{ + size_t nbytes = num_blocks * block_size; + int i; + + for (i = 0; i < iovcnt; i++) { + if (spdk_unlikely(iovs[i].iov_base == NULL && iovs[i].iov_len != 0)) { + return -1; + } + + if (nbytes <= iovs[i].iov_len) { + return 0; + } + + if (spdk_unlikely(iovs[i].iov_len % block_size != 0)) { + return -1; + } + + nbytes -= iovs[i].iov_len; + } + + return -1; +} + +static void +bdev_pmem_submit_io(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk, + struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, size_t num_blocks, uint32_t block_size, + spdk_bdev_pmem_io_request fn) +{ + int rc; + size_t nbytes, offset, len; + enum spdk_bdev_io_status status; + + rc = bdev_pmem_check_iov_len(iov, iovcnt, num_blocks, block_size); + if (rc) { + status = SPDK_BDEV_IO_STATUS_FAILED; + goto end; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "io %lu bytes from offset %#lx\n", + num_blocks, offset_blocks); + + for (nbytes = num_blocks * block_size; nbytes > 0; iov++) { + len = spdk_min(iov->iov_len, nbytes); + nbytes -= len; + + offset = 0; + while (offset != len) { + rc = fn(pdisk->pool, iov->iov_base + offset, offset_blocks); + if (rc != 0) { + SPDK_ERRLOG("pmemblk io failed: %d (%s)\n", errno, pmemblk_errormsg()); + status = SPDK_BDEV_IO_STATUS_FAILED; + goto end; + } + + offset += block_size; + offset_blocks++; + } + } + + assert(num_blocks == offset_blocks - bdev_io->u.bdev.offset_blocks); + status = SPDK_BDEV_IO_STATUS_SUCCESS; +end: + + spdk_bdev_io_complete(bdev_io, status); +} + +static void +bdev_pmem_write_zeros(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk, + struct spdk_io_channel *ch, uint64_t offset_blocks, + uint64_t num_blocks, uint32_t block_size) +{ + int rc; + enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; + + while (num_blocks > 0) { + rc = pmemblk_set_zero(pdisk->pool, offset_blocks); + if (rc != 0) { + SPDK_ERRLOG("pmemblk_set_zero failed: %d (%s)\n", errno, pmemblk_errormsg()); + status = SPDK_BDEV_IO_STATUS_FAILED; + break; + } + offset_blocks++; + num_blocks--; + } + spdk_bdev_io_complete(bdev_io, status); +} + 
+static void +bdev_pmem_io_get_buf_cb(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io) +{ + bdev_pmem_submit_io(bdev_io, + bdev_io->bdev->ctxt, + channel, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + bdev_io->bdev->blocklen, + _bdev_pmem_submit_io_read); +} + +static void +bdev_pmem_submit_request(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_pmem_io_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_pmem_submit_io(bdev_io, + bdev_io->bdev->ctxt, + channel, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + bdev_io->bdev->blocklen, + _bdev_pmem_submit_io_write); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + bdev_pmem_write_zeros(bdev_io, + bdev_io->bdev->ctxt, + channel, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_RESET: + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + break; + default: + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_pmem_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return true; + default: + return false; + } +} + +static struct spdk_io_channel * +bdev_pmem_get_io_channel(void *ctx) +{ + return spdk_get_io_channel(&g_pmem_disks); +} + +static int +bdev_pmem_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct pmem_disk *pdisk = ctx; + + spdk_json_write_name(w, "pmem"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "pmem_file"); + spdk_json_write_string(w, pdisk->pmem_file); + spdk_json_write_object_end(w); + + return 0; +} + +static int +bdev_pmem_create_cb(void *io_device, void *ctx_buf) +{ + return 0; +} + +static void +bdev_pmem_destroy_cb(void *io_device, void *ctx_buf) +{ +} + +static void +bdev_pmem_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct pmem_disk *disk = bdev->ctxt; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_pmem_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_string(w, "pmem_file", disk->pmem_file); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table pmem_fn_table = { + .destruct = bdev_pmem_destruct, + .submit_request = bdev_pmem_submit_request, + .io_type_supported = bdev_pmem_io_type_supported, + .get_io_channel = bdev_pmem_get_io_channel, + .dump_info_json = bdev_pmem_dump_info_json, + .write_config_json = bdev_pmem_write_config_json, +}; + +int +spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev) +{ + uint64_t num_blocks; + uint32_t block_size; + struct pmem_disk *pdisk; + int rc; + + *bdev = NULL; + + if (name == NULL) { + SPDK_ERRLOG("Missing name parameter for spdk_create_pmem_disk()\n"); + return EINVAL; + } + + if (pmemblk_check(pmem_file, 0) != 1) { + SPDK_ERRLOG("Pool '%s' check failed: %s\n", pmem_file, pmemblk_errormsg()); + return EIO; + } + 
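+ /* Pool file passed pmemblk_check(); allocate the disk context, open the pool and read its block size and capacity. */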
+ pdisk = calloc(1, sizeof(*pdisk)); + if (!pdisk) { + return ENOMEM; + } + + snprintf(pdisk->pmem_file, sizeof(pdisk->pmem_file), "%s", pmem_file); + pdisk->pool = pmemblk_open(pmem_file, 0); + if (!pdisk->pool) { + SPDK_ERRLOG("Opening pmem pool '%s' failed: %d\n", pmem_file, errno); + free(pdisk); + return errno; + } + + block_size = pmemblk_bsize(pdisk->pool); + num_blocks = pmemblk_nblock(pdisk->pool); + + if (block_size == 0) { + SPDK_ERRLOG("Block size must be more than 0 bytes\n"); + pmemblk_close(pdisk->pool); + free(pdisk); + return EINVAL; + } + + if (num_blocks == 0) { + SPDK_ERRLOG("Disk must be more than 0 blocks\n"); + pmemblk_close(pdisk->pool); + free(pdisk); + return EINVAL; + } + + pdisk->disk.name = strdup(name); + if (!pdisk->disk.name) { + pmemblk_close(pdisk->pool); + free(pdisk); + return ENOMEM; + } + + pdisk->disk.product_name = "pmemblk disk"; + pdisk->disk.write_cache = 0; + pdisk->disk.blocklen = block_size; + pdisk->disk.blockcnt = num_blocks; + + pdisk->disk.ctxt = pdisk; + pdisk->disk.fn_table = &pmem_fn_table; + pdisk->disk.module = &pmem_if; + + rc = spdk_bdev_register(&pdisk->disk); + if (rc) { + pmemblk_close(pdisk->pool); + free(pdisk->disk.name); + free(pdisk); + return rc; + } + + TAILQ_INSERT_TAIL(&g_pmem_disks, pdisk, tailq); + + *bdev = &pdisk->disk; + + return 0; +} + +void +spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &pmem_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +static void +bdev_pmem_read_conf(void) +{ + struct spdk_conf_section *sp; + struct spdk_bdev *bdev; + const char *pmem_file; + const char *bdev_name; + int i; + + sp = spdk_conf_find_section(NULL, "Pmem"); + if (sp == NULL) { + return; + } + + for (i = 0; ; i++) { + if (!spdk_conf_section_get_nval(sp, "Blk", i)) { + break; + } + + pmem_file = spdk_conf_section_get_nmval(sp, "Blk", i, 0); + if (pmem_file == NULL) { + SPDK_ERRLOG("Pmem: missing filename\n"); + continue; + } + + bdev_name = spdk_conf_section_get_nmval(sp, "Blk", i, 1); + if (bdev_name == NULL) { + SPDK_ERRLOG("Pmem: missing bdev name\n"); + continue; + } + + spdk_create_pmem_disk(pmem_file, bdev_name, &bdev); + } +} + +static int +bdev_pmem_initialize(void) +{ + const char *err = pmemblk_check_version(PMEMBLK_MAJOR_VERSION, PMEMBLK_MINOR_VERSION); + + if (err != NULL) { + SPDK_ERRLOG("Invalid libpmemblk version (expected %d.%d): %s\n", PMEMBLK_MAJOR_VERSION, + PMEMBLK_MINOR_VERSION, err); + return -1; + } + + spdk_io_device_register(&g_pmem_disks, bdev_pmem_create_cb, bdev_pmem_destroy_cb, 0, "pmem_bdev"); + + bdev_pmem_read_conf(); + + return 0; + +} + +static void +bdev_pmem_finish_done(void *io_device) +{ + spdk_bdev_module_finish_done(); +} + +static void +bdev_pmem_finish(void) +{ + spdk_io_device_unregister(&g_pmem_disks, bdev_pmem_finish_done); +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_pmem", SPDK_LOG_BDEV_PMEM) diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem.h b/src/spdk/lib/bdev/pmem/bdev_pmem.h new file mode 100644 index 00000000..7814166c --- /dev/null +++ b/src/spdk/lib/bdev/pmem/bdev_pmem.h @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_PMEM_H +#define SPDK_BDEV_PMEM_H + +#include "spdk/bdev.h" + +typedef void (*spdk_delete_pmem_complete)(void *cb_arg, int bdeverrno); + +/** + * Create new pmem bdev. + * + * \param pmem_file Pointer to pmem pool file. + * \param name Bdev name. + * \param bdev output parameter for bdev when operation is successful. + * \return 0 on success. + * -EIO if pool check failed + * -EINVAL if input parameters check failed + * -ENOMEM if buffer cannot be allocated + */ +int spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev); + +/** + * Delete pmem bdev. + * + * \param bdev Pointer to pmem bdev. + * \param cb_fn Function to call after deletion. + * \param cb_arg Argument to pass to cb_fn. + */ +void spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_BDEV_PMEM_H */ diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c b/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c new file mode 100644 index 00000000..3156cffb --- /dev/null +++ b/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c @@ -0,0 +1,350 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_pmem.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "libpmemblk.h" + +#include "spdk_internal/log.h" + +struct rpc_construct_pmem { + char *pmem_file; + char *name; +}; + +static void +free_rpc_construct_pmem_bdev(struct rpc_construct_pmem *req) +{ + free(req->pmem_file); + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_construct_pmem_decoders[] = { + {"pmem_file", offsetof(struct rpc_construct_pmem, pmem_file), spdk_json_decode_string}, + {"name", offsetof(struct rpc_construct_pmem, name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_construct_pmem_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_pmem req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_pmem_decoders, + SPDK_COUNTOF(rpc_construct_pmem_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n"); + rc = EINVAL; + goto invalid; + } + rc = spdk_create_pmem_disk(req.pmem_file, req.name, &bdev); + if (rc != 0) { + goto invalid; + } + if (bdev == NULL) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_pmem_bdev(&req); + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + + free_rpc_construct_pmem_bdev(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(rc)); + free_rpc_construct_pmem_bdev(&req); +} +SPDK_RPC_REGISTER("construct_pmem_bdev", spdk_rpc_construct_pmem_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_pmem { + char *name; +}; + +static void +free_rpc_delete_pmem(struct rpc_delete_pmem *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_pmem_decoders[] = { + {"name", offsetof(struct rpc_delete_pmem, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_pmem_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_pmem_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_pmem req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_pmem_decoders, + SPDK_COUNTOF(rpc_delete_pmem_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev 
== NULL) { + rc = -ENODEV; + goto invalid; + } + + spdk_delete_pmem_disk(bdev, _spdk_rpc_delete_pmem_bdev_cb, request); + free_rpc_delete_pmem(&req); + return; + +invalid: + free_rpc_delete_pmem(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_pmem_bdev", spdk_rpc_delete_pmem_bdev, SPDK_RPC_RUNTIME) + +struct rpc_create_pmem_pool { + char *pmem_file; + uint64_t num_blocks; + uint32_t block_size; +}; + +static const struct spdk_json_object_decoder rpc_create_pmem_pool_decoders[] = { + {"pmem_file", offsetof(struct rpc_create_pmem_pool, pmem_file), spdk_json_decode_string}, + {"num_blocks", offsetof(struct rpc_create_pmem_pool, num_blocks), spdk_json_decode_uint64}, + {"block_size", offsetof(struct rpc_create_pmem_pool, block_size), spdk_json_decode_uint32}, +}; + +static void +free_rpc_create_pmem_pool(struct rpc_create_pmem_pool *req) +{ + free(req->pmem_file); +} + +static void +spdk_rpc_create_pmem_pool(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_create_pmem_pool req = {}; + struct spdk_json_write_ctx *w; + uint64_t pool_size; + PMEMblkpool *pbp; + + if (spdk_json_decode_object(params, rpc_create_pmem_pool_decoders, + SPDK_COUNTOF(rpc_create_pmem_pool_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n"); + goto invalid; + } + + /* libpmemblk pool has to contain at least 256 blocks */ + if (req.num_blocks < 256) { + goto invalid; + } + + pool_size = req.num_blocks * req.block_size; + if (pool_size < PMEMBLK_MIN_POOL) { + goto invalid; + } + + pbp = pmemblk_create(req.pmem_file, req.block_size, pool_size, 0666); + if (pbp == NULL) { + goto invalid; + } + + pmemblk_close(pbp); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_create_pmem_pool(&req); + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + free_rpc_create_pmem_pool(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_create_pmem_pool(&req); +} +SPDK_RPC_REGISTER("create_pmem_pool", spdk_rpc_create_pmem_pool, SPDK_RPC_RUNTIME) + +struct rpc_pmem_pool_info { + char *pmem_file; +}; + +static const struct spdk_json_object_decoder rpc_pmem_pool_info_decoders[] = { + {"pmem_file", offsetof(struct rpc_pmem_pool_info, pmem_file), spdk_json_decode_string}, +}; + +static void +free_rpc_pmem_pool_info(struct rpc_pmem_pool_info *req) +{ + free(req->pmem_file); +} + +static void +spdk_rpc_pmem_pool_info(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_pmem_pool_info req = {}; + struct spdk_json_write_ctx *w; + size_t num_blocks, block_size; + PMEMblkpool *pbp; + + if (spdk_json_decode_object(params, rpc_pmem_pool_info_decoders, + SPDK_COUNTOF(rpc_pmem_pool_info_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n"); + goto invalid; + } + + pbp = pmemblk_open(req.pmem_file, 0); + if (pbp == NULL) { + goto invalid; + } + + block_size = pmemblk_bsize(pbp); + num_blocks = pmemblk_nblock(pbp); + + + pmemblk_close(pbp); + + /* Check pmem pool consistency */ + if (pmemblk_check(req.pmem_file, block_size) != 1) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_pmem_pool_info(&req); + return; + } + + spdk_json_write_array_begin(w); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, 
"num_blocks"); + spdk_json_write_uint64(w, num_blocks); + spdk_json_write_name(w, "block_size"); + spdk_json_write_uint64(w, block_size); + spdk_json_write_object_end(w); + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + free_rpc_pmem_pool_info(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_pmem_pool_info(&req); +} +SPDK_RPC_REGISTER("pmem_pool_info", spdk_rpc_pmem_pool_info, SPDK_RPC_RUNTIME) + +struct rpc_delete_pmem_pool { + char *pmem_file; +}; + +static const struct spdk_json_object_decoder rpc_delete_pmem_pool_decoders[] = { + {"pmem_file", offsetof(struct rpc_delete_pmem_pool, pmem_file), spdk_json_decode_string}, +}; + +static void +free_rpc_delete_pmem_pool(struct rpc_delete_pmem_pool *req) +{ + free(req->pmem_file); +} + +static void +spdk_rpc_delete_pmem_pool(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_pmem_pool req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_delete_pmem_pool_decoders, + SPDK_COUNTOF(rpc_delete_pmem_pool_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n"); + goto invalid; + } + + /* Check if file is actually pmem pool */ + if (pmemblk_check(req.pmem_file, 0) != 1) { + goto invalid; + } + + unlink(req.pmem_file); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_delete_pmem_pool(&req); + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + free_rpc_delete_pmem_pool(&req); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_delete_pmem_pool(&req); +} +SPDK_RPC_REGISTER("delete_pmem_pool", spdk_rpc_delete_pmem_pool, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/raid/Makefile b/src/spdk/lib/bdev/raid/Makefile new file mode 100644 index 00000000..8332399d --- /dev/null +++ b/src/spdk/lib/bdev/raid/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ +C_SRCS = bdev_raid.c bdev_raid_rpc.c +LIBNAME = vbdev_raid + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/raid/bdev_raid.c b/src/spdk/lib/bdev/raid/bdev_raid.c new file mode 100644 index 00000000..51fa94ec --- /dev/null +++ b/src/spdk/lib/bdev/raid/bdev_raid.c @@ -0,0 +1,1624 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "bdev_raid.h" +#include "spdk/env.h" +#include "spdk/io_channel.h" +#include "spdk/conf.h" +#include "spdk_internal/log.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/json.h" +#include "spdk/string.h" + +static bool g_shutdown_started = false; + +/* raid bdev config as read from config file */ +struct raid_config g_spdk_raid_config = { + .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_spdk_raid_config.raid_bdev_config_head), +}; + +/* + * List of raid bdev in configured list, these raid bdevs are registered with + * bdev layer + */ +struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list; + +/* List of raid bdev in configuring list */ +struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list; + +/* List of all raid bdevs */ +struct spdk_raid_all_tailq g_spdk_raid_bdev_list; + +/* List of all raid bdevs that are offline */ +struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list; + +/* Function declarations */ +static void raid_bdev_examine(struct spdk_bdev *bdev); +static int raid_bdev_init(void); +static void raid_bdev_waitq_io_process(void *ctx); +static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev); + + +/* + * brief: + * raid_bdev_create_cb function is a cb function for raid bdev which creates the + * hierarchy from raid bdev to base bdev io channels. It will be called per core + * params: + * io_device - pointer to raid bdev io device represented by raid_bdev + * ctx_buf - pointer to context buffer for raid bdev io channel + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_create_cb(void *io_device, void *ctx_buf) +{ + struct raid_bdev *raid_bdev = io_device; + struct raid_bdev_io_channel *raid_ch = ctx_buf; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch); + + assert(raid_bdev != NULL); + assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); + + raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, + sizeof(struct spdk_io_channel *)); + if (!raid_ch->base_channel) { + SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); + return -ENOMEM; + } + for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + /* + * Get the spdk_io_channel for all the base bdevs. This is used during + * split logic to send the respective child bdev ios to respective base + * bdev io channel. + */ + raid_ch->base_channel[i] = spdk_bdev_get_io_channel( + raid_bdev->base_bdev_info[i].desc); + if (!raid_ch->base_channel[i]) { + for (uint32_t j = 0; j < i; j++) { + spdk_put_io_channel(raid_ch->base_channel[j]); + } + free(raid_ch->base_channel); + SPDK_ERRLOG("Unable to create io channel for base bdev\n"); + return -ENOMEM; + } + } + + return 0; +} + +/* + * brief: + * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the + * hierarchy from raid bdev to base bdev io channels. 
It will be called per core + * params: + * io_device - pointer to raid bdev io device represented by raid_bdev + * ctx_buf - pointer to context buffer for raid bdev io channel + * returns: + * none + */ +static void +raid_bdev_destroy_cb(void *io_device, void *ctx_buf) +{ + struct raid_bdev_io_channel *raid_ch = ctx_buf; + struct raid_bdev *raid_bdev = io_device; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); + + assert(raid_bdev != NULL); + assert(raid_ch != NULL); + assert(raid_ch->base_channel); + for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + /* Free base bdev channels */ + assert(raid_ch->base_channel[i] != NULL); + spdk_put_io_channel(raid_ch->base_channel[i]); + raid_ch->base_channel[i] = NULL; + } + free(raid_ch->base_channel); + raid_ch->base_channel = NULL; +} + +/* + * brief: + * raid_bdev_cleanup is used to cleanup and free raid_bdev related data + * structures. + * params: + * raid_bdev - pointer to raid_bdev + * returns: + * none + */ +void +raid_bdev_cleanup(struct raid_bdev *raid_bdev) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n", + raid_bdev, + raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config); + if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { + TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link); + } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) { + TAILQ_REMOVE(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link); + } else { + assert(0); + } + TAILQ_REMOVE(&g_spdk_raid_bdev_list, raid_bdev, global_link); + free(raid_bdev->bdev.name); + raid_bdev->bdev.name = NULL; + assert(raid_bdev->base_bdev_info); + free(raid_bdev->base_bdev_info); + raid_bdev->base_bdev_info = NULL; + if (raid_bdev->config) { + raid_bdev->config->raid_bdev = NULL; + } + free(raid_bdev); +} + +/* + * brief: + * free resource of base bdev for raid bdev + * params: + * raid_bdev - pointer to raid bdev + * base_bdev_slot - position to base bdev in raid bdev + * returns: + * 0 - success + * non zero - failure + */ +void +raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint32_t base_bdev_slot) +{ + struct raid_base_bdev_info *info; + + info = &raid_bdev->base_bdev_info[base_bdev_slot]; + + spdk_bdev_module_release_bdev(info->bdev); + spdk_bdev_close(info->desc); + info->desc = NULL; + info->bdev = NULL; + + assert(raid_bdev->num_base_bdevs_discovered); + raid_bdev->num_base_bdevs_discovered--; +} + +/* + * brief: + * raid_bdev_destruct is the destruct function table pointer for raid bdev + * params: + * ctxt - pointer to raid_bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_destruct(void *ctxt) +{ + struct raid_bdev *raid_bdev = ctxt; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n"); + + raid_bdev->destruct_called = true; + for (uint16_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + /* + * Close all base bdev descriptors for which call has come from below + * layers. Also close the descriptors if we have started shutdown. 
+ */ + if (g_shutdown_started || + ((raid_bdev->base_bdev_info[i].remove_scheduled == true) && + (raid_bdev->base_bdev_info[i].bdev != NULL))) { + raid_bdev_free_base_bdev_resource(raid_bdev, i); + } + } + + if (g_shutdown_started) { + TAILQ_REMOVE(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link); + raid_bdev->state = RAID_BDEV_STATE_OFFLINE; + TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link); + spdk_io_device_unregister(raid_bdev, NULL); + } + + if (raid_bdev->num_base_bdevs_discovered == 0) { + /* Free raid_bdev when there are no base bdevs left */ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n"); + raid_bdev_cleanup(raid_bdev); + } + + return 0; +} + +/* + * brief: + * raid_bdev_io_completion function is called by lower layers to notify raid + * module that particular bdev_io is completed. + * params: + * bdev_io - pointer to bdev io submitted to lower layers, like child io + * success - bdev_io status + * cb_arg - function callback context, like parent io pointer + * returns: + * none + */ +static void +raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *parent_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (success) { + spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +/* + * brief: + * raid_bdev_submit_rw_request function is used to submit I/O to the correct + * member disk + * params: + * bdev_io - parent bdev io + * start_strip - start strip number of this io + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) +{ + struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); + struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + uint64_t pd_strip; + uint32_t offset_in_strip; + uint64_t pd_lba; + uint64_t pd_blocks; + uint32_t pd_idx; + int ret = 0; + + pd_strip = start_strip / raid_bdev->num_base_bdevs; + pd_idx = start_strip % raid_bdev->num_base_bdevs; + offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); + pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; + pd_blocks = bdev_io->u.bdev.num_blocks; + if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { + SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); + assert(0); + } + + /* + * Submit child io to bdev layer with using base bdev descriptors, base + * bdev lba, base bdev child io length in blocks, buffer, completion + * function and function callback context + */ + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, + raid_ch->base_channel[pd_idx], + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, raid_bdev_io_completion, + bdev_io); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, + raid_ch->base_channel[pd_idx], + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, raid_bdev_io_completion, + bdev_io); + } else { + SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); + assert(0); + } + + return ret; +} + +/* + * brief: + * get_curr_base_bdev_index function calculates the base bdev index + * params: + * raid_bdev - pointer to pooled bdev + * 
raid_io - pointer to parent io context + * returns: + * base bdev index + */ +static uint8_t +get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io; + uint64_t start_strip; + + bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + + return (start_strip % raid_bdev->num_base_bdevs); +} + +/* + * brief: + * raid_bdev_io_submit_fail_process function processes the IO which failed to submit. + * It will try to queue the IOs after storing the context to bdev wait queue logic. + * params: + * bdev_io - pointer to bdev_io + * raid_io - pointer to raid bdev io + * ret - return code + * returns: + * none + */ +static void +raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io, + struct raid_bdev_io *raid_io, int ret) +{ + struct raid_bdev_io_channel *raid_ch; + uint8_t pd_idx; + + if (ret != -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + /* Queue the IO to bdev layer wait queue */ + pd_idx = get_curr_base_bdev_index(raid_bdev, raid_io); + raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev; + raid_io->waitq_entry.cb_fn = raid_bdev_waitq_io_process; + raid_io->waitq_entry.cb_arg = raid_io; + raid_ch = spdk_io_channel_get_ctx(raid_io->ch); + if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev, + raid_ch->base_channel[pd_idx], + &raid_io->waitq_entry) != 0) { + SPDK_ERRLOG("bdev io waitq error, it should not happen\n"); + assert(0); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +/* + * brief: + * raid_bdev_waitq_io_process function is the callback function + * registered by raid bdev module to bdev when bdev_io was unavailable. + * params: + * ctx - pointer to raid_bdev_io + * returns: + * none + */ +static void +raid_bdev_waitq_io_process(void *ctx) +{ + struct raid_bdev_io *raid_io = ctx; + struct spdk_bdev_io *bdev_io; + struct raid_bdev *raid_bdev; + int ret; + uint64_t start_strip; + + bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx); + /* + * Try to submit childs of parent bdev io. If failed due to resource + * crunch then break the loop and don't try to process other queued IOs. 
+ */ + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + ret = raid_bdev_submit_rw_request(bdev_io, start_strip); + if (ret != 0) { + raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); + } +} + +/* + * brief: + * raid_bdev_start_rw_request function is the submit_request function for + * read/write requests + * params: + * ch - pointer to raid bdev io channel + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +static void +raid_bdev_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct raid_bdev_io *raid_io; + struct raid_bdev *raid_bdev; + uint64_t start_strip = 0; + uint64_t end_strip = 0; + int ret; + + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_io->ch = ch; + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { + assert(false); + SPDK_ERRLOG("I/O spans strip boundary!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + ret = raid_bdev_submit_rw_request(bdev_io, start_strip); + if (ret != 0) { + raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret); + } +} + +/* + * brief: + * raid_bdev_reset_completion is the completion callback for member disk resets + * params: + * bdev_io - pointer to member disk reset bdev_io + * success - true if reset was successful, false if unsuccessful + * cb_arg - callback argument (parent reset bdev_io) + * returns: + * none + */ +static void +raid_bdev_reset_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *parent_io = cb_arg; + struct raid_bdev *raid_bdev = (struct raid_bdev *)parent_io->bdev->ctxt; + struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + raid_io->base_bdev_reset_status = SPDK_BDEV_IO_STATUS_FAILED; + } + + raid_io->base_bdev_reset_completed++; + if (raid_io->base_bdev_reset_completed == raid_bdev->num_base_bdevs) { + spdk_bdev_io_complete(parent_io, raid_io->base_bdev_reset_status); + } +} + +/* + * brief: + * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests + * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in + * which case it will queue it for later submission + * params: + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +static void +_raid_bdev_submit_reset_request_next(void *_bdev_io) +{ + struct spdk_bdev_io *bdev_io = _bdev_io; + struct raid_bdev_io *raid_io; + struct raid_bdev *raid_bdev; + struct raid_bdev_io_channel *raid_ch; + int ret; + uint8_t i; + + raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_ch = spdk_io_channel_get_ctx(raid_io->ch); + + while (raid_io->base_bdev_reset_submitted < raid_bdev->num_base_bdevs) { + i = raid_io->base_bdev_reset_submitted; + ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc, + raid_ch->base_channel[i], + raid_bdev_reset_completion, bdev_io); + if (ret == 0) { + raid_io->base_bdev_reset_submitted++; + } else if (ret == -ENOMEM) { + raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[i].bdev; + 
raid_io->waitq_entry.cb_fn = _raid_bdev_submit_reset_request_next; + raid_io->waitq_entry.cb_arg = bdev_io; + spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[i].bdev, + raid_ch->base_channel[i], + &raid_io->waitq_entry); + return; + } else { + assert(false); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + } +} + +/* + * brief: + * _raid_bdev_submit_reset_request function is the submit_request function for + * reset requests + * params: + * ch - pointer to raid bdev io channel + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +static void +_raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct raid_bdev_io *raid_io; + + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_io->ch = ch; + raid_io->base_bdev_reset_submitted = 0; + raid_io->base_bdev_reset_completed = 0; + raid_io->base_bdev_reset_status = SPDK_BDEV_IO_STATUS_SUCCESS; + _raid_bdev_submit_reset_request_next(bdev_io); +} + +/* + * brief: + * raid_bdev_submit_request function is the submit_request function pointer of + * raid bdev function table. This is used to submit the io on raid_bdev to below + * layers. + * params: + * ch - pointer to raid bdev io channel + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +static void +raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { + spdk_bdev_io_get_buf(bdev_io, raid_bdev_start_rw_request, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + } else { + /* Just call it directly if iov_base is already populated. */ + raid_bdev_start_rw_request(ch, bdev_io); + } + break; + case SPDK_BDEV_IO_TYPE_WRITE: + raid_bdev_start_rw_request(ch, bdev_io); + break; + + case SPDK_BDEV_IO_TYPE_FLUSH: + // TODO: support flush if requirement comes + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + break; + + case SPDK_BDEV_IO_TYPE_RESET: + _raid_bdev_submit_reset_request(ch, bdev_io); + break; + + default: + SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } + +} + +/* + * brief: + * raid_bdev_io_type_supported is the io_supported function for bdev function + * table which returns whether the particular io type is supported or not by + * raid bdev module + * params: + * ctx - pointer to raid bdev context + * type - io type + * returns: + * true - io_type is supported + * false - io_type is not supported + */ +static bool +raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + default: + return false; + } + + return false; +} + +/* + * brief: + * raid_bdev_get_io_channel is the get_io_channel function table pointer for + * raid bdev. 
This is used to return the io channel for this raid bdev + * params: + * ctxt - pointer to raid_bdev + * returns: + * pointer to io channel for raid bdev + */ +static struct spdk_io_channel * +raid_bdev_get_io_channel(void *ctxt) +{ + struct raid_bdev *raid_bdev = ctxt; + + return spdk_get_io_channel(raid_bdev); +} + +/* + * brief: + * raid_bdev_dump_info_json is the function table pointer for raid bdev + * params: + * ctx - pointer to raid_bdev + * w - pointer to json context + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct raid_bdev *raid_bdev = ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); + assert(raid_bdev != NULL); + + /* Dump the raid bdev configuration related information */ + spdk_json_write_name(w, "raid"); + spdk_json_write_object_begin(w); + spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); + spdk_json_write_named_uint32(w, "state", raid_bdev->state); + spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); + spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); + spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); + spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); + spdk_json_write_name(w, "base_bdevs_list"); + spdk_json_write_array_begin(w); + for (uint16_t i = 0; i < raid_bdev->num_base_bdevs; i++) { + if (raid_bdev->base_bdev_info[i].bdev) { + spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name); + } else { + spdk_json_write_null(w); + } + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + + return 0; +} + +/* + * brief: + * raid_bdev_write_config_json is the function table pointer for raid bdev + * params: + * bdev - pointer to spdk_bdev + * w - pointer to json context + * returns: + * none + */ +static void +raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct raid_bdev *raid_bdev = bdev->ctxt; + struct spdk_bdev *base; + uint16_t i; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_raid_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); + spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); + + spdk_json_write_named_array_begin(w, "base_bdevs"); + for (i = 0; i < raid_bdev->num_base_bdevs; i++) { + base = raid_bdev->base_bdev_info[i].bdev; + if (base) { + spdk_json_write_string(w, base->name); + } + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +/* g_raid_bdev_fn_table is the function table for raid bdev */ +static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { + .destruct = raid_bdev_destruct, + .submit_request = raid_bdev_submit_request, + .io_type_supported = raid_bdev_io_type_supported, + .get_io_channel = raid_bdev_get_io_channel, + .dump_info_json = raid_bdev_dump_info_json, + .write_config_json = raid_bdev_write_config_json, +}; + +/* + * brief: + * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration + * params: + * raid_cfg - pointer to raid_bdev_config structure + * returns: + * none + */ +void +raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg) +{ + uint32_t i; + + 
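/* Unlink the config from the global list, then free the per-slot base bdev names, the config name and the config itself. */
+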
TAILQ_REMOVE(&g_spdk_raid_config.raid_bdev_config_head, raid_cfg, link); + g_spdk_raid_config.total_raid_bdev--; + + if (raid_cfg->base_bdev) { + for (i = 0; i < raid_cfg->num_base_bdevs; i++) { + free(raid_cfg->base_bdev[i].name); + } + free(raid_cfg->base_bdev); + } + free(raid_cfg->name); + free(raid_cfg); +} + +/* + * brief: + * raid_bdev_free is the raid bdev function table function pointer. This is + * called on bdev free path + * params: + * none + * returns: + * none + */ +static void +raid_bdev_free(void) +{ + struct raid_bdev_config *raid_cfg, *tmp; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); + TAILQ_FOREACH_SAFE(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link, tmp) { + raid_bdev_config_cleanup(raid_cfg); + } +} + +/* brief + * raid_bdev_config_find_by_name is a helper function to find raid bdev config + * by name as key. + * + * params: + * raid_name - name for raid bdev. + */ +struct raid_bdev_config * +raid_bdev_config_find_by_name(const char *raid_name) +{ + struct raid_bdev_config *raid_cfg; + + TAILQ_FOREACH(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link) { + if (!strcmp(raid_cfg->name, raid_name)) { + return raid_cfg; + } + } + + return raid_cfg; +} + +/* + * brief + * raid_bdev_config_add function adds config for newly created raid bdev. + * + * params: + * raid_name - name for raid bdev. + * strip_size - strip size in KB + * num_base_bdevs - number of base bdevs. + * raid_level - raid level, only raid level 0 is supported. + * _raid_cfg - Pointer to newly added configuration + */ +int +raid_bdev_config_add(const char *raid_name, int strip_size, int num_base_bdevs, + int raid_level, struct raid_bdev_config **_raid_cfg) +{ + struct raid_bdev_config *raid_cfg; + + raid_cfg = raid_bdev_config_find_by_name(raid_name); + if (raid_cfg != NULL) { + SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", + raid_name); + return -EEXIST; + } + + if (spdk_u32_is_pow2(strip_size) == false) { + SPDK_ERRLOG("Invalid strip size %d\n", strip_size); + return -EINVAL; + } + + if (num_base_bdevs <= 0) { + SPDK_ERRLOG("Invalid base device count %d\n", num_base_bdevs); + return -EINVAL; + } + + if (raid_level != 0) { + SPDK_ERRLOG("invalid raid level %d, only raid level 0 is supported\n", + raid_level); + return -EINVAL; + } + + raid_cfg = calloc(1, sizeof(*raid_cfg)); + if (raid_cfg == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + raid_cfg->name = strdup(raid_name); + if (!raid_cfg->name) { + free(raid_cfg); + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + raid_cfg->strip_size = strip_size; + raid_cfg->num_base_bdevs = num_base_bdevs; + raid_cfg->raid_level = raid_level; + + raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev)); + if (raid_cfg->base_bdev == NULL) { + free(raid_cfg->name); + free(raid_cfg); + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&g_spdk_raid_config.raid_bdev_config_head, raid_cfg, link); + g_spdk_raid_config.total_raid_bdev++; + + *_raid_cfg = raid_cfg; + return 0; +} + +/* + * brief: + * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config. 
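+ * It rejects a slot beyond the configured count (-EINVAL) and a base bdev name that is already used by any raid config (-EEXIST).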
+ * + * params: + * raid_cfg - pointer to raid bdev configuration + * base_bdev_name - name of base bdev + * slot - Position to add base bdev + */ +int +raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name, + uint32_t slot) +{ + uint32_t i; + struct raid_bdev_config *tmp; + + if (slot >= raid_cfg->num_base_bdevs) { + return -EINVAL; + } + + TAILQ_FOREACH(tmp, &g_spdk_raid_config.raid_bdev_config_head, link) { + for (i = 0; i < tmp->num_base_bdevs; i++) { + if (tmp->base_bdev[i].name != NULL) { + if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) { + SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", + base_bdev_name); + return -EEXIST; + } + } + } + } + + raid_cfg->base_bdev[slot].name = strdup(base_bdev_name); + if (raid_cfg->base_bdev[slot].name == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + return 0; +} +/* + * brief: + * raid_bdev_parse_raid is used to parse the raid bdev from config file based on + * pre-defined raid bdev format in config file. + * Format of config file: + * [RAID1] + * Name raid1 + * StripSize 64 + * NumDevices 2 + * RaidLevel 0 + * Devices Nvme0n1 Nvme1n1 + * + * [RAID2] + * Name raid2 + * StripSize 64 + * NumDevices 3 + * RaidLevel 0 + * Devices Nvme2n1 Nvme3n1 Nvme4n1 + * + * params: + * conf_section - pointer to config section + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_parse_raid(struct spdk_conf_section *conf_section) +{ + const char *raid_name; + int strip_size; + int i, num_base_bdevs; + int raid_level; + const char *base_bdev_name; + struct raid_bdev_config *raid_cfg; + int rc; + + raid_name = spdk_conf_section_get_val(conf_section, "Name"); + if (raid_name == NULL) { + SPDK_ERRLOG("raid_name %s is null\n", raid_name); + return -EINVAL; + } + + strip_size = spdk_conf_section_get_intval(conf_section, "StripSize"); + num_base_bdevs = spdk_conf_section_get_intval(conf_section, "NumDevices"); + raid_level = spdk_conf_section_get_intval(conf_section, "RaidLevel"); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %d %d %d\n", raid_name, strip_size, num_base_bdevs, + raid_level); + + rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, raid_level, + &raid_cfg); + if (rc != 0) { + SPDK_ERRLOG("Failed to add raid bdev config\n"); + return rc; + } + + for (i = 0; true; i++) { + base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i); + if (base_bdev_name == NULL) { + break; + } + if (i >= num_base_bdevs) { + raid_bdev_config_cleanup(raid_cfg); + SPDK_ERRLOG("Number of devices mentioned is more than count\n"); + return -EINVAL; + } + + rc = raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i); + if (rc != 0) { + raid_bdev_config_cleanup(raid_cfg); + SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n"); + return rc; + } + } + + if (i != raid_cfg->num_base_bdevs) { + raid_bdev_config_cleanup(raid_cfg); + SPDK_ERRLOG("Number of devices mentioned is less than count\n"); + return -EINVAL; + } + + rc = raid_bdev_create(raid_cfg); + if (rc != 0) { + raid_bdev_config_cleanup(raid_cfg); + SPDK_ERRLOG("Failed to create raid bdev\n"); + return rc; + } + + rc = raid_bdev_add_base_devices(raid_cfg); + if (rc != 0) { + SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n"); + /* Config is not removed in this case. 
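The raid bdev stays in the configuring state, so base bdevs that show up later are still claimed and added through raid_bdev_examine().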
*/ + } + + return 0; +} + +/* + * brief: + * raid_bdev_parse_config is used to find the raid bdev config section and parse it + * Format of config file: + * params: + * none + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_parse_config(void) +{ + int ret; + struct spdk_conf_section *conf_section; + + conf_section = spdk_conf_first_section(NULL); + while (conf_section != NULL) { + if (spdk_conf_section_match_prefix(conf_section, "RAID")) { + ret = raid_bdev_parse_raid(conf_section); + if (ret < 0) { + SPDK_ERRLOG("Unable to parse raid bdev section\n"); + return ret; + } + } + conf_section = spdk_conf_next_section(conf_section); + } + + return 0; +} + +/* + * brief: + * raid_bdev_fini_start is called when bdev layer is starting the + * shutdown process + * params: + * none + * returns: + * none + */ +static void +raid_bdev_fini_start(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n"); + g_shutdown_started = true; +} + +/* + * brief: + * raid_bdev_exit is called on raid bdev module exit time by bdev layer + * params: + * none + * returns: + * none + */ +static void +raid_bdev_exit(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); + raid_bdev_free(); +} + +/* + * brief: + * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid + * module + * params: + * none + * returns: + * size of spdk_bdev_io context for raid + */ +static int +raid_bdev_get_ctx_size(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); + return sizeof(struct raid_bdev_io); +} + +/* + * brief: + * raid_bdev_get_running_config is used to get the configuration options. + * + * params: + * fp - The pointer to a file that will be written to the configuration options. + * returns: + * none + */ +static void +raid_bdev_get_running_config(FILE *fp) +{ + struct raid_bdev *raid_bdev; + struct spdk_bdev *base; + int index = 1; + uint16_t i; + + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configured_list, state_link) { + fprintf(fp, + "\n" + "[RAID%d]\n" + " Name %s\n" + " StripSize %" PRIu32 "\n" + " NumDevices %hu\n" + " RaidLevel %hhu\n", + index, raid_bdev->bdev.name, raid_bdev->strip_size, + raid_bdev->num_base_bdevs, raid_bdev->raid_level); + fprintf(fp, + " Devices "); + for (i = 0; i < raid_bdev->num_base_bdevs; i++) { + base = raid_bdev->base_bdev_info[i].bdev; + if (base) { + fprintf(fp, + "%s ", + base->name); + } + } + fprintf(fp, + "\n"); + index++; + } +} + +/* + * brief: + * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be + * claimed by raid bdev or not. + * params: + * bdev_name - represents base bdev name + * _raid_cfg - pointer to raid bdev config parsed from config file + * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct + * slot. This field is only valid if return value of this function is true + * returns: + * true - if bdev can be claimed + * false - if bdev can't be claimed + */ +static bool +raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg, + uint32_t *base_bdev_slot) +{ + struct raid_bdev_config *raid_cfg; + uint32_t i; + + TAILQ_FOREACH(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link) { + for (i = 0; i < raid_cfg->num_base_bdevs; i++) { + /* + * Check if the base bdev name is part of raid bdev configuration. 
+ * If match is found then return true and the slot information where + * this base bdev should be inserted in raid bdev + */ + if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) { + *_raid_cfg = raid_cfg; + *base_bdev_slot = i; + return true; + } + } + } + + return false; +} + + +static struct spdk_bdev_module g_raid_if = { + .name = "raid", + .module_init = raid_bdev_init, + .fini_start = raid_bdev_fini_start, + .module_fini = raid_bdev_exit, + .get_ctx_size = raid_bdev_get_ctx_size, + .examine_config = raid_bdev_examine, + .config_text = raid_bdev_get_running_config, + .async_init = false, + .async_fini = false, +}; +SPDK_BDEV_MODULE_REGISTER(&g_raid_if) + +/* + * brief: + * raid_bdev_init is the initialization function for raid bdev module + * params: + * none + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_init(void) +{ + int ret; + + TAILQ_INIT(&g_spdk_raid_bdev_configured_list); + TAILQ_INIT(&g_spdk_raid_bdev_configuring_list); + TAILQ_INIT(&g_spdk_raid_bdev_list); + TAILQ_INIT(&g_spdk_raid_bdev_offline_list); + + /* Parse config file for raids */ + ret = raid_bdev_parse_config(); + if (ret < 0) { + SPDK_ERRLOG("raid bdev init failed parsing\n"); + raid_bdev_free(); + return ret; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); + + return 0; +} + +/* + * brief: + * raid_bdev_create allocates raid bdev based on passed configuration + * params: + * raid_cfg - configuration of raid bdev + * returns: + * 0 - success + * non zero - failure + */ +int +raid_bdev_create(struct raid_bdev_config *raid_cfg) +{ + struct raid_bdev *raid_bdev; + struct spdk_bdev *raid_bdev_gen; + + raid_bdev = calloc(1, sizeof(*raid_bdev)); + if (!raid_bdev) { + SPDK_ERRLOG("Unable to allocate memory for raid bdev\n"); + return -ENOMEM; + } + + assert(raid_cfg->num_base_bdevs != 0); + raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs; + raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, + sizeof(struct raid_base_bdev_info)); + if (!raid_bdev->base_bdev_info) { + SPDK_ERRLOG("Unable able to allocate base bdev info\n"); + free(raid_bdev); + return -ENOMEM; + } + + raid_bdev->strip_size = raid_cfg->strip_size; + raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; + raid_bdev->config = raid_cfg; + + raid_bdev_gen = &raid_bdev->bdev; + + raid_bdev_gen->name = strdup(raid_cfg->name); + if (!raid_bdev_gen->name) { + SPDK_ERRLOG("Unable to allocate name for raid\n"); + free(raid_bdev->base_bdev_info); + free(raid_bdev); + return -ENOMEM; + } + + raid_bdev_gen->product_name = "Pooled Device"; + raid_bdev_gen->ctxt = raid_bdev; + raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; + raid_bdev_gen->module = &g_raid_if; + + TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link); + TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_list, raid_bdev, global_link); + + raid_cfg->raid_bdev = raid_bdev; + + return 0; +} + +/* + * brief + * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
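+ * It opens a descriptor on the base bdev, claims it for the raid module and stores both in the raid bdev's base bdev slot.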
+ * params: + * raid_bdev - pointer to raid bdev + * bdev - pointer to base bdev + * base_bdev_slot - position to add base bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev, + uint32_t base_bdev_slot) +{ + struct spdk_bdev_desc *desc; + int rc; + + rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc); + if (rc != 0) { + SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name); + return rc; + } + + rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); + if (rc != 0) { + SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); + spdk_bdev_close(desc); + return rc; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name); + + assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE); + assert(base_bdev_slot < raid_bdev->num_base_bdevs); + + raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev; + raid_bdev->base_bdev_info[base_bdev_slot].desc = desc; + raid_bdev->num_base_bdevs_discovered++; + assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); + + return 0; +} + +/* + * brief: + * If raid bdev config is complete, then only register the raid bdev to + * bdev layer and remove this raid bdev from configuring list and + * insert the raid bdev to configured list + * params: + * raid_bdev - pointer to raid bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_configure(struct raid_bdev *raid_bdev) +{ + uint32_t blocklen; + uint64_t min_blockcnt; + struct spdk_bdev *raid_bdev_gen; + int rc = 0; + + blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen; + min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt; + for (uint32_t i = 1; i < raid_bdev->num_base_bdevs; i++) { + /* Calculate minimum block count from all base bdevs */ + if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) { + min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt; + } + + /* Check blocklen for all base bdevs that it should be same */ + if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) { + /* + * Assumption is that all the base bdevs for any raid bdev should + * have same blocklen + */ + SPDK_ERRLOG("Blocklen of various bdevs not matching\n"); + return -EINVAL; + } + } + + raid_bdev_gen = &raid_bdev->bdev; + raid_bdev_gen->write_cache = 0; + raid_bdev_gen->blocklen = blocklen; + raid_bdev_gen->ctxt = raid_bdev; + raid_bdev_gen->fn_table = &g_raid_bdev_fn_table; + raid_bdev_gen->module = &g_raid_if; + raid_bdev->strip_size = (raid_bdev->strip_size * 1024) / blocklen; + raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); + raid_bdev->blocklen_shift = spdk_u32log2(blocklen); + if (raid_bdev->num_base_bdevs > 1) { + raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size; + raid_bdev_gen->split_on_optimal_io_boundary = true; + } else { + /* Do not need to split reads/writes on single bdev RAID modules. 
*/ + raid_bdev_gen->optimal_io_boundary = 0; + raid_bdev_gen->split_on_optimal_io_boundary = false; + } + + /* + * RAID bdev logic is for striping so take the minimum block count based + * approach where total block count of raid bdev is the number of base + * bdev times the minimum block count of any base bdev + */ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n", + min_blockcnt, + raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); + raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) << + raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs; + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev); + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt, + raid_bdev_gen->blocklen); + if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) { + raid_bdev->state = RAID_BDEV_STATE_ONLINE; + spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb, + sizeof(struct raid_bdev_io_channel), + raid_bdev->bdev.name); + rc = spdk_bdev_register(raid_bdev_gen); + if (rc != 0) { + SPDK_ERRLOG("Unable to register pooled bdev and stay at configuring state\n"); + spdk_io_device_unregister(raid_bdev, NULL); + raid_bdev->state = RAID_BDEV_STATE_CONFIGURING; + return rc; + } + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen); + TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link); + TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link); + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n", + raid_bdev_gen->name, raid_bdev); + } + + return 0; +} + +/* + * brief: + * If raid bdev is online and registered, change the bdev state to + * configuring and unregister this raid device. Queue this raid device + * in configuring list + * params: + * raid_bdev - pointer to raid bdev + * returns: + * none + */ +static void +raid_bdev_deconfigure(struct raid_bdev *raid_bdev) +{ + if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) { + return; + } + + assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); + TAILQ_REMOVE(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link); + raid_bdev->state = RAID_BDEV_STATE_OFFLINE; + assert(raid_bdev->num_base_bdevs_discovered); + TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link); + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n"); + + spdk_io_device_unregister(raid_bdev, NULL); + spdk_bdev_unregister(&raid_bdev->bdev, NULL, NULL); +} + +/* + * brief: + * raid_bdev_remove_base_bdev function is called by below layers when base_bdev + * is removed. This function checks if this base bdev is part of any raid bdev + * or not. If yes, it takes necessary action on that particular raid bdev. 
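+ * The affected slot is marked for removal; depending on the raid bdev state, the base bdev resources are freed immediately or the whole raid bdev is deconfigured.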
+ * params: + * ctx - pointer to the base bdev which got removed + * returns: + * none + */ +void +raid_bdev_remove_base_bdev(void *ctx) +{ + struct spdk_bdev *base_bdev = ctx; + struct raid_bdev *raid_bdev; + uint16_t i; + bool found = false; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n"); + + /* Find the raid_bdev which has claimed this base_bdev */ + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, global_link) { + for (i = 0; i < raid_bdev->num_base_bdevs; i++) { + if (raid_bdev->base_bdev_info[i].bdev == base_bdev) { + found = true; + break; + } + } + if (found == true) { + break; + } + } + + if (found == false) { + SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); + return; + } + + assert(raid_bdev != NULL); + assert(raid_bdev->base_bdev_info[i].bdev); + assert(raid_bdev->base_bdev_info[i].desc); + raid_bdev->base_bdev_info[i].remove_scheduled = true; + + if ((raid_bdev->destruct_called == true || + raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) && + raid_bdev->base_bdev_info[i].bdev != NULL) { + /* + * As the raid bdev is either not registered yet or already unregistered, + * cleanup should be done here itself + */ + raid_bdev_free_base_bdev_resource(raid_bdev, i); + if (raid_bdev->num_base_bdevs_discovered == 0) { + /* There is no base bdev left for this raid, so free the raid device */ + raid_bdev_cleanup(raid_bdev); + return; + } + } + + raid_bdev_deconfigure(raid_bdev); +} + +/* + * brief: + * raid_bdev_add_base_device function is the actual function which either adds + * the base device to an existing raid bdev or creates a new raid bdev. It also claims + * the base device and keeps the open descriptor. + * params: + * raid_cfg - pointer to raid bdev config + * bdev - pointer to base bdev + * base_bdev_slot - position to add base bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev, + uint32_t base_bdev_slot) +{ + struct raid_bdev *raid_bdev; + int rc; + + raid_bdev = raid_cfg->raid_bdev; + if (!raid_bdev) { + SPDK_ERRLOG("Raid bdev is not created yet '%s'\n", bdev->name); + return -ENODEV; + } + + rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot); + if (rc != 0) { + SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name); + return rc; + } + + assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); + + if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { + rc = raid_bdev_configure(raid_bdev); + if (rc != 0) { + SPDK_ERRLOG("Failed to configure raid bdev\n"); + return rc; + } + } + + return 0; +} + +/* + * brief: + * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't + * exist or fails to add. If all base bdevs are successfully added, the raid bdev + * moves to the configured state and becomes available. Otherwise, the raid bdev + * stays at the configuring state with added base bdevs. + * params: + * raid_cfg - pointer to raid bdev config + * returns: + * 0 - The raid bdev moves to the configured state or stays at the configuring + * state with added base bdevs due to any nonexistent base bdev. + * non zero - Failed to add any base bdev and stays at the configuring state with + * added base bdevs. 
+ */ +int +raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg) +{ + struct spdk_bdev *base_bdev; + uint8_t i; + int rc = 0, _rc; + + for (i = 0; i < raid_cfg->num_base_bdevs; i++) { + base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); + if (base_bdev == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n", + raid_cfg->base_bdev[i].name); + continue; + } + + _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i); + if (_rc != 0) { + SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", + raid_cfg->base_bdev[i].name, raid_cfg->name, + spdk_strerror(-_rc)); + if (rc == 0) { + rc = _rc; + } + } + } + + return rc; +} + +/* + * brief: + * raid_bdev_examine function is the examine function call by the below layers + * like bdev_nvme layer. This function will check if this base bdev can be + * claimed by this raid bdev or not. + * params: + * bdev - pointer to base bdev + * returns: + * none + */ +static void +raid_bdev_examine(struct spdk_bdev *bdev) +{ + struct raid_bdev_config *raid_cfg; + uint32_t base_bdev_slot; + + if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) { + raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot); + } else { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", + bdev->name); + } + + spdk_bdev_module_examine_done(&g_raid_if); +} + +/* Log component for bdev raid bdev module */ +SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) diff --git a/src/spdk/lib/bdev/raid/bdev_raid.h b/src/spdk/lib/bdev/raid/bdev_raid.h new file mode 100644 index 00000000..39f055ed --- /dev/null +++ b/src/spdk/lib/bdev/raid/bdev_raid.h @@ -0,0 +1,225 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_RAID_INTERNAL_H +#define SPDK_BDEV_RAID_INTERNAL_H + +#include "spdk/bdev_module.h" + +/* + * Raid state describes the state of the raid. 
This raid bdev can be either in + * configured list or configuring list + */ +enum raid_bdev_state { + /* raid bdev is ready and is seen by upper layers */ + RAID_BDEV_STATE_ONLINE, + + /* + * raid bdev is configuring, not all underlying bdevs are present. + * And can't be seen by upper layers. + */ + RAID_BDEV_STATE_CONFIGURING, + + /* + * In offline state, raid bdev layer will complete all incoming commands without + * submitting to underlying base nvme bdevs + */ + RAID_BDEV_STATE_OFFLINE, + + /* raid bdev max, new states should be added before this */ + RAID_BDEV_MAX +}; + +/* + * raid_base_bdev_info contains information for the base bdevs which are part of some + * raid. This structure contains the per base bdev information. Whatever is + * required per base device for raid bdev will be kept here + */ +struct raid_base_bdev_info { + /* pointer to base spdk bdev */ + struct spdk_bdev *bdev; + + /* pointer to base bdev descriptor opened by raid bdev */ + struct spdk_bdev_desc *desc; + + /* + * When underlying base device calls the hot plug function on drive removal, + * this flag will be set and later after doing some processing, base device + * descriptor will be closed + */ + bool remove_scheduled; +}; + +/* + * raid_bdev is the single entity structure which contains SPDK block device + * and the information related to any raid bdev either configured or + * in configuring list. io device is created on this. + */ +struct raid_bdev { + /* raid bdev device, this will get registered in bdev layer */ + struct spdk_bdev bdev; + + /* link of raid bdev to link it to configured, configuring or offline list */ + TAILQ_ENTRY(raid_bdev) state_link; + + /* link of raid bdev to link it to global raid bdev list */ + TAILQ_ENTRY(raid_bdev) global_link; + + /* pointer to config file entry */ + struct raid_bdev_config *config; + + /* array of base bdev info */ + struct raid_base_bdev_info *base_bdev_info; + + /* strip size of raid bdev in blocks */ + uint32_t strip_size; + + /* strip size bit shift for optimized calculation */ + uint32_t strip_size_shift; + + /* block length bit shift for optimized calculation */ + uint32_t blocklen_shift; + + /* state of raid bdev */ + enum raid_bdev_state state; + + /* number of base bdevs comprising raid bdev */ + uint16_t num_base_bdevs; + + /* number of base bdevs discovered */ + uint16_t num_base_bdevs_discovered; + + /* Raid Level of this raid bdev */ + uint8_t raid_level; + + /* Set to true if destruct is called for this raid bdev */ + bool destruct_called; +}; + +/* + * raid_bdev_io is the context part of bdev_io. It contains the information + * related to bdev_io for a pooled bdev + */ +struct raid_bdev_io { + /* WaitQ entry, used only in waitq logic */ + struct spdk_bdev_io_wait_entry waitq_entry; + + /* Original channel for this IO, used in queuing logic */ + struct spdk_io_channel *ch; + + /* Used for tracking progress on resets sent to member disks. 
*/ + uint8_t base_bdev_reset_submitted; + uint8_t base_bdev_reset_completed; + uint8_t base_bdev_reset_status; +}; + +/* + * raid_base_bdev_config is the per base bdev data structure which contains + * information w.r.t to per base bdev during parsing config + */ +struct raid_base_bdev_config { + /* base bdev name from config file */ + char *name; +}; + +/* + * raid_bdev_config contains the raid bdev config related information after + * parsing the config file + */ +struct raid_bdev_config { + /* base bdev config per underlying bdev */ + struct raid_base_bdev_config *base_bdev; + + /* Points to already created raid bdev */ + struct raid_bdev *raid_bdev; + + char *name; + + /* strip size of this raid bdev in kilo bytes */ + uint32_t strip_size; + + /* number of base bdevs */ + uint8_t num_base_bdevs; + + /* raid level */ + uint8_t raid_level; + + TAILQ_ENTRY(raid_bdev_config) link; +}; + +/* + * raid_config is the top level structure representing the raid bdev config as read + * from config file for all raids + */ +struct raid_config { + /* raid bdev context from config file */ + TAILQ_HEAD(, raid_bdev_config) raid_bdev_config_head; + + /* total raid bdev from config file */ + uint8_t total_raid_bdev; +}; + +/* + * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It + * contains the relationship of raid bdev io channel with base bdev io channels. + */ +struct raid_bdev_io_channel { + /* Array of IO channels of base bdevs */ + struct spdk_io_channel **base_channel; +}; + +/* TAIL heads for various raid bdev lists */ +TAILQ_HEAD(spdk_raid_configured_tailq, raid_bdev); +TAILQ_HEAD(spdk_raid_configuring_tailq, raid_bdev); +TAILQ_HEAD(spdk_raid_all_tailq, raid_bdev); +TAILQ_HEAD(spdk_raid_offline_tailq, raid_bdev); + +extern struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list; +extern struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list; +extern struct spdk_raid_all_tailq g_spdk_raid_bdev_list; +extern struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list; +extern struct raid_config g_spdk_raid_config; + +int raid_bdev_create(struct raid_bdev_config *raid_cfg); +void raid_bdev_remove_base_bdev(void *ctx); +int raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg); +void raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint32_t slot); +void raid_bdev_cleanup(struct raid_bdev *raid_bdev); +int raid_bdev_config_add(const char *raid_name, int strip_size, int num_base_bdevs, + int raid_level, struct raid_bdev_config **_raid_cfg); +int raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, + const char *base_bdev_name, uint32_t slot); +void raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg); +struct raid_bdev_config *raid_bdev_config_find_by_name(const char *raid_name); + +#endif // SPDK_BDEV_RAID_INTERNAL_H diff --git a/src/spdk/lib/bdev/raid/bdev_raid_rpc.c b/src/spdk/lib/bdev/raid/bdev_raid_rpc.c new file mode 100644 index 00000000..00b3bc9d --- /dev/null +++ b/src/spdk/lib/bdev/raid/bdev_raid_rpc.c @@ -0,0 +1,408 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/rpc.h" +#include "spdk/bdev.h" +#include "bdev_raid.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" +#include "spdk/env.h" + +#define RPC_MAX_BASE_BDEVS 255 + +SPDK_LOG_REGISTER_COMPONENT("raidrpc", SPDK_LOG_RAID_RPC) + +/* + * Input structure for get_raid_bdevs RPC + */ +struct rpc_get_raid_bdevs { + /* category - all or online or configuring or offline */ + char *category; +}; + +/* + * brief: + * free_rpc_get_raids function frees RPC get_raids related parameters + * params: + * req - pointer to RPC request + * returns: + * none + */ +static void +free_rpc_get_raid_bdevs(struct rpc_get_raid_bdevs *req) +{ + free(req->category); +} + +/* + * Decoder object for RPC get_raids + */ +static const struct spdk_json_object_decoder rpc_get_raid_bdevs_decoders[] = { + {"category", offsetof(struct rpc_get_raid_bdevs, category), spdk_json_decode_string}, +}; + +/* + * brief: + * spdk_rpc_get_raids function is the RPC for get_raids. This is used to list + * all the raid bdev names based on the input category requested. Category should be + * one of "all", "online", "configuring" or "offline". "all" means all the raids + * whether they are online or configuring or offline. "online" is the raid bdev which + * is registered with bdev layer. "configuring" is the raid bdev which does not have + * full configuration discovered yet. "offline" is the raid bdev which is not + * registered with bdev as of now and it has encountered any error or user has + * requested to offline the raid. 
+ * params: + * request - pointer to json rpc request + * params - pointer to request parameters + * returns: + * none + */ +static void +spdk_rpc_get_raid_bdevs(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + struct rpc_get_raid_bdevs req = {}; + struct spdk_json_write_ctx *w; + struct raid_bdev *raid_bdev; + + if (spdk_json_decode_object(params, rpc_get_raid_bdevs_decoders, + SPDK_COUNTOF(rpc_get_raid_bdevs_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_get_raid_bdevs(&req); + return; + } + + if (!(strcmp(req.category, "all") == 0 || + strcmp(req.category, "online") == 0 || + strcmp(req.category, "configuring") == 0 || + strcmp(req.category, "offline") == 0)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_get_raid_bdevs(&req); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_get_raid_bdevs(&req); + return; + } + + spdk_json_write_array_begin(w); + + /* Get raid bdev list based on the category requested */ + if (strcmp(req.category, "all") == 0) { + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, global_link) { + spdk_json_write_string(w, raid_bdev->bdev.name); + } + } else if (strcmp(req.category, "online") == 0) { + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configured_list, state_link) { + spdk_json_write_string(w, raid_bdev->bdev.name); + } + } else if (strcmp(req.category, "configuring") == 0) { + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configuring_list, state_link) { + spdk_json_write_string(w, raid_bdev->bdev.name); + } + } else { + TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_offline_list, state_link) { + spdk_json_write_string(w, raid_bdev->bdev.name); + } + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + free_rpc_get_raid_bdevs(&req); +} +SPDK_RPC_REGISTER("get_raid_bdevs", spdk_rpc_get_raid_bdevs, SPDK_RPC_RUNTIME) + +/* + * Base bdevs in RPC construct_raid + */ +struct rpc_construct_raid_base_bdevs { + /* Number of base bdevs */ + size_t num_base_bdevs; + + /* List of base bdev names */ + char *base_bdevs[RPC_MAX_BASE_BDEVS]; +}; + +/* + * Input structure for RPC construct_raid + */ +struct rpc_construct_raid_bdev { + /* Raid bdev name */ + char *name; + + /* RAID strip size */ + uint32_t strip_size; + + /* RAID level */ + uint8_t raid_level; + + /* Base bdevs information */ + struct rpc_construct_raid_base_bdevs base_bdevs; +}; + +/* + * brief: + * free_rpc_construct_raid_bdev function is used to free RPC construct_raid_bdev related parameters + * params: + * req - pointer to RPC request + * returns: + * none + */ +static void +free_rpc_construct_raid_bdev(struct rpc_construct_raid_bdev *req) +{ + free(req->name); + for (size_t i = 0; i < req->base_bdevs.num_base_bdevs; i++) { + free(req->base_bdevs.base_bdevs[i]); + } +} + +/* + * Decoder function for RPC construct_raid_bdev to decode base bdevs list + */ +static int +decode_base_bdevs(const struct spdk_json_val *val, void *out) +{ + struct rpc_construct_raid_base_bdevs *base_bdevs = out; + return spdk_json_decode_array(val, spdk_json_decode_string, base_bdevs->base_bdevs, + RPC_MAX_BASE_BDEVS, &base_bdevs->num_base_bdevs, sizeof(char *)); +} + +/* + * Decoder object for RPC construct_raid + */ +static const struct spdk_json_object_decoder rpc_construct_raid_bdev_decoders[] = { + {"name", offsetof(struct rpc_construct_raid_bdev, name), spdk_json_decode_string}, + 
{"strip_size", offsetof(struct rpc_construct_raid_bdev, strip_size), spdk_json_decode_uint32}, + {"raid_level", offsetof(struct rpc_construct_raid_bdev, raid_level), spdk_json_decode_uint32}, + {"base_bdevs", offsetof(struct rpc_construct_raid_bdev, base_bdevs), decode_base_bdevs}, +}; + +/* + * brief: + * spdk_rpc_construct_raid_bdev function is the RPC for construct_raids. It takes + * input as raid bdev name, raid level, strip size in KB and list of base bdev names. + * params: + * requuest - pointer to json rpc request + * params - pointer to request parameters + * returns: + * none + */ +static void +spdk_rpc_construct_raid_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_raid_bdev req = {}; + struct spdk_json_write_ctx *w; + struct raid_bdev_config *raid_cfg; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_raid_bdev_decoders, + SPDK_COUNTOF(rpc_construct_raid_bdev_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_construct_raid_bdev(&req); + return; + } + + rc = raid_bdev_config_add(req.name, req.strip_size, req.base_bdevs.num_base_bdevs, req.raid_level, + &raid_cfg); + if (rc != 0) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to add RAID bdev config %s: %s", + req.name, spdk_strerror(-rc)); + free_rpc_construct_raid_bdev(&req); + return; + } + + for (size_t i = 0; i < req.base_bdevs.num_base_bdevs; i++) { + rc = raid_bdev_config_add_base_bdev(raid_cfg, req.base_bdevs.base_bdevs[i], i); + if (rc != 0) { + raid_bdev_config_cleanup(raid_cfg); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to add base bdev %s to RAID bdev config %s: %s", + req.base_bdevs.base_bdevs[i], req.name, + spdk_strerror(-rc)); + free_rpc_construct_raid_bdev(&req); + return; + } + } + + rc = raid_bdev_create(raid_cfg); + if (rc != 0) { + raid_bdev_config_cleanup(raid_cfg); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to create RAID bdev %s: %s", + req.name, spdk_strerror(-rc)); + free_rpc_construct_raid_bdev(&req); + return; + } + + rc = raid_bdev_add_base_devices(raid_cfg); + if (rc != 0) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to add any base bdev to RAID bdev %s: %s", + req.name, spdk_strerror(-rc)); + free_rpc_construct_raid_bdev(&req); + return; + } + + free_rpc_construct_raid_bdev(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("construct_raid_bdev", spdk_rpc_construct_raid_bdev, SPDK_RPC_RUNTIME) + +/* + * Input structure for RPC destroy_raid + */ +struct rpc_destroy_raid_bdev { + /* raid bdev name */ + char *name; +}; + +/* + * brief: + * free_rpc_destroy_raid_bdev function is used to free RPC destroy_raid_bdev related parameters + * params: + * req - pointer to RPC request + * params: + * none + */ +static void +free_rpc_destroy_raid_bdev(struct rpc_destroy_raid_bdev *req) +{ + free(req->name); +} + +/* + * Decoder object for RPC destroy_raid + */ +static const struct spdk_json_object_decoder rpc_destroy_raid_bdev_decoders[] = { + {"name", offsetof(struct rpc_destroy_raid_bdev, name), spdk_json_decode_string}, +}; + +/* + * brief: + * Since destroying raid_bdev is asynchronous operation, so this function is + * used to 
check if the raid bdev still exists. If it is still there, it will create an + * event and check again later; otherwise it will proceed with cleanup + * params: + * arg - pointer to raid bdev cfg + * returns: + * none + */ +static void +raid_bdev_config_destroy(void *arg) +{ + struct raid_bdev_config *raid_cfg = arg; + + assert(raid_cfg != NULL); + if (raid_cfg->raid_bdev != NULL) { + /* + * If raid bdev exists for this config, wait for raid bdev to get + * destroyed and come back later + */ + spdk_thread_send_msg(spdk_get_thread(), raid_bdev_config_destroy, + raid_cfg); + } else { + raid_bdev_config_cleanup(raid_cfg); + } +} + +/* + * brief: + * spdk_rpc_destroy_raid_bdev function is the RPC for destroy_raid_bdev. It takes the raid + * name as input and destroys that raid bdev, including freeing the base bdev + * resources. + * params: + * request - pointer to json rpc request + * params - pointer to request parameters + * returns: + * none + */ +static void +spdk_rpc_destroy_raid_bdev(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + struct rpc_destroy_raid_bdev req = {}; + struct spdk_json_write_ctx *w; + struct raid_bdev_config *raid_cfg = NULL; + struct spdk_bdev *base_bdev; + + if (spdk_json_decode_object(params, rpc_destroy_raid_bdev_decoders, + SPDK_COUNTOF(rpc_destroy_raid_bdev_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_destroy_raid_bdev(&req); + return; + } + + raid_cfg = raid_bdev_config_find_by_name(req.name); + if (raid_cfg == NULL) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "raid bdev %s is not found in config", req.name); + free_rpc_destroy_raid_bdev(&req); + return; + } + + /* Remove all the base bdevs from this raid bdev before destroying the raid bdev */ + for (uint32_t i = 0; i < raid_cfg->num_base_bdevs; i++) { + base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name); + if (base_bdev != NULL) { + raid_bdev_remove_base_bdev(base_bdev); + } + } + + raid_bdev_config_destroy(raid_cfg); + + free_rpc_destroy_raid_bdev(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("destroy_raid_bdev", spdk_rpc_destroy_raid_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/rbd/Makefile b/src/spdk/lib/bdev/rbd/Makefile new file mode 100644 index 00000000..e7c97aca --- /dev/null +++ b/src/spdk/lib/bdev/rbd/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_rbd.c bdev_rbd_rpc.c +LIBNAME = bdev_rbd + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd.c b/src/spdk/lib/bdev/rbd/bdev_rbd.c new file mode 100644 index 00000000..34c2466b --- /dev/null +++ b/src/spdk/lib/bdev/rbd/bdev_rbd.c @@ -0,0 +1,740 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "bdev_rbd.h" + +#include +#include +#include + +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/thread.h" +#include "spdk/json.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#define SPDK_RBD_QUEUE_DEPTH 128 + +static int bdev_rbd_count = 0; + +#define BDEV_RBD_POLL_US 50 + +struct bdev_rbd { + struct spdk_bdev disk; + char *rbd_name; + char *pool_name; + rbd_image_info_t info; + TAILQ_ENTRY(bdev_rbd) tailq; + struct spdk_poller *reset_timer; + struct spdk_bdev_io *reset_bdev_io; +}; + +struct bdev_rbd_io_channel { + rados_ioctx_t io_ctx; + rados_t cluster; + struct pollfd pfd; + rbd_image_t image; + struct bdev_rbd *disk; + struct spdk_poller *poller; +}; + +struct bdev_rbd_io { + uint64_t remaining_len; + int num_segments; + bool failed; +}; + +static void +bdev_rbd_free(struct bdev_rbd *rbd) +{ + if (!rbd) { + return; + } + + free(rbd->disk.name); + free(rbd->rbd_name); + free(rbd->pool_name); + free(rbd); +} + +static int +bdev_rados_context_init(const char *rbd_pool_name, rados_t *cluster, + rados_ioctx_t *io_ctx) +{ + int ret; + + ret = rados_create(cluster, NULL); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados_t struct\n"); + return -1; + } + + ret = rados_conf_read_file(*cluster, NULL); + if (ret < 0) { + SPDK_ERRLOG("Failed to read conf file\n"); + rados_shutdown(*cluster); + return -1; + } + + ret = rados_connect(*cluster); + if (ret < 0) { + SPDK_ERRLOG("Failed to connect to rbd_pool\n"); + rados_shutdown(*cluster); + return -1; + } + + ret = rados_ioctx_create(*cluster, rbd_pool_name, io_ctx); + + if (ret < 0) { + SPDK_ERRLOG("Failed to create ioctx\n"); + rados_shutdown(*cluster); + return -1; + } + + return 0; +} + +static int +bdev_rbd_init(const char *rbd_pool_name, const char *rbd_name, rbd_image_info_t *info) +{ + int ret; + rados_t cluster = NULL; + rados_ioctx_t io_ctx = NULL; + rbd_image_t image = NULL; + + ret = bdev_rados_context_init(rbd_pool_name, &cluster, &io_ctx); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados context for rbd_pool=%s\n", + rbd_pool_name); + return -1; + } + + ret = rbd_open(io_ctx, rbd_name, &image, NULL); + if (ret < 0) { + SPDK_ERRLOG("Failed to open specified rbd device\n"); + goto err; + } + ret = rbd_stat(image, info, sizeof(*info)); + rbd_close(image); + if (ret < 0) { + SPDK_ERRLOG("Failed to stat specified rbd device\n"); + goto err; + } + + rados_ioctx_destroy(io_ctx); + return 0; +err: + rados_ioctx_destroy(io_ctx); + rados_shutdown(cluster); + return -1; +} + +static void +bdev_rbd_exit(rbd_image_t image) +{ + rbd_flush(image); + rbd_close(image); +} + +static void +bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg) +{ + /* Doing nothing here */ +} + +static int +bdev_rbd_start_aio(rbd_image_t image, struct spdk_bdev_io *bdev_io, + void *buf, uint64_t offset, size_t len) +{ + int ret; + rbd_completion_t comp; + + ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb, + &comp); + if (ret < 0) { + return -1; + } + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + ret = rbd_aio_read(image, offset, len, + buf, comp); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + ret = rbd_aio_write(image, offset, len, + buf, comp); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) { + ret = rbd_aio_flush(image, comp); + } + + if (ret < 0) { + rbd_aio_release(comp); + return -1; + } + + return 0; +} + +static int bdev_rbd_library_init(void); + +static int 
+bdev_rbd_get_ctx_size(void) +{ + return sizeof(struct bdev_rbd_io); +} + +static struct spdk_bdev_module rbd_if = { + .name = "rbd", + .module_init = bdev_rbd_library_init, + .get_ctx_size = bdev_rbd_get_ctx_size, + +}; +SPDK_BDEV_MODULE_REGISTER(&rbd_if) + +static int64_t +bdev_rbd_rw(struct bdev_rbd *disk, struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io, struct iovec *iov, + int iovcnt, size_t len, uint64_t offset) +{ + struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx; + struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch); + size_t remaining = len; + int i, rc; + + rbd_io->remaining_len = 0; + rbd_io->num_segments = 0; + rbd_io->failed = false; + + for (i = 0; i < iovcnt && remaining > 0; i++) { + size_t seg_len = spdk_min(remaining, iov[i].iov_len); + + rc = bdev_rbd_start_aio(rbdio_ch->image, bdev_io, iov[i].iov_base, offset, seg_len); + if (rc) { + /* + * This bdev_rbd_start_aio() call failed, but if any previous ones were + * submitted, we need to wait for them to finish. + */ + if (rbd_io->num_segments == 0) { + /* No previous I/O submitted - return error code immediately. */ + return rc; + } + + /* Return and wait for outstanding I/O to complete. */ + rbd_io->failed = true; + return 0; + } + + rbd_io->num_segments++; + rbd_io->remaining_len += seg_len; + + offset += seg_len; + remaining -= seg_len; + } + + return 0; +} + +static int64_t +bdev_rbd_flush(struct bdev_rbd *disk, struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io, uint64_t offset, uint64_t nbytes) +{ + struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch); + + return bdev_rbd_start_aio(rbdio_ch->image, bdev_io, NULL, offset, nbytes); +} + +static int +bdev_rbd_reset_timer(void *arg) +{ + struct bdev_rbd *disk = arg; + + /* + * TODO: This should check if any I/O is still in flight before completing the reset. + * For now, just complete after the timer expires. + */ + spdk_bdev_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + spdk_poller_unregister(&disk->reset_timer); + disk->reset_bdev_io = NULL; + + return -1; +} + +static int +bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io) +{ + /* + * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a + * timer to wait for in-flight I/O to complete. 
+ */ + assert(disk->reset_bdev_io == NULL); + disk->reset_bdev_io = bdev_io; + disk->reset_timer = spdk_poller_register(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000); + + return 0; +} + +static int +bdev_rbd_destruct(void *ctx) +{ + struct bdev_rbd *rbd = ctx; + + spdk_io_device_unregister(rbd, NULL); + + bdev_rbd_free(rbd); + return 0; +} + +static void bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + int ret; + + ret = bdev_rbd_rw(bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); + + if (ret != 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int _bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + return bdev_rbd_rw((struct bdev_rbd *)bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return bdev_rbd_flush((struct bdev_rbd *)bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + + case SPDK_BDEV_IO_TYPE_RESET: + return bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt, + bdev_io); + + default: + return -1; + } + return 0; +} + +static void bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_rbd_submit_request(ch, bdev_io) < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + + default: + return false; + } +} + +static int +bdev_rbd_io_poll(void *arg) +{ + struct bdev_rbd_io_channel *ch = arg; + int i, io_status, rc; + rbd_completion_t comps[SPDK_RBD_QUEUE_DEPTH]; + struct spdk_bdev_io *bdev_io; + struct bdev_rbd_io *rbd_io; + + rc = poll(&ch->pfd, 1, 0); + + /* check the return value of poll since we have only one fd for each channel */ + if (rc != 1) { + return 0; + } + + rc = rbd_poll_io_events(ch->image, comps, SPDK_RBD_QUEUE_DEPTH); + for (i = 0; i < rc; i++) { + bdev_io = rbd_aio_get_arg(comps[i]); + rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx; + io_status = rbd_aio_get_return_value(comps[i]); + + assert(rbd_io->num_segments > 0); + rbd_io->num_segments--; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + if (io_status > 0) { + /* For reads, io_status is the length */ + rbd_io->remaining_len -= io_status; + } + + if (rbd_io->num_segments == 0 && rbd_io->remaining_len != 0) { + rbd_io->failed = true; + } + } else { + /* For others, 0 means success */ + if (io_status != 0) { + rbd_io->failed = true; + } + } + + rbd_aio_release(comps[i]); + + if (rbd_io->num_segments == 0) { + spdk_bdev_io_complete(bdev_io, + rbd_io->failed ? 
SPDK_BDEV_IO_STATUS_FAILED : SPDK_BDEV_IO_STATUS_SUCCESS); + } + } + + return rc; +} + +static void +bdev_rbd_free_channel(struct bdev_rbd_io_channel *ch) +{ + if (!ch) { + return; + } + + if (ch->image) { + bdev_rbd_exit(ch->image); + } + + if (ch->io_ctx) { + rados_ioctx_destroy(ch->io_ctx); + } + + if (ch->cluster) { + rados_shutdown(ch->cluster); + } + + if (ch->pfd.fd >= 0) { + close(ch->pfd.fd); + } +} + +static void * +bdev_rbd_handle(void *arg) +{ + struct bdev_rbd_io_channel *ch = arg; + void *ret = arg; + + if (rbd_open(ch->io_ctx, ch->disk->rbd_name, &ch->image, NULL) < 0) { + SPDK_ERRLOG("Failed to open specified rbd device\n"); + ret = NULL; + } + + return ret; +} + +static int +bdev_rbd_create_cb(void *io_device, void *ctx_buf) +{ + struct bdev_rbd_io_channel *ch = ctx_buf; + int ret; + + ch->disk = io_device; + ch->image = NULL; + ch->io_ctx = NULL; + ch->pfd.fd = -1; + + ret = bdev_rados_context_init(ch->disk->pool_name, &ch->cluster, &ch->io_ctx); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados context for rbd_pool=%s\n", + ch->disk->pool_name); + goto err; + } + + if (spdk_call_unaffinitized(bdev_rbd_handle, ch) == NULL) { + goto err; + } + + ch->pfd.fd = eventfd(0, EFD_NONBLOCK); + if (ch->pfd.fd < 0) { + SPDK_ERRLOG("Failed to get eventfd\n"); + goto err; + } + + ch->pfd.events = POLLIN; + ret = rbd_set_image_notification(ch->image, ch->pfd.fd, EVENT_TYPE_EVENTFD); + if (ret < 0) { + SPDK_ERRLOG("Failed to set rbd image notification\n"); + goto err; + } + + ch->poller = spdk_poller_register(bdev_rbd_io_poll, ch, BDEV_RBD_POLL_US); + + return 0; + +err: + bdev_rbd_free_channel(ch); + return -1; +} + +static void +bdev_rbd_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_rbd_io_channel *io_channel = ctx_buf; + + bdev_rbd_free_channel(io_channel); + + spdk_poller_unregister(&io_channel->poller); +} + +static struct spdk_io_channel * +bdev_rbd_get_io_channel(void *ctx) +{ + struct bdev_rbd *rbd_bdev = ctx; + + return spdk_get_io_channel(rbd_bdev); +} + +static int +bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct bdev_rbd *rbd_bdev = ctx; + + spdk_json_write_name(w, "rbd"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "pool_name"); + spdk_json_write_string(w, rbd_bdev->pool_name); + + spdk_json_write_name(w, "rbd_name"); + spdk_json_write_string(w, rbd_bdev->rbd_name); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct bdev_rbd *rbd = bdev->ctxt; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_rbd_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_string(w, "pool_name", rbd->pool_name); + spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name); + spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table rbd_fn_table = { + .destruct = bdev_rbd_destruct, + .submit_request = bdev_rbd_submit_request, + .io_type_supported = bdev_rbd_io_type_supported, + .get_io_channel = bdev_rbd_get_io_channel, + .dump_info_json = bdev_rbd_dump_info_json, + .write_config_json = bdev_rbd_write_config_json, +}; + +struct spdk_bdev * +spdk_bdev_rbd_create(const char *name, const char *pool_name, const char *rbd_name, + uint32_t block_size) +{ + struct bdev_rbd *rbd; + int 
ret; + + if ((pool_name == NULL) || (rbd_name == NULL)) { + return NULL; + } + + rbd = calloc(1, sizeof(struct bdev_rbd)); + if (rbd == NULL) { + SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n"); + return NULL; + } + + rbd->rbd_name = strdup(rbd_name); + if (!rbd->rbd_name) { + bdev_rbd_free(rbd); + return NULL; + } + + rbd->pool_name = strdup(pool_name); + if (!rbd->pool_name) { + bdev_rbd_free(rbd); + return NULL; + } + + ret = bdev_rbd_init(rbd->pool_name, rbd_name, &rbd->info); + if (ret < 0) { + bdev_rbd_free(rbd); + SPDK_ERRLOG("Failed to init rbd device\n"); + return NULL; + } + + if (name) { + rbd->disk.name = strdup(name); + } else { + rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count); + } + if (!rbd->disk.name) { + bdev_rbd_free(rbd); + return NULL; + } + rbd->disk.product_name = "Ceph Rbd Disk"; + bdev_rbd_count++; + + rbd->disk.write_cache = 0; + rbd->disk.blocklen = block_size; + rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen; + rbd->disk.ctxt = rbd; + rbd->disk.fn_table = &rbd_fn_table; + rbd->disk.module = &rbd_if; + + SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name); + + spdk_io_device_register(rbd, bdev_rbd_create_cb, + bdev_rbd_destroy_cb, + sizeof(struct bdev_rbd_io_channel), + rbd_name); + ret = spdk_bdev_register(&rbd->disk); + if (ret) { + spdk_io_device_unregister(rbd, NULL); + bdev_rbd_free(rbd); + return NULL; + } + + return &rbd->disk; +} + +void +spdk_bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &rbd_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +static int +bdev_rbd_library_init(void) +{ + int i, rc = 0; + const char *val; + const char *pool_name; + const char *rbd_name; + uint32_t block_size; + + struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Ceph"); + + if (sp == NULL) { + /* + * Ceph section not found. Do not initialize any rbd LUNS. + */ + goto end; + } + + /* Init rbd block devices */ + for (i = 0; ; i++) { + val = spdk_conf_section_get_nval(sp, "Ceph", i); + if (val == NULL) { + break; + } + + /* get the Rbd_pool name */ + pool_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 0); + if (pool_name == NULL) { + SPDK_ERRLOG("Ceph%d: rbd pool name needs to be provided\n", i); + rc = -1; + goto end; + } + + rbd_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 1); + if (rbd_name == NULL) { + SPDK_ERRLOG("Ceph%d: format error\n", i); + rc = -1; + goto end; + } + + val = spdk_conf_section_get_nmval(sp, "Ceph", i, 2); + + if (val == NULL) { + block_size = 512; /* default value */ + } else { + block_size = (int)strtol(val, NULL, 10); + if (block_size & 0x1ff) { + SPDK_ERRLOG("current block_size = %d, it should be multiple of 512\n", + block_size); + rc = -1; + goto end; + } + } + + if (spdk_bdev_rbd_create(NULL, pool_name, rbd_name, block_size) == NULL) { + rc = -1; + goto end; + } + } + +end: + return rc; +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_rbd", SPDK_LOG_BDEV_RBD) diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd.h b/src/spdk/lib/bdev/rbd/bdev_rbd.h new file mode 100644 index 00000000..dd2448e1 --- /dev/null +++ b/src/spdk/lib/bdev/rbd/bdev_rbd.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_RBD_H +#define SPDK_BDEV_RBD_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" + +typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno); + +struct spdk_bdev *spdk_bdev_rbd_create(const char *name, const char *pool_name, + const char *rbd_name, uint32_t block_size); +/** + * Delete rbd bdev. + * + * \param bdev Pointer to rbd bdev. + * \param cb_fn Function to call after deletion. + * \param cb_arg Argument to pass to cb_fn. + */ +void spdk_bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, + void *cb_arg); + +#endif // SPDK_BDEV_RBD_H diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c b/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c new file mode 100644 index 00000000..745a90ed --- /dev/null +++ b/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c @@ -0,0 +1,157 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_rbd.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +struct rpc_construct_rbd { + char *name; + char *pool_name; + char *rbd_name; + uint32_t block_size; +}; + +static void +free_rpc_construct_rbd(struct rpc_construct_rbd *req) +{ + free(req->name); + free(req->pool_name); + free(req->rbd_name); +} + +static const struct spdk_json_object_decoder rpc_construct_rbd_decoders[] = { + {"name", offsetof(struct rpc_construct_rbd, name), spdk_json_decode_string, true}, + {"pool_name", offsetof(struct rpc_construct_rbd, pool_name), spdk_json_decode_string}, + {"rbd_name", offsetof(struct rpc_construct_rbd, rbd_name), spdk_json_decode_string}, + {"block_size", offsetof(struct rpc_construct_rbd, block_size), spdk_json_decode_uint32}, +}; + +static void +spdk_rpc_construct_rbd_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_rbd req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_construct_rbd_decoders, + SPDK_COUNTOF(rpc_construct_rbd_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RBD, "spdk_json_decode_object failed\n"); + goto invalid; + } + + bdev = spdk_bdev_rbd_create(req.name, req.pool_name, req.rbd_name, req.block_size); + if (bdev == NULL) { + goto invalid; + } + + free_rpc_construct_rbd(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_rbd(&req); +} +SPDK_RPC_REGISTER("construct_rbd_bdev", spdk_rpc_construct_rbd_bdev, SPDK_RPC_RUNTIME) + +struct rpc_delete_rbd { + char *name; +}; + +static void +free_rpc_delete_rbd(struct rpc_delete_rbd *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_rbd_decoders[] = { + {"name", offsetof(struct rpc_delete_rbd, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_rbd_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_rbd_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_rbd req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_rbd_decoders, + SPDK_COUNTOF(rpc_delete_rbd_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + spdk_bdev_rbd_delete(bdev, _spdk_rpc_delete_rbd_bdev_cb, request); + free_rpc_delete_rbd(&req); + 
return; + +invalid: + free_rpc_delete_rbd(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("delete_rbd_bdev", spdk_rpc_delete_rbd_bdev, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/rpc/Makefile b/src/spdk/lib/bdev/rpc/Makefile new file mode 100644 index 00000000..4c1fcc0c --- /dev/null +++ b/src/spdk/lib/bdev/rpc/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_rpc.c +LIBNAME = bdev_rpc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/rpc/bdev_rpc.c b/src/spdk/lib/bdev/rpc/bdev_rpc.c new file mode 100644 index 00000000..1989f6d2 --- /dev/null +++ b/src/spdk/lib/bdev/rpc/bdev_rpc.c @@ -0,0 +1,587 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/rpc.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" + +struct rpc_get_bdevs_iostat_ctx { + int bdev_count; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; +}; + +static void +spdk_rpc_get_bdevs_iostat_cb(struct spdk_bdev *bdev, + struct spdk_bdev_io_stat *stat, void *cb_arg, int rc) +{ + struct rpc_get_bdevs_iostat_ctx *ctx = cb_arg; + struct spdk_json_write_ctx *w = ctx->w; + const char *bdev_name; + + if (rc != 0) { + goto done; + } + + bdev_name = spdk_bdev_get_name(bdev); + if (bdev_name != NULL) { + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, bdev_name); + + spdk_json_write_name(w, "bytes_read"); + spdk_json_write_uint64(w, stat->bytes_read); + + spdk_json_write_name(w, "num_read_ops"); + spdk_json_write_uint64(w, stat->num_read_ops); + + spdk_json_write_name(w, "bytes_written"); + spdk_json_write_uint64(w, stat->bytes_written); + + spdk_json_write_name(w, "num_write_ops"); + spdk_json_write_uint64(w, stat->num_write_ops); + + spdk_json_write_name(w, "read_latency_ticks"); + spdk_json_write_uint64(w, stat->read_latency_ticks); + + spdk_json_write_name(w, "write_latency_ticks"); + spdk_json_write_uint64(w, stat->write_latency_ticks); + + if (spdk_bdev_get_qd_sampling_period(bdev)) { + spdk_json_write_name(w, "queue_depth_polling_period"); + spdk_json_write_uint64(w, spdk_bdev_get_qd_sampling_period(bdev)); + + spdk_json_write_name(w, "queue_depth"); + spdk_json_write_uint64(w, spdk_bdev_get_qd(bdev)); + + spdk_json_write_name(w, "io_time"); + spdk_json_write_uint64(w, spdk_bdev_get_io_time(bdev)); + + spdk_json_write_name(w, "weighted_io_time"); + spdk_json_write_uint64(w, spdk_bdev_get_weighted_io_time(bdev)); + } + + spdk_json_write_object_end(w); + } + +done: + free(stat); + if (--ctx->bdev_count == 0) { + spdk_json_write_array_end(ctx->w); + spdk_jsonrpc_end_result(ctx->request, ctx->w); + free(ctx); + } +} + +struct rpc_get_bdevs_iostat { + char *name; +}; + +static void +free_rpc_get_bdevs_iostat(struct rpc_get_bdevs_iostat *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_get_bdevs_iostat_decoders[] = { + {"name", offsetof(struct rpc_get_bdevs_iostat, name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_bdevs_iostat(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_bdevs_iostat req = {}; + struct spdk_bdev *bdev = NULL; + struct spdk_json_write_ctx *w; + struct spdk_bdev_io_stat *stat; + struct rpc_get_bdevs_iostat_ctx *ctx; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_get_bdevs_iostat_decoders, + SPDK_COUNTOF(rpc_get_bdevs_iostat_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.name) { + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", 
req.name); + goto invalid; + } + } + } + + free_rpc_get_bdevs_iostat(&req); + + ctx = calloc(1, sizeof(struct rpc_get_bdevs_iostat_ctx)); + if (ctx == NULL) { + SPDK_ERRLOG("Failed to allocate rpc_get_bdevs_iostat_ctx struct\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "No memory left"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free(ctx); + return; + } + + /* + * Increment initial bdev_count so that it will never reach 0 in the middle + * of iterating. + */ + ctx->bdev_count++; + ctx->request = request; + ctx->w = w; + + spdk_json_write_array_begin(w); + + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "tick_rate"); + spdk_json_write_uint64(w, spdk_get_ticks_hz()); + spdk_json_write_object_end(w); + + if (bdev != NULL) { + stat = calloc(1, sizeof(struct spdk_bdev_io_stat)); + if (stat == NULL) { + SPDK_ERRLOG("Failed to allocate rpc_get_bdevs_iostat_ctx struct\n"); + } else { + ctx->bdev_count++; + spdk_bdev_get_device_stat(bdev, stat, spdk_rpc_get_bdevs_iostat_cb, ctx); + } + } else { + for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) { + stat = calloc(1, sizeof(struct spdk_bdev_io_stat)); + if (stat == NULL) { + SPDK_ERRLOG("Failed to allocate spdk_bdev_io_stat struct\n"); + break; + } + ctx->bdev_count++; + spdk_bdev_get_device_stat(bdev, stat, spdk_rpc_get_bdevs_iostat_cb, ctx); + } + } + + if (--ctx->bdev_count == 0) { + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + free(ctx); + } + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + free_rpc_get_bdevs_iostat(&req); +} +SPDK_RPC_REGISTER("get_bdevs_iostat", spdk_rpc_get_bdevs_iostat, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_dump_bdev_info(struct spdk_json_write_ctx *w, + struct spdk_bdev *bdev) +{ + struct spdk_bdev_alias *tmp; + uint64_t qos_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES]; + int i; + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + + spdk_json_write_name(w, "aliases"); + spdk_json_write_array_begin(w); + + TAILQ_FOREACH(tmp, spdk_bdev_get_aliases(bdev), tailq) { + spdk_json_write_string(w, tmp->alias); + } + + spdk_json_write_array_end(w); + + spdk_json_write_name(w, "product_name"); + spdk_json_write_string(w, spdk_bdev_get_product_name(bdev)); + + spdk_json_write_name(w, "block_size"); + spdk_json_write_uint32(w, spdk_bdev_get_block_size(bdev)); + + spdk_json_write_name(w, "num_blocks"); + spdk_json_write_uint64(w, spdk_bdev_get_num_blocks(bdev)); + + if (!spdk_mem_all_zero(&bdev->uuid, sizeof(bdev->uuid))) { + char uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + } + + spdk_json_write_name(w, "assigned_rate_limits"); + spdk_json_write_object_begin(w); + spdk_bdev_get_qos_rate_limits(bdev, qos_limits); + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + spdk_json_write_name(w, spdk_bdev_get_qos_rpc_type(i)); + spdk_json_write_uint64(w, qos_limits[i]); + } + spdk_json_write_object_end(w); + + spdk_json_write_name(w, "claimed"); + spdk_json_write_bool(w, (bdev->internal.claim_module != NULL)); + + spdk_json_write_name(w, "supported_io_types"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "read"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ)); + 
spdk_json_write_name(w, "write"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)); + spdk_json_write_name(w, "unmap"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)); + spdk_json_write_name(w, "write_zeroes"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)); + spdk_json_write_name(w, "flush"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)); + spdk_json_write_name(w, "reset"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_RESET)); + spdk_json_write_name(w, "nvme_admin"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)); + spdk_json_write_name(w, "nvme_io"); + spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO)); + spdk_json_write_object_end(w); + + spdk_json_write_name(w, "driver_specific"); + spdk_json_write_object_begin(w); + spdk_bdev_dump_info_json(bdev, w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +struct rpc_get_bdevs { + char *name; +}; + +static void +free_rpc_get_bdevs(struct rpc_get_bdevs *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_get_bdevs_decoders[] = { + {"name", offsetof(struct rpc_get_bdevs, name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_bdevs(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_bdevs req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev = NULL; + + if (params && spdk_json_decode_object(params, rpc_get_bdevs_decoders, + SPDK_COUNTOF(rpc_get_bdevs_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.name) { + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.name); + goto invalid; + } + } + + free_rpc_get_bdevs(&req); + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + if (bdev != NULL) { + spdk_rpc_dump_bdev_info(w, bdev); + } else { + for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) { + spdk_rpc_dump_bdev_info(w, bdev); + } + } + + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + free_rpc_get_bdevs(&req); +} +SPDK_RPC_REGISTER("get_bdevs", spdk_rpc_get_bdevs, SPDK_RPC_RUNTIME) + +struct rpc_delete_bdev { + char *name; +}; + +static void +free_rpc_delete_bdev(struct rpc_delete_bdev *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_bdev_decoders[] = { + {"name", offsetof(struct rpc_delete_bdev, name), spdk_json_decode_string}, +}; + +static void +_spdk_rpc_delete_bdev_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_delete_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_bdev req = {}; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_delete_bdev_decoders, + SPDK_COUNTOF(rpc_delete_bdev_decoders), + &req)) { + 
SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		goto invalid;
+	}
+
+	if (req.name == NULL) {
+		SPDK_ERRLOG("missing name param\n");
+		goto invalid;
+	}
+
+	bdev = spdk_bdev_get_by_name(req.name);
+	if (bdev == NULL) {
+		SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+		goto invalid;
+	}
+
+	spdk_bdev_unregister(bdev, _spdk_rpc_delete_bdev_cb, request);
+
+	free_rpc_delete_bdev(&req);
+
+	return;
+
+invalid:
+	spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+	free_rpc_delete_bdev(&req);
+}
+SPDK_RPC_REGISTER("delete_bdev", spdk_rpc_delete_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_set_bdev_qd_sampling_period {
+	char *name;
+	uint64_t period;
+};
+
+static void
+free_rpc_set_bdev_qd_sampling_period(struct rpc_set_bdev_qd_sampling_period *r)
+{
+	free(r->name);
+}
+
+static const struct spdk_json_object_decoder
+	rpc_set_bdev_qd_sampling_period_decoders[] = {
+	{"name", offsetof(struct rpc_set_bdev_qd_sampling_period, name), spdk_json_decode_string},
+	{"period", offsetof(struct rpc_set_bdev_qd_sampling_period, period), spdk_json_decode_uint64},
+};
+
+static void
+spdk_rpc_set_bdev_qd_sampling_period(struct spdk_jsonrpc_request *request,
+				     const struct spdk_json_val *params)
+{
+	struct rpc_set_bdev_qd_sampling_period req = {0};
+	struct spdk_bdev *bdev;
+	struct spdk_json_write_ctx *w;
+
+	req.period = UINT64_MAX;
+
+	if (spdk_json_decode_object(params, rpc_set_bdev_qd_sampling_period_decoders,
+				    SPDK_COUNTOF(rpc_set_bdev_qd_sampling_period_decoders),
+				    &req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		goto invalid;
+	}
+
+	if (req.name) {
+		bdev = spdk_bdev_get_by_name(req.name);
+		if (bdev == NULL) {
+			SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+			goto invalid;
+		}
+	} else {
+		SPDK_ERRLOG("Missing name param\n");
+		goto invalid;
+	}
+
+	if (req.period == UINT64_MAX) {
+		SPDK_ERRLOG("Missing period param\n");
+		goto invalid;
+	}
+
+	w = spdk_jsonrpc_begin_result(request);
+	if (w == NULL) {
+		free_rpc_set_bdev_qd_sampling_period(&req);
+		return;
+	}
+
+	spdk_bdev_set_qd_sampling_period(bdev, req.period);
+
+	spdk_json_write_bool(w, true);
+	spdk_jsonrpc_end_result(request, w);
+	free_rpc_set_bdev_qd_sampling_period(&req);
+	return;
+
+invalid:
+	spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+	free_rpc_set_bdev_qd_sampling_period(&req);
+	return;
+}
+SPDK_RPC_REGISTER("set_bdev_qd_sampling_period",
+		  spdk_rpc_set_bdev_qd_sampling_period,
+		  SPDK_RPC_RUNTIME)
+
+struct rpc_set_bdev_qos_limit {
+	char *name;
+	uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+};
+
+static void
+free_rpc_set_bdev_qos_limit(struct rpc_set_bdev_qos_limit *r)
+{
+	free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_set_bdev_qos_limit_decoders[] = {
+	{"name", offsetof(struct rpc_set_bdev_qos_limit, name), spdk_json_decode_string},
+	{
+		"rw_ios_per_sec", offsetof(struct rpc_set_bdev_qos_limit,
+					   limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT]),
+		spdk_json_decode_uint64, true
+	},
+	{
+		"rw_mbytes_per_sec", offsetof(struct rpc_set_bdev_qos_limit,
+					      limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT]),
+		spdk_json_decode_uint64, true
+	},
+};
+
+static void
+spdk_rpc_set_bdev_qos_limit_complete(void *cb_arg, int status)
+{
+	struct spdk_jsonrpc_request *request = cb_arg;
+	struct spdk_json_write_ctx *w;
+
+	if (status != 0) {
+		spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+						     "Failed to configure rate limit: %s",
+						     spdk_strerror(-status));
+		return;
+	}
+
+	w = spdk_jsonrpc_begin_result(request);
+	if (w == NULL) {
+		return;
+	}
+
+	
spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_set_bdev_qos_limit(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_set_bdev_qos_limit req = {NULL, {UINT64_MAX, UINT64_MAX}}; + struct spdk_bdev *bdev; + bool valid_limit = false; + int i; + + if (spdk_json_decode_object(params, rpc_set_bdev_qos_limit_decoders, + SPDK_COUNTOF(rpc_set_bdev_qos_limit_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("bdev '%s' does not exist\n", req.name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Bdev does not exist"); + goto exit; + } + + for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) { + if (req.limits[i] != UINT64_MAX) { + valid_limit = true; + } + } + + if (valid_limit == false) { + SPDK_ERRLOG("no rate limits specified\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "No rate limits specified"); + goto exit; + } + + free_rpc_set_bdev_qos_limit(&req); + spdk_bdev_set_qos_rate_limits(bdev, req.limits, spdk_rpc_set_bdev_qos_limit_complete, request); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +exit: + free_rpc_set_bdev_qos_limit(&req); +} + +SPDK_RPC_REGISTER("set_bdev_qos_limit", spdk_rpc_set_bdev_qos_limit, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/scsi_nvme.c b/src/spdk/lib/bdev/scsi_nvme.c new file mode 100644 index 00000000..385b9036 --- /dev/null +++ b/src/spdk/lib/bdev/scsi_nvme.c @@ -0,0 +1,261 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016 FUJITSU LIMITED, All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/bdev_module.h" + +#include "spdk/nvme_spec.h" + +void +spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io, int *sc, int *sk, + int *asc, int *ascq) +{ + int nvme_sct = bdev_io->internal.error.nvme.sct; + int nvme_sc = bdev_io->internal.error.nvme.sc; + + switch (nvme_sct) { + case SPDK_NVME_SCT_GENERIC: + switch (nvme_sc) { + case SPDK_NVME_SC_SUCCESS: + *sc = SPDK_SCSI_STATUS_GOOD; + *sk = SPDK_SCSI_SENSE_NO_SENSE; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_INVALID_OPCODE: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_INVALID_FIELD: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_DATA_TRANSFER_ERROR: + case SPDK_NVME_SC_CAPACITY_EXCEEDED: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_ABORTED_POWER_LOSS: + *sc = SPDK_SCSI_STATUS_TASK_ABORTED; + *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; + *asc = SPDK_SCSI_ASC_WARNING; + *ascq = SPDK_SCSI_ASCQ_POWER_LOSS_EXPECTED; + break; + case SPDK_NVME_SC_INTERNAL_DEVICE_ERROR: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_HARDWARE_ERROR; + *asc = SPDK_SCSI_ASC_INTERNAL_TARGET_FAILURE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_ABORTED_BY_REQUEST: + case SPDK_NVME_SC_ABORTED_SQ_DELETION: + case SPDK_NVME_SC_ABORTED_FAILED_FUSED: + case SPDK_NVME_SC_ABORTED_MISSING_FUSED: + *sc = SPDK_SCSI_STATUS_TASK_ABORTED; + *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_ACCESS_DENIED; + *ascq = SPDK_SCSI_ASCQ_INVALID_LU_IDENTIFIER; + break; + case SPDK_NVME_SC_LBA_OUT_OF_RANGE: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_NAMESPACE_NOT_READY: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_NOT_READY; + *asc = SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_RESERVATION_CONFLICT: + *sc = SPDK_SCSI_STATUS_RESERVATION_CONFLICT; + *sk = SPDK_SCSI_SENSE_NO_SENSE; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_COMMAND_ID_CONFLICT: + case SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR: + case SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR: + case SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS: + case SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID: + case SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID: + case SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID: + case SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF: + case SPDK_NVME_SC_INVALID_PRP_OFFSET: + case SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED: + case SPDK_NVME_SC_INVALID_SGL_OFFSET: + case SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT: + case SPDK_NVME_SC_KEEP_ALIVE_EXPIRED: + case 
SPDK_NVME_SC_KEEP_ALIVE_INVALID: + case SPDK_NVME_SC_FORMAT_IN_PROGRESS: + default: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } + break; + case SPDK_NVME_SCT_COMMAND_SPECIFIC: + switch (nvme_sc) { + case SPDK_NVME_SC_COMPLETION_QUEUE_INVALID: + case SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_INVALID_FORMAT: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_FORMAT_COMMAND_FAILED; + *ascq = SPDK_SCSI_ASCQ_FORMAT_COMMAND_FAILED; + break; + case SPDK_NVME_SC_CONFLICTING_ATTRIBUTES: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_DATA_PROTECT; + *asc = SPDK_SCSI_ASC_WRITE_PROTECTED; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER: + case SPDK_NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED: + case SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED: + case SPDK_NVME_SC_INVALID_FIRMWARE_SLOT: + case SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE: + case SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR: + case SPDK_NVME_SC_INVALID_LOG_PAGE: + case SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET: + case SPDK_NVME_SC_INVALID_QUEUE_DELETION: + case SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE: + case SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE: + case SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC: + case SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET: + case SPDK_NVME_SC_FIRMWARE_REQ_RESET: + case SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION: + case SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED: + case SPDK_NVME_SC_OVERLAPPING_RANGE: + case SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY: + case SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE: + case SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED: + case SPDK_NVME_SC_NAMESPACE_IS_PRIVATE: + case SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED: + case SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED: + case SPDK_NVME_SC_CONTROLLER_LIST_INVALID: + case SPDK_NVME_SC_INVALID_PROTECTION_INFO: + default: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } + break; + case SPDK_NVME_SCT_MEDIA_ERROR: + switch (nvme_sc) { + case SPDK_NVME_SC_WRITE_FAULTS: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = SPDK_SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_UNRECOVERED_READ_ERROR: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = SPDK_SCSI_ASC_UNRECOVERED_READ_ERROR; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_GUARD_CHECK_ERROR: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + break; + case SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = 
SPDK_SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + break; + case SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR; + *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + break; + case SPDK_NVME_SC_COMPARE_FAILURE: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_MISCOMPARE; + *asc = SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case SPDK_NVME_SC_ACCESS_DENIED: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_DATA_PROTECT; + *asc = SPDK_SCSI_ASC_ACCESS_DENIED; + *ascq = SPDK_SCSI_ASCQ_NO_ACCESS_RIGHTS; + break; + case SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK: + default: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } + break; + case SPDK_NVME_SCT_VENDOR_SPECIFIC: + default: + *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; + *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST; + *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; + *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + } +} diff --git a/src/spdk/lib/bdev/split/Makefile b/src/spdk/lib/bdev/split/Makefile new file mode 100644 index 00000000..46edf89a --- /dev/null +++ b/src/spdk/lib/bdev/split/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
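+# As with the other bdev module Makefiles in this tree, the module only needs
+# to list its sources in C_SRCS and name its output archive in LIBNAME; the
+# shared SPDK library rules included below handle the rest of the build.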
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = vbdev_split.c vbdev_split_rpc.c +LIBNAME = vbdev_split + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/split/vbdev_split.c b/src/spdk/lib/bdev/split/vbdev_split.c new file mode 100644 index 00000000..97f11984 --- /dev/null +++ b/src/spdk/lib/bdev/split/vbdev_split.c @@ -0,0 +1,565 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is a simple example of a virtual block device that takes a single + * bdev and slices it into multiple smaller bdevs. 
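+ *
+ * vbdev_split_init() below also accepts an optional legacy configuration
+ * section of the following form (the bdev names here are illustrative only):
+ *
+ *   [Split]
+ *   # Split <base bdev name> <split count> [<split size in MB>]
+ *   Split Malloc0 4
+ *   Split Nvme0n1 8 128
+ *
+ * When no split size is given, the base bdev is divided evenly into
+ * <split count> parts.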
+ */ + +#include "vbdev_split.h" + +#include "spdk/rpc.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +struct spdk_vbdev_split_config { + char *base_bdev; + unsigned split_count; + uint64_t split_size_mb; + + struct spdk_bdev_part_base *split_base; + bool removed; + + TAILQ_ENTRY(spdk_vbdev_split_config) tailq; +}; + +static TAILQ_HEAD(, spdk_vbdev_split_config) g_split_config = TAILQ_HEAD_INITIALIZER( + g_split_config); +static SPDK_BDEV_PART_TAILQ g_split_disks = TAILQ_HEAD_INITIALIZER(g_split_disks); + +struct vbdev_split_channel { + struct spdk_bdev_part_channel part_ch; +}; + +struct vbdev_split_bdev_io { + struct spdk_io_channel *ch; + struct spdk_bdev_io *bdev_io; + + /* for bdev_io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +static void vbdev_split_del_config(struct spdk_vbdev_split_config *cfg); + +static int vbdev_split_init(void); +static void vbdev_split_fini(void); +static void vbdev_split_examine(struct spdk_bdev *bdev); +static int vbdev_split_config_json(struct spdk_json_write_ctx *w); +static int vbdev_split_get_ctx_size(void); + +static void +vbdev_split_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io); + +static struct spdk_bdev_module split_if = { + .name = "split", + .module_init = vbdev_split_init, + .module_fini = vbdev_split_fini, + .get_ctx_size = vbdev_split_get_ctx_size, + .examine_config = vbdev_split_examine, + .config_json = vbdev_split_config_json, +}; + +SPDK_BDEV_MODULE_REGISTER(&split_if) + +static void +vbdev_split_base_free(void *ctx) +{ + struct spdk_vbdev_split_config *cfg = ctx; + + cfg->split_base = NULL; + if (cfg->removed) { + vbdev_split_del_config(cfg); + } +} + +static int +vbdev_split_destruct(void *ctx) +{ + struct spdk_bdev_part *part = ctx; + + return spdk_bdev_part_free(part); +} + +static void +vbdev_split_base_bdev_hotremove_cb(void *_base_bdev) +{ + spdk_bdev_part_base_hotremove(_base_bdev, &g_split_disks); +} + +static void +vbdev_split_resubmit_io(void *arg) +{ + struct vbdev_split_bdev_io *split_io = (struct vbdev_split_bdev_io *)arg; + + vbdev_split_submit_request(split_io->ch, split_io->bdev_io); +} + +static void +vbdev_split_queue_io(struct vbdev_split_bdev_io *split_io) +{ + int rc; + + split_io->bdev_io_wait.bdev = split_io->bdev_io->bdev; + split_io->bdev_io_wait.cb_fn = vbdev_split_resubmit_io; + split_io->bdev_io_wait.cb_arg = split_io; + + rc = spdk_bdev_queue_io_wait(split_io->bdev_io->bdev, + split_io->ch, &split_io->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vbdev_split_queue_io, rc=%d\n", rc); + spdk_bdev_io_complete(split_io->bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +vbdev_split_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_split_channel *ch = spdk_io_channel_get_ctx(_ch); + struct vbdev_split_bdev_io *io_ctx = (struct vbdev_split_bdev_io *)bdev_io->driver_ctx; + int rc; + + rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io); + if (rc) { + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "split: no memory, queue io.\n"); + io_ctx->ch = _ch; + io_ctx->bdev_io = bdev_io; + vbdev_split_queue_io(io_ctx); + } else { + SPDK_ERRLOG("split: error on io submission, rc=%d.\n", rc); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +static int +vbdev_split_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ 
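+	/* fn_table callback: fills the "driver_specific" object that the
+	 * generic get_bdevs RPC reports for each split bdev.
+	 */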
+ struct spdk_bdev_part *part = ctx; + struct spdk_bdev *split_base_bdev = spdk_bdev_part_get_base_bdev(part); + uint64_t offset_blocks = spdk_bdev_part_get_offset_blocks(part); + + spdk_json_write_name(w, "split"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "base_bdev"); + spdk_json_write_string(w, spdk_bdev_get_name(split_base_bdev)); + spdk_json_write_name(w, "offset_blocks"); + spdk_json_write_uint64(w, offset_blocks); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +vbdev_split_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev needed */ +} + +static struct spdk_bdev_fn_table vbdev_split_fn_table = { + .destruct = vbdev_split_destruct, + .submit_request = vbdev_split_submit_request, + .dump_info_json = vbdev_split_dump_info_json, + .write_config_json = vbdev_split_write_config_json +}; + +static int +vbdev_split_create(struct spdk_vbdev_split_config *cfg) +{ + uint64_t split_size_blocks, offset_blocks; + uint64_t split_count, max_split_count; + uint64_t mb = 1024 * 1024; + uint64_t i; + int rc; + char *name; + struct spdk_bdev *base_bdev; + struct spdk_bdev *split_base_bdev; + struct bdev_part_tailq *split_base_tailq; + + assert(cfg->split_count > 0); + + base_bdev = spdk_bdev_get_by_name(cfg->base_bdev); + if (!base_bdev) { + return -ENODEV; + } + + if (cfg->split_size_mb) { + if (((cfg->split_size_mb * mb) % base_bdev->blocklen) != 0) { + SPDK_ERRLOG("Split size %" PRIu64 " MB is not possible with block size " + "%" PRIu32 "\n", + cfg->split_size_mb, base_bdev->blocklen); + return -EINVAL; + } + split_size_blocks = (cfg->split_size_mb * mb) / base_bdev->blocklen; + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "Split size %" PRIu64 " MB specified by user\n", + cfg->split_size_mb); + } else { + split_size_blocks = base_bdev->blockcnt / cfg->split_count; + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "Split size not specified by user\n"); + } + + max_split_count = base_bdev->blockcnt / split_size_blocks; + split_count = cfg->split_count; + if (split_count > max_split_count) { + SPDK_WARNLOG("Split count %" PRIu64 " is greater than maximum possible split count " + "%" PRIu64 " - clamping\n", split_count, max_split_count); + split_count = max_split_count; + } + + SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "base_bdev: %s split_count: %" PRIu64 + " split_size_blocks: %" PRIu64 "\n", + spdk_bdev_get_name(base_bdev), split_count, split_size_blocks); + + cfg->split_base = spdk_bdev_part_base_construct(base_bdev, + vbdev_split_base_bdev_hotremove_cb, + &split_if, &vbdev_split_fn_table, + &g_split_disks, vbdev_split_base_free, cfg, + sizeof(struct vbdev_split_channel), NULL, NULL); + if (!cfg->split_base) { + SPDK_ERRLOG("Cannot construct bdev part base\n"); + return -ENOMEM; + } + + offset_blocks = 0; + for (i = 0; i < split_count; i++) { + struct spdk_bdev_part *d; + + d = calloc(1, sizeof(*d)); + if (d == NULL) { + SPDK_ERRLOG("could not allocate bdev part\n"); + rc = -ENOMEM; + goto err; + } + + name = spdk_sprintf_alloc("%sp%" PRIu64, cfg->base_bdev, i); + if (!name) { + SPDK_ERRLOG("could not allocate name\n"); + free(d); + rc = -ENOMEM; + goto err; + } + + rc = spdk_bdev_part_construct(d, cfg->split_base, name, offset_blocks, split_size_blocks, + "Split Disk"); + free(name); + if (rc) { + SPDK_ERRLOG("could not construct bdev part\n"); + /* spdk_bdev_part_construct will free name if it fails */ + free(d); + rc = -ENOMEM; + goto err; + } + + offset_blocks += split_size_blocks; + } + + return 0; +err: + split_base_bdev = 
spdk_bdev_part_base_get_bdev(cfg->split_base); + split_base_tailq = spdk_bdev_part_base_get_tailq(cfg->split_base); + cfg->removed = true; + spdk_bdev_part_base_hotremove(split_base_bdev, split_base_tailq); + return rc; +} + +static void +vbdev_split_del_config(struct spdk_vbdev_split_config *cfg) +{ + TAILQ_REMOVE(&g_split_config, cfg, tailq); + free(cfg->base_bdev); + free(cfg); +} + +static void +vbdev_split_destruct_config(struct spdk_vbdev_split_config *cfg) +{ + struct spdk_bdev *split_base_bdev; + struct bdev_part_tailq *split_base_tailq; + + cfg->removed = true; + if (cfg->split_base != NULL) { + split_base_bdev = spdk_bdev_part_base_get_bdev(cfg->split_base); + split_base_tailq = spdk_bdev_part_base_get_tailq(cfg->split_base); + spdk_bdev_part_base_hotremove(split_base_bdev, split_base_tailq); + } else { + vbdev_split_del_config(cfg); + } +} + +static void +vbdev_split_clear_config(void) +{ + struct spdk_vbdev_split_config *cfg, *tmp_cfg; + + TAILQ_FOREACH_SAFE(cfg, &g_split_config, tailq, tmp_cfg) { + vbdev_split_destruct_config(cfg); + } +} + +static struct spdk_vbdev_split_config * +vbdev_split_config_find_by_base_name(const char *base_bdev_name) +{ + struct spdk_vbdev_split_config *cfg; + + TAILQ_FOREACH(cfg, &g_split_config, tailq) { + if (strcmp(cfg->base_bdev, base_bdev_name) == 0) { + return cfg; + } + } + + return NULL; +} + +static int +vbdev_split_add_config(const char *base_bdev_name, unsigned split_count, uint64_t split_size, + struct spdk_vbdev_split_config **config) +{ + struct spdk_vbdev_split_config *cfg; + assert(base_bdev_name); + + if (base_bdev_name == NULL) { + SPDK_ERRLOG("Split bdev config: no base bdev provided."); + return -EINVAL; + } + + if (split_count == 0) { + SPDK_ERRLOG("Split bdev config: split_count can't be 0."); + return -EINVAL; + } + + /* Check if we already have 'base_bdev_name' registered in config */ + cfg = vbdev_split_config_find_by_base_name(base_bdev_name); + if (cfg) { + SPDK_ERRLOG("Split bdev config for base bdev '%s' already exist.", base_bdev_name); + return -EEXIST; + } + + cfg = calloc(1, sizeof(*cfg)); + if (!cfg) { + SPDK_ERRLOG("calloc(): Out of memory"); + return -ENOMEM; + } + + cfg->base_bdev = strdup(base_bdev_name); + if (!cfg->base_bdev) { + SPDK_ERRLOG("strdup(): Out of memory"); + free(cfg); + return -ENOMEM; + } + + cfg->split_count = split_count; + cfg->split_size_mb = split_size; + TAILQ_INSERT_TAIL(&g_split_config, cfg, tailq); + if (config) { + *config = cfg; + } + + return 0; +} + +static int +vbdev_split_init(void) +{ + + struct spdk_conf_section *sp; + const char *base_bdev_name; + const char *split_count_str; + const char *split_size_str; + int rc, i, split_count, split_size; + + sp = spdk_conf_find_section(NULL, "Split"); + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + if (!spdk_conf_section_get_nval(sp, "Split", i)) { + break; + } + + base_bdev_name = spdk_conf_section_get_nmval(sp, "Split", i, 0); + if (!base_bdev_name) { + SPDK_ERRLOG("Split configuration missing bdev name\n"); + rc = -EINVAL; + goto err; + } + + split_count_str = spdk_conf_section_get_nmval(sp, "Split", i, 1); + if (!split_count_str) { + SPDK_ERRLOG("Split configuration missing split count\n"); + rc = -EINVAL; + goto err; + } + + split_count = atoi(split_count_str); + if (split_count < 1) { + SPDK_ERRLOG("Invalid Split count %d\n", split_count); + rc = -EINVAL; + goto err; + } + + /* Optional split size in MB */ + split_size = 0; + split_size_str = spdk_conf_section_get_nmval(sp, "Split", i, 2); + if (split_size_str) { + 
split_size = atoi(split_size_str); + if (split_size <= 0) { + SPDK_ERRLOG("Invalid Split size %d\n", split_size); + rc = -EINVAL; + goto err; + } + } + + rc = vbdev_split_add_config(base_bdev_name, split_count, split_size, NULL); + if (rc != 0) { + goto err; + } + } + + return 0; +err: + vbdev_split_clear_config(); + return rc; +} + +static void +vbdev_split_fini(void) +{ + vbdev_split_clear_config(); +} + +static void +vbdev_split_examine(struct spdk_bdev *bdev) +{ + struct spdk_vbdev_split_config *cfg = vbdev_split_config_find_by_base_name(bdev->name); + + if (cfg != NULL && cfg->removed == false) { + assert(cfg->split_base == NULL); + + if (vbdev_split_create(cfg)) { + SPDK_ERRLOG("could not split bdev %s\n", bdev->name); + } + } + spdk_bdev_module_examine_done(&split_if); +} + +static int +vbdev_split_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_vbdev_split_config *cfg; + + TAILQ_FOREACH(cfg, &g_split_config, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_split_vbdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "base_bdev", cfg->base_bdev); + spdk_json_write_named_uint32(w, "split_count", cfg->split_count); + spdk_json_write_named_uint64(w, "split_size_mb", cfg->split_size_mb); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + return 0; +} + +int +create_vbdev_split(const char *base_bdev_name, unsigned split_count, uint64_t split_size_mb) +{ + int rc; + struct spdk_vbdev_split_config *cfg; + + rc = vbdev_split_add_config(base_bdev_name, split_count, split_size_mb, &cfg); + if (rc) { + return rc; + } + + rc = vbdev_split_create(cfg); + if (rc == -ENODEV) { + /* It is ok if base bdev does not exist yet. */ + rc = 0; + } + + return rc; +} + +int +spdk_vbdev_split_destruct(const char *base_bdev_name) +{ + struct spdk_vbdev_split_config *cfg = vbdev_split_config_find_by_base_name(base_bdev_name); + + if (!cfg) { + SPDK_ERRLOG("Split configuration for '%s' not found\n", base_bdev_name); + return -ENOENT; + } + + vbdev_split_destruct_config(cfg); + return 0; +} + +struct spdk_bdev_part_base * +spdk_vbdev_split_get_part_base(struct spdk_bdev *bdev) +{ + struct spdk_vbdev_split_config *cfg; + + cfg = vbdev_split_config_find_by_base_name(spdk_bdev_get_name(bdev)); + + if (cfg == NULL) { + return NULL; + } + + return cfg->split_base; +} + +/* + * During init we'll be asked how much memory we'd like passed to us + * in bev_io structures as context. Here's where we specify how + * much context we want per IO. + */ +static int +vbdev_split_get_ctx_size(void) +{ + return sizeof(struct vbdev_split_bdev_io); +} + +SPDK_LOG_REGISTER_COMPONENT("vbdev_split", SPDK_LOG_VBDEV_SPLIT) diff --git a/src/spdk/lib/bdev/split/vbdev_split.h b/src/spdk/lib/bdev/split/vbdev_split.h new file mode 100644 index 00000000..4231d443 --- /dev/null +++ b/src/spdk/lib/bdev/split/vbdev_split.h @@ -0,0 +1,68 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_SPLIT_H
+#define SPDK_VBDEV_SPLIT_H
+
+#include "spdk/bdev_module.h"
+
+/**
+ * Add the given disk name to the split config. If a bdev with the
+ * \c base_bdev_name name exists, the split bdevs will be created right away;
+ * if not, they will be created when the base bdev becomes available
+ * (during the examination process).
+ *
+ * \param base_bdev_name Base bdev name
+ * \param split_count number of splits to be created.
+ * \param split_size_mb size of each bdev. If 0 use base bdev size / split_count
+ * \return value >= 0 - number of splits created. Negative errno code on error.
+ */
+int create_vbdev_split(const char *base_bdev_name, unsigned split_count, uint64_t split_size_mb);
+
+/**
+ * Remove all created split bdevs and split config.
+ *
+ * \param base_bdev_name base bdev name
+ * \return 0 on success or negative errno value.
+ */
+int spdk_vbdev_split_destruct(const char *base_bdev_name);
+
+/**
+ * Get the spdk_bdev_part_base associated with the given split base_bdev.
+ *
+ * \param base_bdev Bdev to get the part_base from
+ * \return pointer to the associated spdk_bdev_part_base
+ * \return NULL if the base_bdev is not being split by the split module
+ */
+struct spdk_bdev_part_base *spdk_vbdev_split_get_part_base(struct spdk_bdev *base_bdev);
+
+#endif // SPDK_VBDEV_SPLIT_H
diff --git a/src/spdk/lib/bdev/split/vbdev_split_rpc.c b/src/spdk/lib/bdev/split/vbdev_split_rpc.c
new file mode 100644
index 00000000..fe70346f
--- /dev/null
+++ b/src/spdk/lib/bdev/split/vbdev_split_rpc.c
@@ -0,0 +1,151 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" + +#include "vbdev_split.h" +#include "spdk_internal/log.h" + +struct rpc_construct_split { + char *base_bdev; + uint32_t split_count; + uint64_t split_size_mb; +}; + +static const struct spdk_json_object_decoder rpc_construct_split_decoders[] = { + {"base_bdev", offsetof(struct rpc_construct_split, base_bdev), spdk_json_decode_string}, + {"split_count", offsetof(struct rpc_construct_split, split_count), spdk_json_decode_uint32}, + {"split_size_mb", offsetof(struct rpc_construct_split, split_size_mb), spdk_json_decode_uint64, true}, +}; + +static void +spdk_rpc_construct_split_vbdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_split req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *base_bdev; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_split_decoders, + SPDK_COUNTOF(rpc_construct_split_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + goto out; + } + + rc = create_vbdev_split(req.base_bdev, req.split_count, req.split_size_mb); + if (rc < 0) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Failed to create %"PRIu32" split bdevs from '%s': %s", + req.split_count, req.base_bdev, spdk_strerror(-rc)); + goto out; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + goto out; + } + + spdk_json_write_array_begin(w); + + base_bdev = spdk_bdev_get_by_name(req.base_bdev); + if (base_bdev != NULL) { + struct spdk_bdev_part_base *split_base; + struct bdev_part_tailq *split_base_tailq; + struct spdk_bdev_part *split_part; + struct spdk_bdev *split_bdev; + + split_base = spdk_vbdev_split_get_part_base(base_bdev); + + assert(split_base != NULL); + + split_base_tailq = spdk_bdev_part_base_get_tailq(split_base); + TAILQ_FOREACH(split_part, split_base_tailq, tailq) { + split_bdev = spdk_bdev_part_get_bdev(split_part); + spdk_json_write_string(w, spdk_bdev_get_name(split_bdev)); + } + } + + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + +out: + free(req.base_bdev); +} +SPDK_RPC_REGISTER("construct_split_vbdev", spdk_rpc_construct_split_vbdev, SPDK_RPC_RUNTIME) + +struct rpc_destruct_split { + char *base_bdev; +}; + +static const struct spdk_json_object_decoder rpc_destruct_split_decoders[] = { + {"base_bdev", offsetof(struct rpc_destruct_split, base_bdev), 
spdk_json_decode_string}, +}; + +static void +spdk_rpc_destruct_split(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_destruct_split req = {}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_destruct_split_decoders, + SPDK_COUNTOF(rpc_destruct_split_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + goto out; + } + + rc = spdk_vbdev_split_destruct(req.base_bdev); + if (rc < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + goto out; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + goto out; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +out: + free(req.base_bdev); +} +SPDK_RPC_REGISTER("destruct_split_vbdev", spdk_rpc_destruct_split, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/virtio/Makefile b/src/spdk/lib/bdev/virtio/Makefile new file mode 100644 index 00000000..fabe2b9f --- /dev/null +++ b/src/spdk/lib/bdev/virtio/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_virtio_scsi.c bdev_virtio_blk.c bdev_virtio_rpc.c +LIBNAME = bdev_virtio + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio.h b/src/spdk/lib/bdev/virtio/bdev_virtio.h new file mode 100644 index 00000000..538fab8f --- /dev/null +++ b/src/spdk/lib/bdev/virtio/bdev_virtio.h @@ -0,0 +1,164 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_VIRTIO_H +#define SPDK_BDEV_VIRTIO_H + +#include "spdk/bdev.h" +#include "spdk/env.h" + +/** + * Callback for creating virtio bdevs. + * + * \param ctx opaque context set by the user + * \param errnum error code. 0 on success, negative errno on error. + * \param bdevs contiguous array of created bdevs + * \param bdev_cnt number of bdevs in the `bdevs` array + */ +typedef void (*bdev_virtio_create_cb)(void *ctx, int errnum, + struct spdk_bdev **bdevs, size_t bdev_cnt); + +/** + * Callback for removing virtio devices. + * + * \param ctx opaque context set by the user + * \param errnum error code. 0 on success, negative errno on error. + */ +typedef void (*bdev_virtio_remove_cb)(void *ctx, int errnum); + +/** + * Connect to a vhost-user Unix domain socket and create a Virtio SCSI device. + * If the connection is successful, the device will be automatically scanned. + * The scan consists of probing the targets on the device and will result in + * creating possibly multiple Virtio SCSI bdevs - one for each target. Currently + * only one LUN per target is detected - LUN0. Note that the bdev creation is + * run asynchronously in the background. After it's finished, the `cb_fn` + * callback is called. + * + * \param name name for the virtio device. It will be inherited by all created + * bdevs, which are named in the following format: t + * \param path path to the socket + * \param num_queues max number of request virtqueues to use. `vdev` will be + * started successfully even if the host device supports less queues than requested. + * \param queue_size depth of each queue + * \param cb_fn function to be called after scanning all targets on the virtio + * device. It's optional, can be NULL. See \c bdev_virtio_create_cb. + * \param cb_arg argument for the `cb_fn` + * \return zero on success (device scan is started) or negative error code. + * In case of error the \c cb_fn is not called. 
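+ *
+ * A minimal calling sketch (the device name, socket path, and queue sizing
+ * below are illustrative only, not defaults):
+ *
+ *   static void scan_done(void *ctx, int errnum,
+ *                         struct spdk_bdev **bdevs, size_t bdev_cnt)
+ *   {
+ *           SPDK_NOTICELOG("virtio scan done: rc=%d, %zu bdevs\n",
+ *                          errnum, bdev_cnt);
+ *   }
+ *
+ *   bdev_virtio_user_scsi_dev_create("VirtioScsi0", "/var/tmp/vhost.0",
+ *                                    4, 128, scan_done, NULL);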
+ */
+int bdev_virtio_user_scsi_dev_create(const char *name, const char *path,
+				     unsigned num_queues, unsigned queue_size,
+				     bdev_virtio_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Attach virtio-pci device. This creates a Virtio SCSI device with the same
+ * capabilities as the vhost-user equivalent. The device will be automatically
+ * scanned for exposed SCSI targets. This will result in creating possibly multiple
+ * Virtio SCSI bdevs - one for each target. Currently only one LUN per target is
+ * detected - LUN0. Note that the bdev creation is run asynchronously in the
+ * background. After it's finished, the `cb_fn` callback is called.
+ *
+ * \param name name for the virtio device. It will be inherited by all created
+ * bdevs, which are named in the following format: <name>t<target_id>
+ * \param pci_addr PCI address of the device to attach
+ * \param cb_fn function to be called after scanning all targets on the virtio
+ * device. It's optional, can be NULL. See \c bdev_virtio_create_cb.
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success (device scan is started) or negative error code.
+ * In case of error the \c cb_fn is not called.
+ */
+int bdev_virtio_pci_scsi_dev_create(const char *name, struct spdk_pci_addr *pci_addr,
+				    bdev_virtio_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Remove a Virtio device with given name. This will destroy all bdevs exposed
+ * by this device.
+ *
+ * \param name virtio device name
+ * \param cb_fn function to be called after scanning all targets on the virtio
+ * device. It's optional, can be NULL. See \c bdev_virtio_create_cb. Possible
+ * error codes are:
+ * * ENODEV - couldn't find device with given name
+ * * EBUSY - device is already being removed
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success or -ENODEV if scsi dev does not exist
+ */
+int bdev_virtio_scsi_dev_remove(const char *name,
+				bdev_virtio_remove_cb cb_fn, void *cb_arg);
+
+/**
+ * Remove a Virtio device with given name.
+ *
+ * \param name virtio blk device bdev name
+ * \param cb_fn function to be called after removing bdev
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success, -ENODEV if bdev with 'name' does not exist or
+ * -EINVAL if bdev with 'name' is not a virtio blk device.
+ */
+int bdev_virtio_blk_dev_remove(const char *name,
+			       bdev_virtio_remove_cb cb_fn, void *cb_arg);
+
+/**
+ * List all created Virtio-SCSI devices.
+ *
+ * \param write_ctx JSON context to write into
+ */
+void bdev_virtio_scsi_dev_list(struct spdk_json_write_ctx *write_ctx);
+
+/**
+ * Connect to a vhost-user Unix domain socket and create a Virtio BLK bdev.
+ *
+ * \param name name for the virtio bdev
+ * \param path path to the socket
+ * \param num_queues max number of request virtqueues to use. `vdev` will be
+ * started successfully even if the host device supports fewer queues than requested.
+ * \param queue_size depth of each queue
+ * \return virtio-blk bdev or NULL
+ */
+struct spdk_bdev *bdev_virtio_user_blk_dev_create(const char *name, const char *path,
+		unsigned num_queues, unsigned queue_size);
+
+/**
+ * Attach virtio-pci device. This creates a Virtio BLK device with the same
+ * capabilities as the vhost-user equivalent.
+ *
+ * \param name name for the virtio device.
It will be inherited by all created + * bdevs, which are named in the following format: t + * \param pci_addr PCI address of the device to attach + * \return virtio-blk bdev or NULL + */ +struct spdk_bdev *bdev_virtio_pci_blk_dev_create(const char *name, + struct spdk_pci_addr *pci_addr); + +#endif /* SPDK_BDEV_VIRTIO_H */ diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c b/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c new file mode 100644 index 00000000..598f7f15 --- /dev/null +++ b/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c @@ -0,0 +1,707 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/json.h" + +#include "spdk_internal/assert.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk_internal/virtio.h" + +#include + +#include "bdev_virtio.h" + +struct virtio_blk_dev { + struct virtio_dev vdev; + struct spdk_bdev bdev; + bool readonly; +}; + +struct virtio_blk_io_ctx { + struct iovec iov_req; + struct iovec iov_resp; + struct virtio_blk_outhdr req; + uint8_t resp; +}; + +struct bdev_virtio_blk_io_channel { + struct virtio_dev *vdev; + + /** Virtqueue exclusively assigned to this channel. */ + struct virtqueue *vq; + + /** Virtio response poller. */ + struct spdk_poller *poller; +}; + +/* Features desired/implemented by this driver. 
*/ +#define VIRTIO_BLK_DEV_SUPPORTED_FEATURES \ + (1ULL << VIRTIO_BLK_F_BLK_SIZE | \ + 1ULL << VIRTIO_BLK_F_TOPOLOGY | \ + 1ULL << VIRTIO_BLK_F_MQ | \ + 1ULL << VIRTIO_BLK_F_RO | \ + 1ULL << VIRTIO_RING_F_EVENT_IDX | \ + 1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + +static int bdev_virtio_initialize(void); +static int bdev_virtio_blk_get_ctx_size(void); + +static struct spdk_bdev_module virtio_blk_if = { + .name = "virtio_blk", + .module_init = bdev_virtio_initialize, + .get_ctx_size = bdev_virtio_blk_get_ctx_size, +}; + +SPDK_BDEV_MODULE_REGISTER(&virtio_blk_if) + +static int bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf); +static void bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf); + +static struct virtio_blk_io_ctx * +bdev_virtio_blk_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_blk_outhdr *req; + uint8_t *resp; + struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx; + + req = &io_ctx->req; + resp = &io_ctx->resp; + + io_ctx->iov_req.iov_base = req; + io_ctx->iov_req.iov_len = sizeof(*req); + + io_ctx->iov_resp.iov_base = resp; + io_ctx->iov_resp.iov_len = sizeof(*resp); + + memset(req, 0, sizeof(*req)); + return io_ctx; +} + +static void +bdev_virtio_blk_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct bdev_virtio_blk_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch); + struct virtqueue *vq = virtio_channel->vq; + struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx; + int rc; + + rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2); + if (rc == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + return; + } else if (rc != 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO); + virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->type == SPDK_BDEV_IO_TYPE_READ ? + SPDK_VIRTIO_DESC_WR : SPDK_VIRTIO_DESC_RO); + virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR); + + virtqueue_req_flush(vq); +} + +static void +bdev_virtio_rw(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_blk_io_ctx *io_ctx = bdev_virtio_blk_init_io_vreq(ch, bdev_io); + struct virtio_blk_outhdr *req = &io_ctx->req; + + req->type = bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE ? 
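/*
 * The request type selected here is VIRTIO_BLK_T_OUT for writes and
 * VIRTIO_BLK_T_IN for reads; the sector assigned just below is always
 * expressed in 512-byte units regardless of the bdev block size. A small
 * worked example, assuming a bdev with a 4096-byte block size:
 *
 *   offset_blocks = 16  ->  req->sector = 16 * 4096 / 512 = 128
 *
 * The request is then posted by bdev_virtio_blk_send_io() above as a
 * three-part descriptor chain: the read-only virtio_blk_outhdr, the data
 * iovecs, and one writable status byte.
 */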
+ VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN; + + req->sector = bdev_io->u.bdev.offset_blocks * + spdk_bdev_get_block_size(bdev_io->bdev) / 512; + + bdev_virtio_blk_send_io(ch, bdev_io); +} + +static int +_bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_blk_dev *bvdev = bdev_io->bdev->ctxt; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_virtio_rw, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + case SPDK_BDEV_IO_TYPE_WRITE: + if (bvdev->readonly) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + bdev_virtio_rw(ch, bdev_io); + } + return 0; + case SPDK_BDEV_IO_TYPE_RESET: + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + return 0; + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_FLUSH: + default: + return -1; + } + + SPDK_UNREACHABLE(); +} + +static void +bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_virtio_submit_request(ch, bdev_io) < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct virtio_blk_dev *bvdev = ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + case SPDK_BDEV_IO_TYPE_WRITE: + return !bvdev->readonly; + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_UNMAP: + default: + return false; + } +} + +static struct spdk_io_channel * +bdev_virtio_get_io_channel(void *ctx) +{ + struct virtio_blk_dev *bvdev = ctx; + + return spdk_get_io_channel(bvdev); +} + +static void +virtio_blk_dev_unregister_cb(void *io_device) +{ + struct virtio_blk_dev *bvdev = io_device; + struct virtio_dev *vdev = &bvdev->vdev; + + virtio_dev_stop(vdev); + virtio_dev_destruct(vdev); + spdk_bdev_destruct_done(&bvdev->bdev, 0); + free(bvdev); +} + +static int +bdev_virtio_disk_destruct(void *ctx) +{ + struct virtio_blk_dev *bvdev = ctx; + + spdk_io_device_unregister(bvdev, virtio_blk_dev_unregister_cb); + return 1; +} + +int +bdev_virtio_blk_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg) +{ + struct spdk_bdev *bdev; + + bdev = spdk_bdev_get_by_name(name); + if (bdev == NULL) { + return -ENODEV; + } + + if (bdev->module != &virtio_blk_if) { + return -ENODEV; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); + + return 0; +} + +static int +bdev_virtio_dump_json_config(void *ctx, struct spdk_json_write_ctx *w) +{ + struct virtio_blk_dev *bvdev = ctx; + + virtio_dev_dump_json_info(&bvdev->vdev, w); + return 0; +} + +static void +bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct virtio_blk_dev *bvdev = bdev->ctxt; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_virtio_dev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bvdev->vdev.name); + spdk_json_write_named_string(w, "dev_type", "blk"); + + /* Write transport specific parameters. 
*/ + bvdev->vdev.backend_ops->write_json_config(&bvdev->vdev, w); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table virtio_fn_table = { + .destruct = bdev_virtio_disk_destruct, + .submit_request = bdev_virtio_submit_request, + .io_type_supported = bdev_virtio_io_type_supported, + .get_io_channel = bdev_virtio_get_io_channel, + .dump_info_json = bdev_virtio_dump_json_config, + .write_config_json = bdev_virtio_write_config_json, +}; + +static void +bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io) +{ + struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx; + + spdk_bdev_io_complete(bdev_io, io_ctx->resp == VIRTIO_BLK_S_OK ? + SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED); +} + +static int +bdev_virtio_poll(void *arg) +{ + struct bdev_virtio_blk_io_channel *ch = arg; + void *io[32]; + uint32_t io_len[32]; + uint16_t i, cnt; + + cnt = virtio_recv_pkts(ch->vq, io, io_len, SPDK_COUNTOF(io)); + for (i = 0; i < cnt; ++i) { + bdev_virtio_io_cpl(io[i]); + } + + return cnt; +} + +static int +bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf) +{ + struct virtio_blk_dev *bvdev = io_device; + struct virtio_dev *vdev = &bvdev->vdev; + struct bdev_virtio_blk_io_channel *ch = ctx_buf; + struct virtqueue *vq; + int32_t queue_idx; + + queue_idx = virtio_dev_find_and_acquire_queue(vdev, 0); + if (queue_idx < 0) { + SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n"); + return -1; + } + + vq = vdev->vqs[queue_idx]; + + ch->vdev = vdev; + ch->vq = vq; + + ch->poller = spdk_poller_register(bdev_virtio_poll, ch, 0); + return 0; +} + +static void +bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf) +{ + struct virtio_blk_dev *bvdev = io_device; + struct virtio_dev *vdev = &bvdev->vdev; + struct bdev_virtio_blk_io_channel *ch = ctx_buf; + struct virtqueue *vq = ch->vq; + + spdk_poller_unregister(&ch->poller); + virtio_dev_release_queue(vdev, vq->vq_queue_index); +} + +static int +virtio_blk_dev_init(struct virtio_blk_dev *bvdev, uint16_t max_queues) +{ + struct virtio_dev *vdev = &bvdev->vdev; + struct spdk_bdev *bdev = &bvdev->bdev; + uint64_t capacity, num_blocks; + uint32_t block_size; + uint16_t host_max_queues; + int rc; + + if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) { + rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, blk_size), + &block_size, sizeof(block_size)); + if (rc) { + SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc)); + return rc; + } + + if (block_size == 0 || block_size % 512 != 0) { + SPDK_ERRLOG("%s: invalid block size (%"PRIu32"). Must be " + "a multiple of 512.\n", vdev->name, block_size); + return -EIO; + } + } else { + block_size = 512; + } + + rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); + if (rc) { + SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc)); + return rc; + } + + /* `capacity` is a number of 512-byte sectors. */ + num_blocks = capacity * 512 / block_size; + if (num_blocks == 0) { + SPDK_ERRLOG("%s: size too small (size: %"PRIu64", blocksize: %"PRIu32").\n", + vdev->name, capacity * 512, block_size); + return -EIO; + } + + if ((capacity * 512) % block_size != 0) { + SPDK_WARNLOG("%s: size has been rounded down to the nearest block size boundary. 
" + "(block size: %"PRIu32", previous size: %"PRIu64", new size: %"PRIu64")\n", + vdev->name, block_size, capacity * 512, num_blocks * block_size); + } + + if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) { + rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues), + &host_max_queues, sizeof(host_max_queues)); + if (rc) { + SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc)); + return rc; + } + } else { + host_max_queues = 1; + } + + if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_RO)) { + bvdev->readonly = true; + } + + if (max_queues == 0) { + SPDK_ERRLOG("%s: requested 0 request queues (%"PRIu16" available).\n", + vdev->name, host_max_queues); + return -EINVAL; + } + + if (max_queues > host_max_queues) { + SPDK_WARNLOG("%s: requested %"PRIu16" request queues " + "but only %"PRIu16" available.\n", + vdev->name, max_queues, host_max_queues); + max_queues = host_max_queues; + } + + /* bdev is tied with the virtio device; we can reuse the name */ + bdev->name = vdev->name; + rc = virtio_dev_start(vdev, max_queues, 0); + if (rc != 0) { + return rc; + } + + bdev->product_name = "VirtioBlk Disk"; + bdev->write_cache = 0; + bdev->blocklen = block_size; + bdev->blockcnt = num_blocks; + + bdev->ctxt = bvdev; + bdev->fn_table = &virtio_fn_table; + bdev->module = &virtio_blk_if; + + spdk_io_device_register(bvdev, bdev_virtio_blk_ch_create_cb, + bdev_virtio_blk_ch_destroy_cb, + sizeof(struct bdev_virtio_blk_io_channel), + vdev->name); + + rc = spdk_bdev_register(bdev); + if (rc) { + SPDK_ERRLOG("Failed to register bdev name=%s\n", bdev->name); + spdk_io_device_unregister(bvdev, NULL); + virtio_dev_stop(vdev); + return rc; + } + + return 0; +} + +static struct virtio_blk_dev * +virtio_pci_blk_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx) +{ + static int pci_dev_counter = 0; + struct virtio_blk_dev *bvdev; + struct virtio_dev *vdev; + char *default_name = NULL; + uint16_t num_queues; + int rc; + + bvdev = calloc(1, sizeof(*bvdev)); + if (bvdev == NULL) { + SPDK_ERRLOG("virtio device calloc failed\n"); + return NULL; + } + vdev = &bvdev->vdev; + + if (name == NULL) { + default_name = spdk_sprintf_alloc("VirtioBlk%"PRIu32, pci_dev_counter++); + if (default_name == NULL) { + free(vdev); + return NULL; + } + name = default_name; + } + + rc = virtio_pci_dev_init(vdev, name, pci_ctx); + free(default_name); + + if (rc != 0) { + free(bvdev); + return NULL; + } + + rc = virtio_dev_reset(vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES); + if (rc != 0) { + virtio_dev_destruct(vdev); + free(bvdev); + return NULL; + } + + /* TODO: add a way to limit usable virtqueues */ + if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) { + rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues), + &num_queues, sizeof(num_queues)); + if (rc) { + SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc)); + virtio_dev_destruct(vdev); + free(bvdev); + return NULL; + } + } else { + num_queues = 1; + } + + rc = virtio_blk_dev_init(bvdev, num_queues); + if (rc != 0) { + virtio_dev_destruct(vdev); + free(bvdev); + return NULL; + } + + return bvdev; +} + +static struct virtio_blk_dev * +virtio_user_blk_dev_create(const char *name, const char *path, + uint16_t num_queues, uint32_t queue_size) +{ + struct virtio_blk_dev *bvdev; + int rc; + + bvdev = calloc(1, sizeof(*bvdev)); + if (bvdev == NULL) { + SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path); + return NULL; + } + + rc = virtio_user_dev_init(&bvdev->vdev, 
name, path, queue_size); + if (rc != 0) { + SPDK_ERRLOG("Failed to create virito device %s: %s\n", name, path); + free(bvdev); + return NULL; + } + + rc = virtio_dev_reset(&bvdev->vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES); + if (rc != 0) { + virtio_dev_destruct(&bvdev->vdev); + free(bvdev); + return NULL; + } + + rc = virtio_blk_dev_init(bvdev, num_queues); + if (rc != 0) { + virtio_dev_destruct(&bvdev->vdev); + free(bvdev); + return NULL; + } + + return bvdev; +} + +struct bdev_virtio_pci_dev_create_ctx { + const char *name; + struct virtio_blk_dev *ret; +}; + +static int +bdev_virtio_pci_blk_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx) +{ + struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx; + + create_ctx->ret = virtio_pci_blk_dev_create(create_ctx->name, pci_ctx); + if (create_ctx->ret == NULL) { + return -1; + } + + return 0; +} + +struct spdk_bdev * +bdev_virtio_pci_blk_dev_create(const char *name, struct spdk_pci_addr *pci_addr) +{ + struct bdev_virtio_pci_dev_create_ctx create_ctx; + + create_ctx.name = name; + create_ctx.ret = NULL; + + virtio_pci_dev_attach(bdev_virtio_pci_blk_dev_create_cb, &create_ctx, + PCI_DEVICE_ID_VIRTIO_BLK_MODERN, pci_addr); + + if (create_ctx.ret == NULL) { + return NULL; + } + + return &create_ctx.ret->bdev; +} + +static int +virtio_pci_blk_dev_enumerate_cb(struct virtio_pci_ctx *pci_ctx, void *ctx) +{ + struct virtio_blk_dev *bvdev; + + bvdev = virtio_pci_blk_dev_create(NULL, pci_ctx); + return bvdev == NULL ? -1 : 0; +} + +static int +bdev_virtio_initialize(void) +{ + struct spdk_conf_section *sp; + struct virtio_blk_dev *bvdev; + char *default_name = NULL; + char *path, *type, *name; + unsigned vdev_num; + int num_queues; + bool enable_pci; + int rc = 0; + + for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { + if (!spdk_conf_section_match_prefix(sp, "VirtioUser")) { + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VirtioUser%u", &vdev_num) != 1) { + SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", + spdk_conf_section_get_name(sp)); + return -1; + } + + path = spdk_conf_section_get_val(sp, "Path"); + if (path == NULL) { + SPDK_ERRLOG("VirtioUserBlk%u: missing Path\n", vdev_num); + return -1; + } + + type = spdk_conf_section_get_val(sp, "Type"); + if (type == NULL || strcmp(type, "Blk") != 0) { + continue; + } + + num_queues = spdk_conf_section_get_intval(sp, "Queues"); + if (num_queues < 1) { + num_queues = 1; + } + + name = spdk_conf_section_get_val(sp, "Name"); + if (name == NULL) { + default_name = spdk_sprintf_alloc("VirtioBlk%u", vdev_num); + name = default_name; + } + + bvdev = virtio_user_blk_dev_create(name, path, num_queues, 512); + free(default_name); + default_name = NULL; + + if (bvdev == NULL) { + return -1; + } + } + + sp = spdk_conf_find_section(NULL, "VirtioPci"); + if (sp == NULL) { + return 0; + } + + enable_pci = spdk_conf_section_get_boolval(sp, "Enable", false); + if (enable_pci) { + rc = virtio_pci_dev_enumerate(virtio_pci_blk_dev_enumerate_cb, NULL, + PCI_DEVICE_ID_VIRTIO_BLK_MODERN); + } + + return rc; +} + +struct spdk_bdev * +bdev_virtio_user_blk_dev_create(const char *name, const char *path, + unsigned num_queues, unsigned queue_size) +{ + struct virtio_blk_dev *bvdev; + + bvdev = virtio_user_blk_dev_create(name, path, num_queues, queue_size); + if (bvdev == NULL) { + return NULL; + } + + return &bvdev->bdev; +} + +static int +bdev_virtio_blk_get_ctx_size(void) +{ + return sizeof(struct virtio_blk_io_ctx); +} + +SPDK_LOG_REGISTER_COMPONENT("virtio_blk", 
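/*
 * For reference, a legacy configuration-file fragment of the kind consumed by
 * bdev_virtio_initialize() above might look as follows; section and key names
 * come from the parsing code, while the socket path and bdev name are
 * illustrative only:
 *
 *   [VirtioUser0]
 *     Path /tmp/vhost.0      # vhost-user socket, required
 *     Type Blk               # sections with any other Type are skipped here
 *     Queues 2               # defaults to 1 when absent or < 1
 *     Name VirtioBlk0        # defaults to VirtioBlk<N>
 *
 *   [VirtioPci]
 *     Enable Yes             # triggers virtio_pci_dev_enumerate()
 *
 * Each matching VirtioUser section results in one
 * virtio_user_blk_dev_create(name, path, num_queues, 512) call.
 */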
SPDK_LOG_VIRTIO_BLK) diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c b/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c new file mode 100644 index 00000000..e96fb42a --- /dev/null +++ b/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c @@ -0,0 +1,613 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk_internal/log.h" + +#include "bdev_virtio.h" + +#define SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT 1 +#define SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE 512 + +struct rpc_construct_virtio_scsi_dev { + char *path; + char *pci_address; + char *name; + uint32_t vq_count; + uint32_t vq_size; + struct spdk_jsonrpc_request *request; + +}; + +static const struct spdk_json_object_decoder rpc_construct_virtio_user_scsi_dev[] = { + {"path", offsetof(struct rpc_construct_virtio_scsi_dev, path), spdk_json_decode_string }, + {"name", offsetof(struct rpc_construct_virtio_scsi_dev, name), spdk_json_decode_string }, + {"vq_count", offsetof(struct rpc_construct_virtio_scsi_dev, vq_size), spdk_json_decode_uint32, true }, + {"vq_size", offsetof(struct rpc_construct_virtio_scsi_dev, vq_size), spdk_json_decode_uint32, true }, +}; + +static void +free_rpc_construct_virtio_scsi_dev(struct rpc_construct_virtio_scsi_dev *req) +{ + if (!req) { + return; + } + + free(req->path); + free(req->pci_address); + free(req->name); + free(req); +} + +static void +rpc_construct_virtio_scsi_dev_cb(void *ctx, int result, struct spdk_bdev **bdevs, size_t cnt) +{ + struct rpc_construct_virtio_scsi_dev *req = ctx; + struct spdk_json_write_ctx *w; + size_t i; + + if (result) { + spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-result)); + free_rpc_construct_virtio_scsi_dev(req); + return; + } + + w = spdk_jsonrpc_begin_result(req->request); + if (w) { + spdk_json_write_array_begin(w); + + for (i = 0; i < cnt; i++) { + spdk_json_write_string(w, spdk_bdev_get_name(bdevs[i])); + } + + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(req->request, w); + } + + free_rpc_construct_virtio_scsi_dev(ctx); +} + +static void +spdk_rpc_create_virtio_user_scsi_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_virtio_scsi_dev *req; + int rc; + + SPDK_WARNLOG("construct_virtio_user_scsi_bdev command has been deprecated and will be removed " + "in the subsequent release. 
Please use construct_virtio_dev instead.\n"); + + req = calloc(1, sizeof(*req)); + if (!req) { + rc = -ENOMEM; + goto invalid; + } + + req->pci_address = NULL; + req->vq_count = SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT; + req->vq_size = SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE; + + if (spdk_json_decode_object(params, rpc_construct_virtio_user_scsi_dev, + SPDK_COUNTOF(rpc_construct_virtio_user_scsi_dev), + req)) { + rc = -EINVAL; + goto invalid; + } + + req->request = request; + rc = bdev_virtio_user_scsi_dev_create(req->name, req->path, req->vq_count, req->vq_size, + rpc_construct_virtio_scsi_dev_cb, req); + if (rc < 0) { + goto invalid; + } + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_construct_virtio_scsi_dev(req); +} +SPDK_RPC_REGISTER("construct_virtio_user_scsi_bdev", spdk_rpc_create_virtio_user_scsi_bdev, + SPDK_RPC_RUNTIME); + +static const struct spdk_json_object_decoder rpc_construct_virtio_pci_scsi_dev[] = { + {"pci_address", offsetof(struct rpc_construct_virtio_scsi_dev, pci_address), spdk_json_decode_string }, + {"name", offsetof(struct rpc_construct_virtio_scsi_dev, name), spdk_json_decode_string }, +}; + +static void +spdk_rpc_construct_virtio_pci_scsi_dev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_virtio_scsi_dev *req; + struct spdk_pci_addr pci_addr; + int rc; + + SPDK_WARNLOG("construct_virtio_pci_scsi_bdev command has been deprecated and will be removed " + "in the subsequent release. Please use construct_virtio_dev instead.\n"); + + req = calloc(1, sizeof(*req)); + if (!req) { + rc = -ENOMEM; + goto invalid; + } + + req->path = NULL; + + if (spdk_json_decode_object(params, rpc_construct_virtio_pci_scsi_dev, + SPDK_COUNTOF(rpc_construct_virtio_pci_scsi_dev), + req)) { + rc = -EINVAL; + goto invalid; + } + + if (spdk_pci_addr_parse(&pci_addr, req->pci_address) != 0) { + SPDK_ERRLOG("Invalid PCI address '%s'\n", req->pci_address); + rc = -EINVAL; + goto invalid; + } + + req->request = request; + rc = bdev_virtio_pci_scsi_dev_create(req->name, &pci_addr, + rpc_construct_virtio_scsi_dev_cb, req); + if (rc < 0) { + goto invalid; + } + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_construct_virtio_scsi_dev(req); +} +SPDK_RPC_REGISTER("construct_virtio_pci_scsi_bdev", spdk_rpc_construct_virtio_pci_scsi_dev, + SPDK_RPC_RUNTIME); + +struct rpc_remove_virtio_dev { + char *name; +}; + +static const struct spdk_json_object_decoder rpc_remove_virtio_dev[] = { + {"name", offsetof(struct rpc_remove_virtio_dev, name), spdk_json_decode_string }, +}; + +static void +spdk_rpc_remove_virtio_scsi_bdev_cb(void *ctx, int errnum) +{ + struct spdk_jsonrpc_request *request = ctx; + struct spdk_json_write_ctx *w; + + if (errnum != 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-errnum)); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_remove_virtio_scsi_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_remove_virtio_dev req = {NULL}; + int rc; + + SPDK_WARNLOG("remove_virtio_scsi_bdev command has been deprecated and will be removed " + "in the subsequent release. 
Please use remove_virtio_bdev instead.\n"); + + if (spdk_json_decode_object(params, rpc_remove_virtio_dev, + SPDK_COUNTOF(rpc_remove_virtio_dev), + &req)) { + rc = -EINVAL; + goto invalid; + } + + rc = bdev_virtio_scsi_dev_remove(req.name, spdk_rpc_remove_virtio_scsi_bdev_cb, request); + if (rc != 0) { + goto invalid; + } + + free(req.name); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free(req.name); +} +SPDK_RPC_REGISTER("remove_virtio_scsi_bdev", spdk_rpc_remove_virtio_scsi_bdev, SPDK_RPC_RUNTIME); + +static void +spdk_rpc_remove_virtio_bdev_cb(void *ctx, int errnum) +{ + struct spdk_jsonrpc_request *request = ctx; + struct spdk_json_write_ctx *w; + + if (errnum != 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-errnum)); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_remove_virtio_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_remove_virtio_dev req = {NULL}; + int rc; + + if (spdk_json_decode_object(params, rpc_remove_virtio_dev, + SPDK_COUNTOF(rpc_remove_virtio_dev), + &req)) { + rc = -EINVAL; + goto invalid; + } + + rc = bdev_virtio_blk_dev_remove(req.name, spdk_rpc_remove_virtio_bdev_cb, request); + if (rc == -ENODEV) { + rc = bdev_virtio_scsi_dev_remove(req.name, spdk_rpc_remove_virtio_bdev_cb, request); + } + + if (rc != 0) { + goto invalid; + } + + free(req.name); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free(req.name); +} +SPDK_RPC_REGISTER("remove_virtio_bdev", spdk_rpc_remove_virtio_bdev, SPDK_RPC_RUNTIME); + +static void +spdk_rpc_get_virtio_scsi_devs(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_virtio_scsi_devs requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + bdev_virtio_scsi_dev_list(w); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_virtio_scsi_devs", spdk_rpc_get_virtio_scsi_devs, SPDK_RPC_RUNTIME) + +struct rpc_construct_virtio_blk_dev { + char *path; + char *pci_address; + char *name; + uint32_t vq_count; + uint32_t vq_size; +}; + +static void +free_rpc_construct_virtio_blk_dev(struct rpc_construct_virtio_blk_dev *req) +{ + free(req->path); + free(req->pci_address); + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_construct_virtio_user_blk_dev[] = { + {"path", offsetof(struct rpc_construct_virtio_blk_dev, path), spdk_json_decode_string }, + {"name", offsetof(struct rpc_construct_virtio_blk_dev, name), spdk_json_decode_string }, + {"vq_count", offsetof(struct rpc_construct_virtio_blk_dev, vq_count), spdk_json_decode_uint32, true }, + {"vq_size", offsetof(struct rpc_construct_virtio_blk_dev, vq_size), spdk_json_decode_uint32, true }, +}; + +static void +spdk_rpc_create_virtio_user_blk_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_virtio_blk_dev req = {0}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + int rc; + + req.pci_address = NULL; + req.vq_count = SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT; + 
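/*
 * The spdk_json_decode_object() call below, driven by the
 * rpc_construct_virtio_user_blk_dev decoder above, accepts a params object of
 * roughly this shape ("path" and "name" are required, "vq_count" and
 * "vq_size" are optional and fall back to the defaults assigned here; the
 * values shown are examples only):
 *
 *   { "name": "VirtioBlk0", "path": "/tmp/vhost.0", "vq_count": 1, "vq_size": 512 }
 *
 * The non-deprecated construct_virtio_dev equivalent (see the end of this
 * file) would be:
 *
 *   { "name": "VirtioBlk0", "trtype": "user", "traddr": "/tmp/vhost.0",
 *     "dev_type": "blk", "vq_count": 1, "vq_size": 512 }
 */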
req.vq_size = SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE; + + SPDK_WARNLOG("construct_virtio_user_blk_bdev command has been deprecated and will be removed " + "in the subsequent release. Please use construct_virtio_dev instead.\n"); + + if (spdk_json_decode_object(params, rpc_construct_virtio_user_blk_dev, + SPDK_COUNTOF(rpc_construct_virtio_user_blk_dev), + &req)) { + free_rpc_construct_virtio_blk_dev(&req); + rc = -EINVAL; + goto invalid; + } + + bdev = bdev_virtio_user_blk_dev_create(req.name, req.path, req.vq_count, req.vq_size); + free_rpc_construct_virtio_blk_dev(&req); + if (bdev == NULL) { + rc = -EINVAL; + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("construct_virtio_user_blk_bdev", spdk_rpc_create_virtio_user_blk_bdev, + SPDK_RPC_RUNTIME); + +static const struct spdk_json_object_decoder rpc_construct_virtio_pci_blk_dev[] = { + {"pci_address", offsetof(struct rpc_construct_virtio_blk_dev, pci_address), spdk_json_decode_string }, + {"name", offsetof(struct rpc_construct_virtio_blk_dev, name), spdk_json_decode_string }, +}; + +static void +spdk_rpc_create_virtio_pci_blk_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_virtio_blk_dev req = {0}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + struct spdk_pci_addr pci_addr; + int rc; + + req.pci_address = NULL; + + SPDK_WARNLOG("construct_virtio_pci_blk_bdev command has been deprecated and will be removed " + "in the subsequent release. Please use construct_virtio_dev instead.\n"); + + if (spdk_json_decode_object(params, rpc_construct_virtio_pci_blk_dev, + SPDK_COUNTOF(rpc_construct_virtio_pci_blk_dev), + &req)) { + free_rpc_construct_virtio_blk_dev(&req); + rc = -EINVAL; + goto invalid; + } + + if (spdk_pci_addr_parse(&pci_addr, req.pci_address) != 0) { + SPDK_ERRLOG("Invalid PCI address '%s'\n", req.pci_address); + free_rpc_construct_virtio_blk_dev(&req); + rc = -EINVAL; + goto invalid; + } + + bdev = bdev_virtio_pci_blk_dev_create(req.name, &pci_addr); + free_rpc_construct_virtio_blk_dev(&req); + if (bdev == NULL) { + rc = -EINVAL; + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("construct_virtio_pci_blk_bdev", spdk_rpc_create_virtio_pci_blk_bdev, + SPDK_RPC_RUNTIME); + +struct rpc_construct_virtio_dev { + char *name; + char *trtype; + char *traddr; + char *dev_type; + uint32_t vq_count; + uint32_t vq_size; + struct spdk_jsonrpc_request *request; +}; + +static const struct spdk_json_object_decoder rpc_construct_virtio_dev[] = { + {"name", offsetof(struct rpc_construct_virtio_dev, name), spdk_json_decode_string }, + {"trtype", offsetof(struct rpc_construct_virtio_dev, trtype), spdk_json_decode_string }, + {"traddr", offsetof(struct rpc_construct_virtio_dev, traddr), spdk_json_decode_string }, + {"dev_type", offsetof(struct rpc_construct_virtio_dev, dev_type), spdk_json_decode_string }, + {"vq_count", offsetof(struct rpc_construct_virtio_dev, vq_count), spdk_json_decode_uint32, true }, + {"vq_size", 
offsetof(struct rpc_construct_virtio_dev, vq_size), spdk_json_decode_uint32, true }, +}; + +static void +free_rpc_construct_virtio_dev(struct rpc_construct_virtio_dev *req) +{ + free(req->name); + free(req->trtype); + free(req->traddr); + free(req->dev_type); + free(req); +} + +static void +spdk_rpc_create_virtio_dev_cb(void *ctx, int result, struct spdk_bdev **bdevs, size_t cnt) +{ + struct rpc_construct_virtio_dev *req = ctx; + struct spdk_json_write_ctx *w; + size_t i; + + if (result) { + spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-result)); + free_rpc_construct_virtio_dev(req); + return; + } + + w = spdk_jsonrpc_begin_result(req->request); + if (w) { + spdk_json_write_array_begin(w); + + for (i = 0; i < cnt; i++) { + spdk_json_write_string(w, spdk_bdev_get_name(bdevs[i])); + } + + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(req->request, w); + } + + free_rpc_construct_virtio_dev(ctx); +} + +static void +spdk_rpc_create_virtio_dev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_virtio_dev *req; + struct spdk_bdev *bdev; + struct spdk_pci_addr pci_addr; + bool pci; + int rc; + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("calloc() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(ENOMEM)); + return; + } + + if (spdk_json_decode_object(params, rpc_construct_virtio_dev, + SPDK_COUNTOF(rpc_construct_virtio_dev), + req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(EINVAL)); + goto invalid; + } + + if (strcmp(req->trtype, "pci") == 0) { + if (req->vq_count != 0 || req->vq_size != 0) { + SPDK_ERRLOG("VQ count or size is not allowed for PCI transport type\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "vq_count or vq_size is not allowed for PCI transport type."); + goto invalid; + } + + if (spdk_pci_addr_parse(&pci_addr, req->traddr) != 0) { + SPDK_ERRLOG("Invalid PCI address '%s'\n", req->traddr); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid PCI address '%s'", req->traddr); + goto invalid; + } + + pci = true; + } else if (strcmp(req->trtype, "user") == 0) { + req->vq_count = req->vq_count == 0 ? SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT : req->vq_count; + req->vq_size = req->vq_size == 0 ? SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE : req->vq_size; + pci = false; + } else { + SPDK_ERRLOG("Invalid trtype '%s'\n", req->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid trtype '%s'", req->trtype); + goto invalid; + } + + req->request = request; + if (strcmp(req->dev_type, "blk") == 0) { + if (pci) { + bdev = bdev_virtio_pci_blk_dev_create(req->name, &pci_addr); + } else { + bdev = bdev_virtio_user_blk_dev_create(req->name, req->traddr, req->vq_count, req->vq_size); + } + + /* Virtio blk doesn't use callback so call it manually to send result. */ + rc = bdev ? 0 : -EINVAL; + spdk_rpc_create_virtio_dev_cb(req, rc, &bdev, bdev ? 1 : 0); + } else if (strcmp(req->dev_type, "scsi") == 0) { + if (pci) { + rc = bdev_virtio_pci_scsi_dev_create(req->name, &pci_addr, spdk_rpc_create_virtio_dev_cb, req); + } else { + rc = bdev_virtio_user_scsi_dev_create(req->name, req->traddr, req->vq_count, req->vq_size, + spdk_rpc_create_virtio_dev_cb, req); + } + + if (rc < 0) { + /* In case of error callback is not called so do it manually to send result. 
*/ + spdk_rpc_create_virtio_dev_cb(req, rc, NULL, 0); + } + } else { + SPDK_ERRLOG("Invalid dev_type '%s'\n", req->dev_type); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid dev_type '%s'", req->dev_type); + goto invalid; + } + + return; +invalid: + free_rpc_construct_virtio_dev(req); +} +SPDK_RPC_REGISTER("construct_virtio_dev", spdk_rpc_create_virtio_dev, SPDK_RPC_RUNTIME); diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c b/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c new file mode 100644 index 00000000..4ff3db4a --- /dev/null +++ b/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c @@ -0,0 +1,2017 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/scsi_spec.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/json.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk_internal/virtio.h" + +#include + +#include "bdev_virtio.h" + +#define BDEV_VIRTIO_MAX_TARGET 64 +#define BDEV_VIRTIO_SCAN_PAYLOAD_SIZE 256 +#define MGMT_POLL_PERIOD_US (1000 * 5) +#define CTRLQ_RING_SIZE 16 +#define SCAN_REQUEST_RETRIES 5 + +/* Number of non-request queues - eventq and controlq */ +#define SPDK_VIRTIO_SCSI_QUEUE_NUM_FIXED 2 + +#define VIRTIO_SCSI_EVENTQ_BUFFER_COUNT 16 + +#define VIRTIO_SCSI_CONTROLQ 0 +#define VIRTIO_SCSI_EVENTQ 1 +#define VIRTIO_SCSI_REQUESTQ 2 + +static int bdev_virtio_initialize(void); +static void bdev_virtio_finish(void); + +struct virtio_scsi_dev { + /* Generic virtio device data. */ + struct virtio_dev vdev; + + /** Detected SCSI LUNs */ + TAILQ_HEAD(, virtio_scsi_disk) luns; + + /** Context for the SCSI target scan. */ + struct virtio_scsi_scan_base *scan_ctx; + + /** Controlq poller. */ + struct spdk_poller *mgmt_poller; + + /** Controlq messages to be sent. 
*/ + struct spdk_ring *ctrlq_ring; + + /** Buffers for the eventq. */ + struct virtio_scsi_eventq_io *eventq_ios; + + /** Device marked for removal. */ + bool removed; + + /** Callback to be called after vdev removal. */ + bdev_virtio_remove_cb remove_cb; + + /** Context for the `remove_cb`. */ + void *remove_ctx; + + TAILQ_ENTRY(virtio_scsi_dev) tailq; +}; + +struct virtio_scsi_io_ctx { + struct iovec iov_req; + struct iovec iov_resp; + union { + struct virtio_scsi_cmd_req req; + struct virtio_scsi_ctrl_tmf_req tmf_req; + }; + union { + struct virtio_scsi_cmd_resp resp; + struct virtio_scsi_ctrl_tmf_resp tmf_resp; + }; +}; + +struct virtio_scsi_eventq_io { + struct iovec iov; + struct virtio_scsi_event ev; +}; + +struct virtio_scsi_scan_info { + uint64_t num_blocks; + uint32_t block_size; + uint8_t target; + bool unmap_supported; + TAILQ_ENTRY(virtio_scsi_scan_info) tailq; +}; + +struct virtio_scsi_scan_base { + struct virtio_scsi_dev *svdev; + + /** I/O channel used for the scan I/O. */ + struct bdev_virtio_io_channel *channel; + + bdev_virtio_create_cb cb_fn; + void *cb_arg; + + /** Scan all targets on the device. */ + bool full_scan; + + /** Start a full rescan after receiving next scan I/O response. */ + bool restart; + + /** Additional targets to be (re)scanned. */ + TAILQ_HEAD(, virtio_scsi_scan_info) scan_queue; + + /** Remaining attempts for sending the current request. */ + unsigned retries; + + /** If set, the last scan I/O needs to be resent */ + bool needs_resend; + + struct virtio_scsi_io_ctx io_ctx; + struct iovec iov; + uint8_t payload[BDEV_VIRTIO_SCAN_PAYLOAD_SIZE]; + + /** Scan results for the current target. */ + struct virtio_scsi_scan_info info; +}; + +struct virtio_scsi_disk { + struct spdk_bdev bdev; + struct virtio_scsi_dev *svdev; + struct virtio_scsi_scan_info info; + + /** Descriptor opened just to be notified of external bdev hotremove. */ + struct spdk_bdev_desc *notify_desc; + + /** Disk marked for removal. */ + bool removed; + TAILQ_ENTRY(virtio_scsi_disk) link; +}; + +struct bdev_virtio_io_channel { + struct virtio_scsi_dev *svdev; + + /** Virtqueue exclusively assigned to this channel. */ + struct virtqueue *vq; + + /** Virtio response poller. */ + struct spdk_poller *poller; +}; + +static TAILQ_HEAD(, virtio_scsi_dev) g_virtio_scsi_devs = + TAILQ_HEAD_INITIALIZER(g_virtio_scsi_devs); + +static pthread_mutex_t g_virtio_scsi_mutex = PTHREAD_MUTEX_INITIALIZER; + +/** Module finish in progress */ +static bool g_bdev_virtio_finish = false; + +/* Features desired/implemented by this driver. 
*/ +#define VIRTIO_SCSI_DEV_SUPPORTED_FEATURES \ + (1ULL << VIRTIO_SCSI_F_INOUT | \ + 1ULL << VIRTIO_SCSI_F_HOTPLUG | \ + 1ULL << VIRTIO_RING_F_EVENT_IDX | \ + 1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + +static void virtio_scsi_dev_unregister_cb(void *io_device); +static void virtio_scsi_dev_remove(struct virtio_scsi_dev *svdev, + bdev_virtio_remove_cb cb_fn, void *cb_arg); +static int bdev_virtio_scsi_ch_create_cb(void *io_device, void *ctx_buf); +static void bdev_virtio_scsi_ch_destroy_cb(void *io_device, void *ctx_buf); +static void process_scan_resp(struct virtio_scsi_scan_base *base); +static int bdev_virtio_mgmt_poll(void *arg); + +static int +virtio_scsi_dev_send_eventq_io(struct virtqueue *vq, struct virtio_scsi_eventq_io *io) +{ + int rc; + + rc = virtqueue_req_start(vq, io, 1); + if (rc != 0) { + return -1; + } + + virtqueue_req_add_iovs(vq, &io->iov, 1, SPDK_VIRTIO_DESC_WR); + virtqueue_req_flush(vq); + + return 0; +} + +static int +virtio_scsi_dev_init(struct virtio_scsi_dev *svdev, uint16_t max_queues) +{ + struct virtio_dev *vdev = &svdev->vdev; + struct spdk_ring *ctrlq_ring; + struct virtio_scsi_eventq_io *eventq_io; + struct virtqueue *eventq; + uint16_t i, num_events; + int rc; + + rc = virtio_dev_reset(vdev, VIRTIO_SCSI_DEV_SUPPORTED_FEATURES); + if (rc != 0) { + return rc; + } + + rc = virtio_dev_start(vdev, max_queues, SPDK_VIRTIO_SCSI_QUEUE_NUM_FIXED); + if (rc != 0) { + return rc; + } + + ctrlq_ring = spdk_ring_create(SPDK_RING_TYPE_MP_SC, CTRLQ_RING_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (ctrlq_ring == NULL) { + SPDK_ERRLOG("Failed to allocate send ring for the controlq.\n"); + return -1; + } + + rc = virtio_dev_acquire_queue(vdev, VIRTIO_SCSI_CONTROLQ); + if (rc != 0) { + SPDK_ERRLOG("Failed to acquire the controlq.\n"); + spdk_ring_free(ctrlq_ring); + return -1; + } + + rc = virtio_dev_acquire_queue(vdev, VIRTIO_SCSI_EVENTQ); + if (rc != 0) { + SPDK_ERRLOG("Failed to acquire the eventq.\n"); + virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ); + spdk_ring_free(ctrlq_ring); + return -1; + } + + eventq = vdev->vqs[VIRTIO_SCSI_EVENTQ]; + num_events = spdk_min(eventq->vq_nentries, VIRTIO_SCSI_EVENTQ_BUFFER_COUNT); + svdev->eventq_ios = spdk_dma_zmalloc(sizeof(*svdev->eventq_ios) * num_events, + 0, NULL); + if (svdev->eventq_ios == NULL) { + SPDK_ERRLOG("cannot allocate memory for %"PRIu16" eventq buffers\n", + num_events); + virtio_dev_release_queue(vdev, VIRTIO_SCSI_EVENTQ); + virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ); + spdk_ring_free(ctrlq_ring); + return -1; + } + + for (i = 0; i < num_events; i++) { + eventq_io = &svdev->eventq_ios[i]; + eventq_io->iov.iov_base = &eventq_io->ev; + eventq_io->iov.iov_len = sizeof(eventq_io->ev); + virtio_scsi_dev_send_eventq_io(eventq, eventq_io); + } + + svdev->ctrlq_ring = ctrlq_ring; + + svdev->mgmt_poller = spdk_poller_register(bdev_virtio_mgmt_poll, svdev, + MGMT_POLL_PERIOD_US); + + TAILQ_INIT(&svdev->luns); + svdev->scan_ctx = NULL; + svdev->removed = false; + svdev->remove_cb = NULL; + svdev->remove_ctx = NULL; + + spdk_io_device_register(svdev, bdev_virtio_scsi_ch_create_cb, + bdev_virtio_scsi_ch_destroy_cb, + sizeof(struct bdev_virtio_io_channel), + svdev->vdev.name); + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_INSERT_TAIL(&g_virtio_scsi_devs, svdev, tailq); + pthread_mutex_unlock(&g_virtio_scsi_mutex); + return 0; +} + +static struct virtio_scsi_dev * +virtio_pci_scsi_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx) +{ + static int pci_dev_counter = 0; + struct virtio_scsi_dev *svdev; + 
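/*
 * Queue layout note for the device initialized by virtio_scsi_dev_init()
 * (called further down in this function): vqs[VIRTIO_SCSI_CONTROLQ] (0) is
 * the control queue, vqs[VIRTIO_SCSI_EVENTQ] (1) the event queue, and request
 * queues start at VIRTIO_SCSI_REQUESTQ (2), i.e. after the
 * SPDK_VIRTIO_SCSI_QUEUE_NUM_FIXED non-request queues. The event queue is
 * pre-populated with up to VIRTIO_SCSI_EVENTQ_BUFFER_COUNT (16) buffers
 * (capped by the queue size), and each I/O channel later takes one request
 * queue for its exclusive use.
 */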
struct virtio_dev *vdev; + char *default_name = NULL; + uint32_t num_queues; + int rc; + + svdev = calloc(1, sizeof(*svdev)); + if (svdev == NULL) { + SPDK_ERRLOG("virtio device calloc failed\n"); + return NULL; + } + + vdev = &svdev->vdev; + if (name == NULL) { + default_name = spdk_sprintf_alloc("VirtioScsi%"PRIu32, pci_dev_counter++); + if (default_name == NULL) { + free(vdev); + return NULL; + } + name = default_name; + } + + rc = virtio_pci_dev_init(vdev, name, pci_ctx); + free(default_name); + + if (rc != 0) { + free(svdev); + return NULL; + } + + rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_scsi_config, num_queues), + &num_queues, sizeof(num_queues)); + if (rc) { + SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc)); + virtio_dev_destruct(vdev); + free(svdev); + return NULL; + } + + rc = virtio_scsi_dev_init(svdev, num_queues); + if (rc != 0) { + virtio_dev_destruct(vdev); + free(svdev); + return NULL; + } + + return svdev; +} + +static struct virtio_scsi_dev * +virtio_user_scsi_dev_create(const char *name, const char *path, + uint16_t num_queues, uint32_t queue_size) +{ + struct virtio_scsi_dev *svdev; + struct virtio_dev *vdev; + int rc; + + svdev = calloc(1, sizeof(*svdev)); + if (svdev == NULL) { + SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path); + return NULL; + } + + vdev = &svdev->vdev; + rc = virtio_user_dev_init(vdev, name, path, queue_size); + if (rc != 0) { + SPDK_ERRLOG("Failed to create virito device %s: %s\n", name, path); + free(svdev); + return NULL; + } + + rc = virtio_scsi_dev_init(svdev, num_queues); + if (rc != 0) { + virtio_dev_destruct(vdev); + free(svdev); + return NULL; + } + + return svdev; +} + +static struct virtio_scsi_disk * +virtio_scsi_dev_get_disk_by_id(struct virtio_scsi_dev *svdev, uint8_t target_id) +{ + struct virtio_scsi_disk *disk; + + TAILQ_FOREACH(disk, &svdev->luns, link) { + if (disk->info.target == target_id) { + return disk; + } + } + + return NULL; +} + +static int virtio_scsi_dev_scan(struct virtio_scsi_dev *svdev, + bdev_virtio_create_cb cb_fn, void *cb_arg); +static int send_scan_io(struct virtio_scsi_scan_base *base); +static void _virtio_scsi_dev_scan_tgt(struct virtio_scsi_scan_base *base, uint8_t target); +static int _virtio_scsi_dev_scan_next(struct virtio_scsi_scan_base *base, int rc); +static void _virtio_scsi_dev_scan_finish(struct virtio_scsi_scan_base *base, int errnum); +static int virtio_scsi_dev_scan_tgt(struct virtio_scsi_dev *svdev, uint8_t target); + +static int +bdev_virtio_get_ctx_size(void) +{ + return sizeof(struct virtio_scsi_io_ctx); +} + +static int +bdev_virtio_scsi_config_json(struct spdk_json_write_ctx *w) +{ + struct virtio_scsi_dev *svdev; + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_virtio_dev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", svdev->vdev.name); + spdk_json_write_named_string(w, "dev_type", "scsi"); + + /* Write transport specific parameters. 
*/ + svdev->vdev.backend_ops->write_json_config(&svdev->vdev, w); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + } + pthread_mutex_unlock(&g_virtio_scsi_mutex); + + return 0; +} + + +static struct spdk_bdev_module virtio_scsi_if = { + .name = "virtio_scsi", + .module_init = bdev_virtio_initialize, + .module_fini = bdev_virtio_finish, + .get_ctx_size = bdev_virtio_get_ctx_size, + .config_json = bdev_virtio_scsi_config_json, + .async_init = true, + .async_fini = true, +}; + +SPDK_BDEV_MODULE_REGISTER(&virtio_scsi_if) + +static struct virtio_scsi_io_ctx * +bdev_virtio_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_cmd_req *req; + struct virtio_scsi_cmd_resp *resp; + struct virtio_scsi_disk *disk = (struct virtio_scsi_disk *)bdev_io->bdev; + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + + req = &io_ctx->req; + resp = &io_ctx->resp; + + io_ctx->iov_req.iov_base = req; + io_ctx->iov_req.iov_len = sizeof(*req); + + io_ctx->iov_resp.iov_base = resp; + io_ctx->iov_resp.iov_len = sizeof(*resp); + + memset(req, 0, sizeof(*req)); + req->lun[0] = 1; + req->lun[1] = disk->info.target; + + return io_ctx; +} + +static struct virtio_scsi_io_ctx * +bdev_virtio_init_tmf_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_ctrl_tmf_req *tmf_req; + struct virtio_scsi_ctrl_tmf_resp *tmf_resp; + struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev); + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + + tmf_req = &io_ctx->tmf_req; + tmf_resp = &io_ctx->tmf_resp; + + io_ctx->iov_req.iov_base = tmf_req; + io_ctx->iov_req.iov_len = sizeof(*tmf_req); + io_ctx->iov_resp.iov_base = tmf_resp; + io_ctx->iov_resp.iov_len = sizeof(*tmf_resp); + + memset(tmf_req, 0, sizeof(*tmf_req)); + tmf_req->lun[0] = 1; + tmf_req->lun[1] = disk->info.target; + + return io_ctx; +} + +static void +bdev_virtio_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct bdev_virtio_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch); + struct virtqueue *vq = virtio_channel->vq; + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + int rc; + + rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2); + if (rc == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + return; + } else if (rc != 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO); + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR); + virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + SPDK_VIRTIO_DESC_WR); + } else { + virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + SPDK_VIRTIO_DESC_RO); + virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR); + } + + virtqueue_req_flush(vq); +} + +static void +bdev_virtio_rw(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev); + struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_io_vreq(ch, bdev_io); + struct virtio_scsi_cmd_req *req = &io_ctx->req; + bool is_write = bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE; + + if (disk->info.num_blocks > (1ULL << 32)) { + req->cdb[0] = is_write ? 
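/*
 * The branch chosen at this point emits a 16-byte CDB (READ(16)/WRITE(16))
 * when the target has more than 2^32 blocks, and a 10-byte CDB otherwise.
 * A worked example for the READ(10) case, with offset_blocks = 4096 and
 * num_blocks = 8:
 *
 *   cdb[0]    = SPDK_SBC_READ_10
 *   cdb[2..5] = 00 00 10 00   (big-endian LBA 4096)
 *   cdb[7..8] = 00 08         (big-endian transfer length 8)
 *
 * matching the to_be32()/to_be16() conversions in the else branch below.
 */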
SPDK_SBC_WRITE_16 : SPDK_SBC_READ_16; + to_be64(&req->cdb[2], bdev_io->u.bdev.offset_blocks); + to_be32(&req->cdb[10], bdev_io->u.bdev.num_blocks); + } else { + req->cdb[0] = is_write ? SPDK_SBC_WRITE_10 : SPDK_SBC_READ_10; + to_be32(&req->cdb[2], bdev_io->u.bdev.offset_blocks); + to_be16(&req->cdb[7], bdev_io->u.bdev.num_blocks); + } + + bdev_virtio_send_io(ch, bdev_io); +} + +static void +bdev_virtio_reset(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct bdev_virtio_io_channel *virtio_ch = spdk_io_channel_get_ctx(ch); + struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_tmf_vreq(ch, bdev_io); + struct virtio_scsi_ctrl_tmf_req *tmf_req = &io_ctx->tmf_req; + struct virtio_scsi_dev *svdev = virtio_ch->svdev; + size_t enqueued_count; + + tmf_req->type = VIRTIO_SCSI_T_TMF; + tmf_req->subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET; + + enqueued_count = spdk_ring_enqueue(svdev->ctrlq_ring, (void **)&bdev_io, 1); + if (spdk_likely(enqueued_count == 1)) { + return; + } else { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + } +} + +static void +bdev_virtio_unmap(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_io_vreq(ch, bdev_io); + struct virtio_scsi_cmd_req *req = &io_ctx->req; + struct spdk_scsi_unmap_bdesc *desc, *first_desc; + uint8_t *buf; + uint64_t offset_blocks, num_blocks; + uint16_t cmd_len; + + buf = bdev_io->u.bdev.iovs[0].iov_base; + + offset_blocks = bdev_io->u.bdev.offset_blocks; + num_blocks = bdev_io->u.bdev.num_blocks; + + /* (n-1) * 16-byte descriptors */ + first_desc = desc = (struct spdk_scsi_unmap_bdesc *)&buf[8]; + while (num_blocks > UINT32_MAX) { + to_be64(&desc->lba, offset_blocks); + to_be32(&desc->block_count, UINT32_MAX); + memset(&desc->reserved, 0, sizeof(desc->reserved)); + offset_blocks += UINT32_MAX; + num_blocks -= UINT32_MAX; + desc++; + } + + /* The last descriptor with block_count <= UINT32_MAX */ + to_be64(&desc->lba, offset_blocks); + to_be32(&desc->block_count, num_blocks); + memset(&desc->reserved, 0, sizeof(desc->reserved)); + + /* 8-byte header + n * 16-byte block descriptor */ + cmd_len = 8 + (desc - first_desc + 1) * sizeof(struct spdk_scsi_unmap_bdesc); + + req->cdb[0] = SPDK_SBC_UNMAP; + to_be16(&req->cdb[7], cmd_len); + + /* 8-byte header */ + to_be16(&buf[0], cmd_len - 2); /* total length (excluding the length field) */ + to_be16(&buf[2], cmd_len - 8); /* length of block descriptors */ + memset(&buf[4], 0, 4); /* reserved */ + + bdev_virtio_send_io(ch, bdev_io); +} + +static int _bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev); + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_virtio_rw, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + case SPDK_BDEV_IO_TYPE_WRITE: + bdev_virtio_rw(ch, bdev_io); + return 0; + case SPDK_BDEV_IO_TYPE_RESET: + bdev_virtio_reset(ch, bdev_io); + return 0; + case SPDK_BDEV_IO_TYPE_UNMAP: { + uint64_t buf_len = 8 /* header size */ + + (bdev_io->u.bdev.num_blocks + UINT32_MAX - 1) / + UINT32_MAX * sizeof(struct spdk_scsi_unmap_bdesc); + + if (!disk->info.unmap_supported) { + return -1; + } + + if (buf_len > SPDK_BDEV_LARGE_BUF_MAX_SIZE) { + SPDK_ERRLOG("Trying to UNMAP too many blocks: %"PRIu64"\n", + bdev_io->u.bdev.num_blocks); + return -1; + } + spdk_bdev_io_get_buf(bdev_io, bdev_virtio_unmap, buf_len); + return 
0; + } + case SPDK_BDEV_IO_TYPE_FLUSH: + default: + return -1; + } + return 0; +} + +static void bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_virtio_submit_request(ch, bdev_io) < 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct virtio_scsi_disk *disk = ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + + case SPDK_BDEV_IO_TYPE_UNMAP: + return disk->info.unmap_supported; + + default: + return false; + } +} + +static struct spdk_io_channel * +bdev_virtio_get_io_channel(void *ctx) +{ + struct virtio_scsi_disk *disk = ctx; + + return spdk_get_io_channel(disk->svdev); +} + +static int +bdev_virtio_disk_destruct(void *ctx) +{ + struct virtio_scsi_disk *disk = ctx; + struct virtio_scsi_dev *svdev = disk->svdev; + + TAILQ_REMOVE(&svdev->luns, disk, link); + free(disk->bdev.name); + free(disk); + + if (svdev->removed && TAILQ_EMPTY(&svdev->luns)) { + spdk_io_device_unregister(svdev, virtio_scsi_dev_unregister_cb); + } + + return 0; +} + +static int +bdev_virtio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct virtio_scsi_disk *disk = ctx; + + virtio_dev_dump_json_info(&disk->svdev->vdev, w); + return 0; +} + +static void +bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* SCSI targets and LUNS are discovered during scan process so nothing + * to save here. + */ +} + +static const struct spdk_bdev_fn_table virtio_fn_table = { + .destruct = bdev_virtio_disk_destruct, + .submit_request = bdev_virtio_submit_request, + .io_type_supported = bdev_virtio_io_type_supported, + .get_io_channel = bdev_virtio_get_io_channel, + .dump_info_json = bdev_virtio_dump_info_json, + .write_config_json = bdev_virtio_write_config_json, +}; + +static void +get_scsi_status(struct virtio_scsi_cmd_resp *resp, int *sk, int *asc, int *ascq) +{ + /* see spdk_scsi_task_build_sense_data() for sense data details */ + *sk = 0; + *asc = 0; + *ascq = 0; + + if (resp->sense_len < 3) { + return; + } + + *sk = resp->sense[2] & 0xf; + + if (resp->sense_len < 13) { + return; + } + + *asc = resp->sense[12]; + + if (resp->sense_len < 14) { + return; + } + + *ascq = resp->sense[13]; +} + +static void +bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + int sk, asc, ascq; + + get_scsi_status(&io_ctx->resp, &sk, &asc, &ascq); + spdk_bdev_io_complete_scsi_status(bdev_io, io_ctx->resp.status, sk, asc, ascq); +} + +static int +bdev_virtio_poll(void *arg) +{ + struct bdev_virtio_io_channel *ch = arg; + struct virtio_scsi_dev *svdev = ch->svdev; + struct virtio_scsi_scan_base *scan_ctx = svdev->scan_ctx; + void *io[32]; + uint32_t io_len[32]; + uint16_t i, cnt; + int rc; + + cnt = virtio_recv_pkts(ch->vq, (void **)io, io_len, SPDK_COUNTOF(io)); + for (i = 0; i < cnt; ++i) { + if (spdk_unlikely(scan_ctx && io[i] == &scan_ctx->io_ctx)) { + if (svdev->removed) { + _virtio_scsi_dev_scan_finish(scan_ctx, -EINTR); + return -1; + } + + if (scan_ctx->restart) { + scan_ctx->restart = false; + scan_ctx->full_scan = true; + _virtio_scsi_dev_scan_tgt(scan_ctx, 0); + continue; + } + + process_scan_resp(scan_ctx); + continue; + } + + bdev_virtio_io_cpl(io[i]); + } + + if (spdk_unlikely(scan_ctx && 
scan_ctx->needs_resend)) { + if (svdev->removed) { + _virtio_scsi_dev_scan_finish(scan_ctx, -EINTR); + return -1; + } else if (cnt == 0) { + return 0; + } + + rc = send_scan_io(scan_ctx); + if (rc != 0) { + assert(scan_ctx->retries > 0); + scan_ctx->retries--; + if (scan_ctx->retries == 0) { + SPDK_ERRLOG("Target scan failed unrecoverably with rc = %d.\n", rc); + _virtio_scsi_dev_scan_finish(scan_ctx, rc); + } + } + } + + return cnt; +} + +static void +bdev_virtio_tmf_cpl_cb(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + + if (io_ctx->tmf_resp.response == VIRTIO_SCSI_S_OK) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +bdev_virtio_tmf_cpl(struct spdk_bdev_io *bdev_io) +{ + spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_virtio_tmf_cpl_cb, bdev_io); +} + +static void +bdev_virtio_eventq_io_cpl(struct virtio_scsi_dev *svdev, struct virtio_scsi_eventq_io *io) +{ + struct virtio_scsi_event *ev = &io->ev; + struct virtio_scsi_disk *disk; + + if (ev->lun[0] != 1) { + SPDK_WARNLOG("Received an event with invalid data layout.\n"); + goto out; + } + + if (ev->event & VIRTIO_SCSI_T_EVENTS_MISSED) { + ev->event &= ~VIRTIO_SCSI_T_EVENTS_MISSED; + virtio_scsi_dev_scan(svdev, NULL, NULL); + } + + switch (ev->event) { + case VIRTIO_SCSI_T_NO_EVENT: + break; + case VIRTIO_SCSI_T_TRANSPORT_RESET: + switch (ev->reason) { + case VIRTIO_SCSI_EVT_RESET_RESCAN: + virtio_scsi_dev_scan_tgt(svdev, ev->lun[1]); + break; + case VIRTIO_SCSI_EVT_RESET_REMOVED: + disk = virtio_scsi_dev_get_disk_by_id(svdev, ev->lun[1]); + if (disk != NULL) { + spdk_bdev_unregister(&disk->bdev, NULL, NULL); + } + break; + default: + break; + } + break; + default: + break; + } + +out: + virtio_scsi_dev_send_eventq_io(svdev->vdev.vqs[VIRTIO_SCSI_EVENTQ], io); +} + +static void +bdev_virtio_tmf_abort_nomem_cb(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); +} + +static void +bdev_virtio_tmf_abort_ioerr_cb(void *ctx) +{ + struct spdk_bdev_io *bdev_io = ctx; + + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); +} + +static void +bdev_virtio_tmf_abort(struct spdk_bdev_io *bdev_io, int status) +{ + spdk_thread_fn fn; + + if (status == -ENOMEM) { + fn = bdev_virtio_tmf_abort_nomem_cb; + } else { + fn = bdev_virtio_tmf_abort_ioerr_cb; + } + + spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), fn, bdev_io); +} + +static int +bdev_virtio_send_tmf_io(struct virtqueue *ctrlq, struct spdk_bdev_io *bdev_io) +{ + struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx; + int rc; + + rc = virtqueue_req_start(ctrlq, bdev_io, 2); + if (rc != 0) { + return rc; + } + + virtqueue_req_add_iovs(ctrlq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO); + virtqueue_req_add_iovs(ctrlq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR); + + virtqueue_req_flush(ctrlq); + return 0; +} + +static int +bdev_virtio_mgmt_poll(void *arg) +{ + struct virtio_scsi_dev *svdev = arg; + struct virtio_dev *vdev = &svdev->vdev; + struct virtqueue *eventq = vdev->vqs[VIRTIO_SCSI_EVENTQ]; + struct virtqueue *ctrlq = vdev->vqs[VIRTIO_SCSI_CONTROLQ]; + struct spdk_ring *send_ring = svdev->ctrlq_ring; + void *io[16]; + uint32_t io_len[16]; + uint16_t i, cnt; + int rc; + int total = 0; + + cnt = spdk_ring_dequeue(send_ring, io, SPDK_COUNTOF(io)); + total += cnt; + for (i = 
0; i < cnt; ++i) { + rc = bdev_virtio_send_tmf_io(ctrlq, io[i]); + if (rc != 0) { + bdev_virtio_tmf_abort(io[i], rc); + } + } + + cnt = virtio_recv_pkts(ctrlq, io, io_len, SPDK_COUNTOF(io)); + total += cnt; + for (i = 0; i < cnt; ++i) { + bdev_virtio_tmf_cpl(io[i]); + } + + cnt = virtio_recv_pkts(eventq, io, io_len, SPDK_COUNTOF(io)); + total += cnt; + for (i = 0; i < cnt; ++i) { + bdev_virtio_eventq_io_cpl(svdev, io[i]); + } + + return total; +} + +static int +bdev_virtio_scsi_ch_create_cb(void *io_device, void *ctx_buf) +{ + struct virtio_scsi_dev *svdev = io_device; + struct virtio_dev *vdev = &svdev->vdev; + struct bdev_virtio_io_channel *ch = ctx_buf; + struct virtqueue *vq; + int32_t queue_idx; + + queue_idx = virtio_dev_find_and_acquire_queue(vdev, VIRTIO_SCSI_REQUESTQ); + if (queue_idx < 0) { + SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n"); + return -1; + } + + vq = vdev->vqs[queue_idx]; + + ch->svdev = svdev; + ch->vq = vq; + + ch->poller = spdk_poller_register(bdev_virtio_poll, ch, 0); + + return 0; +} + +static void +bdev_virtio_scsi_ch_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_virtio_io_channel *ch = ctx_buf; + struct virtio_scsi_dev *svdev = ch->svdev; + struct virtio_dev *vdev = &svdev->vdev; + struct virtqueue *vq = ch->vq; + + spdk_poller_unregister(&ch->poller); + virtio_dev_release_queue(vdev, vq->vq_queue_index); +} + +static void +_virtio_scsi_dev_scan_finish(struct virtio_scsi_scan_base *base, int errnum) +{ + struct virtio_scsi_dev *svdev = base->svdev; + size_t bdevs_cnt; + struct spdk_bdev *bdevs[BDEV_VIRTIO_MAX_TARGET]; + struct virtio_scsi_disk *disk; + struct virtio_scsi_scan_info *tgt, *next_tgt; + + spdk_put_io_channel(spdk_io_channel_from_ctx(base->channel)); + base->svdev->scan_ctx = NULL; + + TAILQ_FOREACH_SAFE(tgt, &base->scan_queue, tailq, next_tgt) { + TAILQ_REMOVE(&base->scan_queue, tgt, tailq); + free(tgt); + } + + if (base->cb_fn == NULL) { + spdk_dma_free(base); + return; + } + + bdevs_cnt = 0; + if (errnum == 0) { + TAILQ_FOREACH(disk, &svdev->luns, link) { + bdevs[bdevs_cnt] = &disk->bdev; + bdevs_cnt++; + } + } + + base->cb_fn(base->cb_arg, errnum, bdevs, bdevs_cnt); + spdk_dma_free(base); +} + +static int +send_scan_io(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_io_ctx *io_ctx = &base->io_ctx; + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct virtqueue *vq = base->channel->vq; + int payload_iov_cnt = base->iov.iov_len > 0 ? 
1 : 0; + int rc; + + req->lun[0] = 1; + req->lun[1] = base->info.target; + + rc = virtqueue_req_start(vq, io_ctx, 2 + payload_iov_cnt); + if (rc != 0) { + base->needs_resend = true; + return -1; + } + + virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO); + virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR); + virtqueue_req_add_iovs(vq, &base->iov, payload_iov_cnt, SPDK_VIRTIO_DESC_WR); + + virtqueue_req_flush(vq); + return 0; +} + +static int +send_inquiry(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct spdk_scsi_cdb_inquiry *cdb; + + memset(req, 0, sizeof(*req)); + + base->iov.iov_len = BDEV_VIRTIO_SCAN_PAYLOAD_SIZE; + cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb; + cdb->opcode = SPDK_SPC_INQUIRY; + to_be16(cdb->alloc_len, BDEV_VIRTIO_SCAN_PAYLOAD_SIZE); + + return send_scan_io(base); +} + +static int +send_inquiry_vpd(struct virtio_scsi_scan_base *base, uint8_t page_code) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct spdk_scsi_cdb_inquiry *inquiry_cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb; + + memset(req, 0, sizeof(*req)); + + base->iov.iov_len = BDEV_VIRTIO_SCAN_PAYLOAD_SIZE; + inquiry_cdb->opcode = SPDK_SPC_INQUIRY; + inquiry_cdb->evpd = 1; + inquiry_cdb->page_code = page_code; + to_be16(inquiry_cdb->alloc_len, base->iov.iov_len); + + return send_scan_io(base); +} + +static int +send_read_cap_10(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + + memset(req, 0, sizeof(*req)); + + base->iov.iov_len = 8; + req->cdb[0] = SPDK_SBC_READ_CAPACITY_10; + + return send_scan_io(base); +} + +static int +send_read_cap_16(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + + memset(req, 0, sizeof(*req)); + + base->iov.iov_len = 32; + req->cdb[0] = SPDK_SPC_SERVICE_ACTION_IN_16; + req->cdb[1] = SPDK_SBC_SAI_READ_CAPACITY_16; + to_be32(&req->cdb[10], base->iov.iov_len); + + return send_scan_io(base); +} + +static int +send_test_unit_ready(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + + memset(req, 0, sizeof(*req)); + req->cdb[0] = SPDK_SPC_TEST_UNIT_READY; + base->iov.iov_len = 0; + + return send_scan_io(base); +} + +static int +send_start_stop_unit(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + + memset(req, 0, sizeof(*req)); + req->cdb[0] = SPDK_SBC_START_STOP_UNIT; + req->cdb[4] = SPDK_SBC_START_STOP_UNIT_START_BIT; + base->iov.iov_len = 0; + + return send_scan_io(base); +} + +static int +process_scan_start_stop_unit(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + + if (resp->status == SPDK_SCSI_STATUS_GOOD) { + return send_inquiry_vpd(base, SPDK_SPC_VPD_SUPPORTED_VPD_PAGES); + } + + return -1; +} + +static int +process_scan_test_unit_ready(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + int sk, asc, ascq; + + get_scsi_status(resp, &sk, &asc, &ascq); + + /* check response, get VPD if spun up otherwise send SSU */ + if (resp->status == SPDK_SCSI_STATUS_GOOD) { + return send_inquiry_vpd(base, SPDK_SPC_VPD_SUPPORTED_VPD_PAGES); + } else if (resp->response == VIRTIO_SCSI_S_OK && + resp->status == SPDK_SCSI_STATUS_CHECK_CONDITION && + sk == SPDK_SCSI_SENSE_UNIT_ATTENTION && + asc == SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY) { + return send_start_stop_unit(base); + } else { + return -1; + } +} + +static 
int +process_scan_inquiry_standard(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + struct spdk_scsi_cdb_inquiry_data *inquiry_data = + (struct spdk_scsi_cdb_inquiry_data *)base->payload; + + if (resp->status != SPDK_SCSI_STATUS_GOOD) { + return -1; + } + + /* check to make sure its a supported device */ + if (inquiry_data->peripheral_device_type != SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DISK || + inquiry_data->peripheral_qualifier != SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED) { + SPDK_WARNLOG("Unsupported peripheral device type 0x%02x (qualifier 0x%02x)\n", + inquiry_data->peripheral_device_type, + inquiry_data->peripheral_qualifier); + return -1; + } + + return send_test_unit_ready(base); +} + +static int +process_scan_inquiry_vpd_supported_vpd_pages(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + bool block_provisioning_page_supported = false; + + if (resp->status == SPDK_SCSI_STATUS_GOOD) { + const uint8_t *vpd_data = base->payload; + const uint8_t *supported_vpd_pages = vpd_data + 4; + uint16_t page_length; + uint16_t num_supported_pages; + uint16_t i; + + page_length = from_be16(vpd_data + 2); + num_supported_pages = spdk_min(page_length, base->iov.iov_len - 4); + + for (i = 0; i < num_supported_pages; i++) { + if (supported_vpd_pages[i] == SPDK_SPC_VPD_BLOCK_THIN_PROVISION) { + block_provisioning_page_supported = true; + break; + } + } + } + + if (block_provisioning_page_supported) { + return send_inquiry_vpd(base, SPDK_SPC_VPD_BLOCK_THIN_PROVISION); + } else { + return send_read_cap_10(base); + } +} + +static int +process_scan_inquiry_vpd_block_thin_provision(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + + base->info.unmap_supported = false; + + if (resp->status == SPDK_SCSI_STATUS_GOOD) { + uint8_t *vpd_data = base->payload; + + base->info.unmap_supported = !!(vpd_data[5] & SPDK_SCSI_UNMAP_LBPU); + } + + SPDK_INFOLOG(SPDK_LOG_VIRTIO, "Target %u: unmap supported = %d\n", + base->info.target, (int)base->info.unmap_supported); + + return send_read_cap_10(base); +} + +static int +process_scan_inquiry(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct spdk_scsi_cdb_inquiry *inquiry_cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb; + + if ((inquiry_cdb->evpd & 1) == 0) { + return process_scan_inquiry_standard(base); + } + + switch (inquiry_cdb->page_code) { + case SPDK_SPC_VPD_SUPPORTED_VPD_PAGES: + return process_scan_inquiry_vpd_supported_vpd_pages(base); + case SPDK_SPC_VPD_BLOCK_THIN_PROVISION: + return process_scan_inquiry_vpd_block_thin_provision(base); + default: + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO, "Unexpected VPD page 0x%02x\n", inquiry_cdb->page_code); + return -1; + } +} + +static void +bdev_virtio_disc_notify_remove(void *remove_ctx) +{ + struct virtio_scsi_disk *disk = remove_ctx; + + disk->removed = true; + spdk_bdev_close(disk->notify_desc); +} + +/* To be called only from the thread performing target scan */ +static int +virtio_scsi_dev_add_tgt(struct virtio_scsi_dev *svdev, struct virtio_scsi_scan_info *info) +{ + struct virtio_scsi_disk *disk; + struct spdk_bdev *bdev; + int rc; + + TAILQ_FOREACH(disk, &svdev->luns, link) { + if (disk->info.target == info->target) { + /* Target is already attached and param change is not supported */ + return 0; + } + } + + if (info->block_size == 0 || info->num_blocks == 0) { + SPDK_ERRLOG("%s: invalid target %u: bs=%"PRIu32" blocks=%"PRIu64"\n", 
+ svdev->vdev.name, info->target, info->block_size, info->num_blocks); + return -EINVAL; + } + + disk = calloc(1, sizeof(*disk)); + if (disk == NULL) { + SPDK_ERRLOG("could not allocate disk\n"); + return -ENOMEM; + } + + disk->svdev = svdev; + memcpy(&disk->info, info, sizeof(*info)); + + bdev = &disk->bdev; + bdev->name = spdk_sprintf_alloc("%st%"PRIu8, svdev->vdev.name, info->target); + if (bdev->name == NULL) { + SPDK_ERRLOG("Couldn't alloc memory for the bdev name.\n"); + free(disk); + return -ENOMEM; + } + + bdev->product_name = "Virtio SCSI Disk"; + bdev->write_cache = 0; + bdev->blocklen = disk->info.block_size; + bdev->blockcnt = disk->info.num_blocks; + + bdev->ctxt = disk; + bdev->fn_table = &virtio_fn_table; + bdev->module = &virtio_scsi_if; + + rc = spdk_bdev_register(&disk->bdev); + if (rc) { + SPDK_ERRLOG("Failed to register bdev name=%s\n", disk->bdev.name); + free(bdev->name); + free(disk); + return rc; + } + + rc = spdk_bdev_open(bdev, false, bdev_virtio_disc_notify_remove, disk, &disk->notify_desc); + if (rc) { + assert(false); + } + + TAILQ_INSERT_TAIL(&svdev->luns, disk, link); + return 0; +} + +static int +process_read_cap_10(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + uint64_t max_block; + uint32_t block_size; + uint8_t target_id = req->lun[1]; + int rc; + + if (resp->response != VIRTIO_SCSI_S_OK || resp->status != SPDK_SCSI_STATUS_GOOD) { + SPDK_ERRLOG("READ CAPACITY (10) failed for target %"PRIu8".\n", target_id); + return -1; + } + + block_size = from_be32(base->payload + 4); + max_block = from_be32(base->payload); + + if (max_block == 0xffffffff) { + return send_read_cap_16(base); + } + + base->info.num_blocks = (uint64_t)max_block + 1; + base->info.block_size = block_size; + + rc = virtio_scsi_dev_add_tgt(base->svdev, &base->info); + if (rc != 0) { + return rc; + } + + return _virtio_scsi_dev_scan_next(base, 0); +} + +static int +process_read_cap_16(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + uint8_t target_id = req->lun[1]; + int rc; + + if (resp->response != VIRTIO_SCSI_S_OK || resp->status != SPDK_SCSI_STATUS_GOOD) { + SPDK_ERRLOG("READ CAPACITY (16) failed for target %"PRIu8".\n", target_id); + return -1; + } + + base->info.num_blocks = from_be64(base->payload) + 1; + base->info.block_size = from_be32(base->payload + 8); + rc = virtio_scsi_dev_add_tgt(base->svdev, &base->info); + if (rc != 0) { + return rc; + } + + return _virtio_scsi_dev_scan_next(base, 0); +} + +static void +process_scan_resp(struct virtio_scsi_scan_base *base) +{ + struct virtio_scsi_cmd_req *req = &base->io_ctx.req; + struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp; + int rc, sk, asc, ascq; + uint8_t target_id; + + if (base->io_ctx.iov_req.iov_len < sizeof(struct virtio_scsi_cmd_req) || + base->io_ctx.iov_resp.iov_len < sizeof(struct virtio_scsi_cmd_resp)) { + SPDK_ERRLOG("Received target scan message with invalid length.\n"); + _virtio_scsi_dev_scan_next(base, -EIO); + return; + } + + get_scsi_status(resp, &sk, &asc, &ascq); + target_id = req->lun[1]; + + if (resp->response == VIRTIO_SCSI_S_BAD_TARGET || + resp->response == VIRTIO_SCSI_S_INCORRECT_LUN) { + _virtio_scsi_dev_scan_next(base, -ENODEV); + return; + } + + if (resp->response != VIRTIO_SCSI_S_OK || + (resp->status == SPDK_SCSI_STATUS_CHECK_CONDITION && + sk != SPDK_SCSI_SENSE_ILLEGAL_REQUEST)) { + 
assert(base->retries > 0); + base->retries--; + if (base->retries == 0) { + SPDK_NOTICELOG("Target %"PRIu8" is present, but unavailable.\n", target_id); + SPDK_TRACEDUMP(SPDK_LOG_VIRTIO, "CDB", req->cdb, sizeof(req->cdb)); + SPDK_TRACEDUMP(SPDK_LOG_VIRTIO, "SENSE DATA", resp->sense, sizeof(resp->sense)); + _virtio_scsi_dev_scan_next(base, -EBUSY); + return; + } + + /* resend the same request */ + rc = send_scan_io(base); + if (rc != 0) { + /* Let response poller do the resend */ + } + return; + } + + base->retries = SCAN_REQUEST_RETRIES; + + switch (req->cdb[0]) { + case SPDK_SPC_INQUIRY: + rc = process_scan_inquiry(base); + break; + case SPDK_SPC_TEST_UNIT_READY: + rc = process_scan_test_unit_ready(base); + break; + case SPDK_SBC_START_STOP_UNIT: + rc = process_scan_start_stop_unit(base); + break; + case SPDK_SBC_READ_CAPACITY_10: + rc = process_read_cap_10(base); + break; + case SPDK_SPC_SERVICE_ACTION_IN_16: + rc = process_read_cap_16(base); + break; + default: + SPDK_ERRLOG("Received invalid target scan message: cdb[0] = %"PRIu8".\n", req->cdb[0]); + rc = -1; + break; + } + + if (rc != 0) { + if (base->needs_resend) { + return; /* Let response poller do the resend */ + } + + _virtio_scsi_dev_scan_next(base, rc); + } +} + +static int +_virtio_scsi_dev_scan_next(struct virtio_scsi_scan_base *base, int rc) +{ + struct virtio_scsi_scan_info *next; + struct virtio_scsi_disk *disk; + uint8_t target_id; + + if (base->full_scan) { + if (rc != 0) { + disk = virtio_scsi_dev_get_disk_by_id(base->svdev, + base->info.target); + if (disk != NULL) { + spdk_bdev_unregister(&disk->bdev, NULL, NULL); + } + } + + target_id = base->info.target + 1; + if (target_id < BDEV_VIRTIO_MAX_TARGET) { + _virtio_scsi_dev_scan_tgt(base, target_id); + return 0; + } + + base->full_scan = false; + } + + next = TAILQ_FIRST(&base->scan_queue); + if (next == NULL) { + _virtio_scsi_dev_scan_finish(base, 0); + return 0; + } + + TAILQ_REMOVE(&base->scan_queue, next, tailq); + target_id = next->target; + free(next); + + _virtio_scsi_dev_scan_tgt(base, target_id); + return 0; +} + +static int +virtio_pci_scsi_dev_enumerate_cb(struct virtio_pci_ctx *pci_ctx, void *ctx) +{ + struct virtio_scsi_dev *svdev; + + svdev = virtio_pci_scsi_dev_create(NULL, pci_ctx); + return svdev == NULL ? 
-1 : 0; +} + +static int +bdev_virtio_process_config(void) +{ + struct spdk_conf_section *sp; + struct virtio_scsi_dev *svdev; + char *default_name = NULL; + char *path, *type, *name; + unsigned vdev_num; + int num_queues; + bool enable_pci; + int rc = 0; + + for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { + if (!spdk_conf_section_match_prefix(sp, "VirtioUser")) { + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VirtioUser%u", &vdev_num) != 1) { + SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", + spdk_conf_section_get_name(sp)); + rc = -1; + goto out; + } + + path = spdk_conf_section_get_val(sp, "Path"); + if (path == NULL) { + SPDK_ERRLOG("VirtioUser%u: missing Path\n", vdev_num); + rc = -1; + goto out; + } + + type = spdk_conf_section_get_val(sp, "Type"); + if (type != NULL && strcmp(type, "SCSI") != 0) { + continue; + } + + num_queues = spdk_conf_section_get_intval(sp, "Queues"); + if (num_queues < 1) { + num_queues = 1; + } else if (num_queues > SPDK_VIRTIO_MAX_VIRTQUEUES) { + num_queues = SPDK_VIRTIO_MAX_VIRTQUEUES; + } + + name = spdk_conf_section_get_val(sp, "Name"); + if (name == NULL) { + default_name = spdk_sprintf_alloc("VirtioScsi%u", vdev_num); + name = default_name; + } + + svdev = virtio_user_scsi_dev_create(name, path, num_queues, 512); + free(default_name); + default_name = NULL; + + if (svdev == NULL) { + rc = -1; + goto out; + } + } + + sp = spdk_conf_find_section(NULL, "VirtioPci"); + if (sp == NULL) { + return 0; + } + + enable_pci = spdk_conf_section_get_boolval(sp, "Enable", false); + if (enable_pci) { + rc = virtio_pci_dev_enumerate(virtio_pci_scsi_dev_enumerate_cb, NULL, + PCI_DEVICE_ID_VIRTIO_SCSI_MODERN); + } + +out: + return rc; +} + +static int +_virtio_scsi_dev_scan_init(struct virtio_scsi_dev *svdev) +{ + struct virtio_scsi_scan_base *base; + struct spdk_io_channel *io_ch; + struct virtio_scsi_io_ctx *io_ctx; + struct virtio_scsi_cmd_req *req; + struct virtio_scsi_cmd_resp *resp; + + io_ch = spdk_get_io_channel(svdev); + if (io_ch == NULL) { + return -EBUSY; + } + + base = spdk_dma_zmalloc(sizeof(*base), 64, NULL); + if (base == NULL) { + SPDK_ERRLOG("couldn't allocate memory for scsi target scan.\n"); + return -ENOMEM; + } + + base->svdev = svdev; + + base->channel = spdk_io_channel_get_ctx(io_ch); + TAILQ_INIT(&base->scan_queue); + svdev->scan_ctx = base; + + base->iov.iov_base = base->payload; + io_ctx = &base->io_ctx; + req = &io_ctx->req; + resp = &io_ctx->resp; + io_ctx->iov_req.iov_base = req; + io_ctx->iov_req.iov_len = sizeof(*req); + io_ctx->iov_resp.iov_base = resp; + io_ctx->iov_resp.iov_len = sizeof(*resp); + + base->retries = SCAN_REQUEST_RETRIES; + return 0; +} + +static void +_virtio_scsi_dev_scan_tgt(struct virtio_scsi_scan_base *base, uint8_t target) +{ + int rc; + + memset(&base->info, 0, sizeof(base->info)); + base->info.target = target; + + rc = send_inquiry(base); + if (rc) { + /* Let response poller do the resend */ + } +} + +static int +virtio_scsi_dev_scan(struct virtio_scsi_dev *svdev, bdev_virtio_create_cb cb_fn, + void *cb_arg) +{ + struct virtio_scsi_scan_base *base; + struct virtio_scsi_scan_info *tgt, *next_tgt; + int rc; + + if (svdev->scan_ctx) { + if (svdev->scan_ctx->full_scan) { + return -EEXIST; + } + + /* We're about to start a full rescan, so there's no need + * to scan particular targets afterwards. 
+ */ + TAILQ_FOREACH_SAFE(tgt, &svdev->scan_ctx->scan_queue, tailq, next_tgt) { + TAILQ_REMOVE(&svdev->scan_ctx->scan_queue, tgt, tailq); + free(tgt); + } + + svdev->scan_ctx->cb_fn = cb_fn; + svdev->scan_ctx->cb_arg = cb_arg; + svdev->scan_ctx->restart = true; + return 0; + } + + rc = _virtio_scsi_dev_scan_init(svdev); + if (rc != 0) { + return rc; + } + + base = svdev->scan_ctx; + base->cb_fn = cb_fn; + base->cb_arg = cb_arg; + base->full_scan = true; + + _virtio_scsi_dev_scan_tgt(base, 0); + return 0; +} + +static int +virtio_scsi_dev_scan_tgt(struct virtio_scsi_dev *svdev, uint8_t target) +{ + struct virtio_scsi_scan_base *base; + struct virtio_scsi_scan_info *info; + int rc; + + base = svdev->scan_ctx; + if (base) { + info = calloc(1, sizeof(*info)); + if (info == NULL) { + SPDK_ERRLOG("calloc failed\n"); + return -ENOMEM; + } + + info->target = target; + TAILQ_INSERT_TAIL(&base->scan_queue, info, tailq); + return 0; + } + + rc = _virtio_scsi_dev_scan_init(svdev); + if (rc != 0) { + return rc; + } + + base = svdev->scan_ctx; + base->full_scan = true; + _virtio_scsi_dev_scan_tgt(base, target); + return 0; +} + +static void +bdev_virtio_initial_scan_complete(void *ctx, int result, + struct spdk_bdev **bdevs, size_t bdevs_cnt) +{ + struct virtio_scsi_dev *svdev; + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) { + if (svdev->scan_ctx) { + /* another device is still being scanned */ + pthread_mutex_unlock(&g_virtio_scsi_mutex); + return; + } + } + + pthread_mutex_unlock(&g_virtio_scsi_mutex); + spdk_bdev_module_init_done(&virtio_scsi_if); +} + +static int +bdev_virtio_initialize(void) +{ + struct virtio_scsi_dev *svdev, *next_svdev; + int rc; + + rc = bdev_virtio_process_config(); + pthread_mutex_lock(&g_virtio_scsi_mutex); + + if (rc != 0) { + goto err_unlock; + } + + if (TAILQ_EMPTY(&g_virtio_scsi_devs)) { + goto out_unlock; + } + + /* Initialize all created devices and scan available targets */ + TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) { + rc = virtio_scsi_dev_scan(svdev, bdev_virtio_initial_scan_complete, NULL); + if (rc != 0) { + goto err_unlock; + } + } + + pthread_mutex_unlock(&g_virtio_scsi_mutex); + return 0; + +err_unlock: + /* Remove any created devices */ + TAILQ_FOREACH_SAFE(svdev, &g_virtio_scsi_devs, tailq, next_svdev) { + virtio_scsi_dev_remove(svdev, NULL, NULL); + } + +out_unlock: + pthread_mutex_unlock(&g_virtio_scsi_mutex); + spdk_bdev_module_init_done(&virtio_scsi_if); + return rc; +} + +static void +_virtio_scsi_dev_unregister_cb(void *io_device) +{ + struct virtio_scsi_dev *svdev = io_device; + struct virtio_dev *vdev = &svdev->vdev; + bool finish_module; + bdev_virtio_remove_cb remove_cb; + void *remove_ctx; + + assert(spdk_ring_count(svdev->ctrlq_ring) == 0); + spdk_ring_free(svdev->ctrlq_ring); + spdk_poller_unregister(&svdev->mgmt_poller); + + virtio_dev_release_queue(vdev, VIRTIO_SCSI_EVENTQ); + virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ); + + virtio_dev_stop(vdev); + virtio_dev_destruct(vdev); + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_REMOVE(&g_virtio_scsi_devs, svdev, tailq); + pthread_mutex_unlock(&g_virtio_scsi_mutex); + + remove_cb = svdev->remove_cb; + remove_ctx = svdev->remove_ctx; + spdk_dma_free(svdev->eventq_ios); + free(svdev); + + if (remove_cb) { + remove_cb(remove_ctx, 0); + } + + finish_module = TAILQ_EMPTY(&g_virtio_scsi_devs); + + if (g_bdev_virtio_finish && finish_module) { + spdk_bdev_module_finish_done(); + } +} + +static void +virtio_scsi_dev_unregister_cb(void 
*io_device) +{ + struct virtio_scsi_dev *svdev = io_device; + struct spdk_thread *thread; + + thread = virtio_dev_queue_get_thread(&svdev->vdev, VIRTIO_SCSI_CONTROLQ); + spdk_thread_send_msg(thread, _virtio_scsi_dev_unregister_cb, io_device); +} + +static void +virtio_scsi_dev_remove(struct virtio_scsi_dev *svdev, + bdev_virtio_remove_cb cb_fn, void *cb_arg) +{ + struct virtio_scsi_disk *disk, *disk_tmp; + bool do_remove = true; + + if (svdev->removed) { + if (cb_fn) { + cb_fn(cb_arg, -EBUSY); + } + return; + } + + svdev->remove_cb = cb_fn; + svdev->remove_ctx = cb_arg; + svdev->removed = true; + + if (svdev->scan_ctx) { + /* The removal will continue after we receive a pending scan I/O. */ + return; + } + + TAILQ_FOREACH_SAFE(disk, &svdev->luns, link, disk_tmp) { + if (!disk->removed) { + spdk_bdev_unregister(&disk->bdev, NULL, NULL); + } + do_remove = false; + } + + if (do_remove) { + spdk_io_device_unregister(svdev, virtio_scsi_dev_unregister_cb); + } +} + +static void +bdev_virtio_finish(void) +{ + struct virtio_scsi_dev *svdev, *next; + + g_bdev_virtio_finish = true; + + pthread_mutex_lock(&g_virtio_scsi_mutex); + if (TAILQ_EMPTY(&g_virtio_scsi_devs)) { + pthread_mutex_unlock(&g_virtio_scsi_mutex); + spdk_bdev_module_finish_done(); + return; + } + + /* Defer module finish until all controllers are removed. */ + TAILQ_FOREACH_SAFE(svdev, &g_virtio_scsi_devs, tailq, next) { + virtio_scsi_dev_remove(svdev, NULL, NULL); + } + pthread_mutex_unlock(&g_virtio_scsi_mutex); +} + +int +bdev_virtio_user_scsi_dev_create(const char *base_name, const char *path, + unsigned num_queues, unsigned queue_size, + bdev_virtio_create_cb cb_fn, void *cb_arg) +{ + struct virtio_scsi_dev *svdev; + int rc; + + svdev = virtio_user_scsi_dev_create(base_name, path, num_queues, queue_size); + if (svdev == NULL) { + return -1; + } + + rc = virtio_scsi_dev_scan(svdev, cb_fn, cb_arg); + if (rc) { + virtio_scsi_dev_remove(svdev, NULL, NULL); + } + + return rc; +} + +struct bdev_virtio_pci_dev_create_ctx { + const char *name; + bdev_virtio_create_cb cb_fn; + void *cb_arg; +}; + +static int +bdev_virtio_pci_scsi_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx) +{ + struct virtio_scsi_dev *svdev; + struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx; + int rc; + + svdev = virtio_pci_scsi_dev_create(create_ctx->name, pci_ctx); + if (svdev == NULL) { + return -1; + } + + rc = virtio_scsi_dev_scan(svdev, create_ctx->cb_fn, create_ctx->cb_arg); + if (rc) { + virtio_scsi_dev_remove(svdev, NULL, NULL); + } + + return rc; +} + +int +bdev_virtio_pci_scsi_dev_create(const char *name, struct spdk_pci_addr *pci_addr, + bdev_virtio_create_cb cb_fn, void *cb_arg) +{ + struct bdev_virtio_pci_dev_create_ctx create_ctx; + + create_ctx.name = name; + create_ctx.cb_fn = cb_fn; + create_ctx.cb_arg = cb_arg; + + return virtio_pci_dev_attach(bdev_virtio_pci_scsi_dev_create_cb, &create_ctx, + PCI_DEVICE_ID_VIRTIO_SCSI_MODERN, pci_addr); +} + +int +bdev_virtio_scsi_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg) +{ + struct virtio_scsi_dev *svdev; + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) { + if (strcmp(svdev->vdev.name, name) == 0) { + break; + } + } + + if (svdev == NULL) { + pthread_mutex_unlock(&g_virtio_scsi_mutex); + SPDK_ERRLOG("Cannot find Virtio-SCSI device named '%s'\n", name); + return -ENODEV; + } + + virtio_scsi_dev_remove(svdev, cb_fn, cb_arg); + pthread_mutex_unlock(&g_virtio_scsi_mutex); + + return 0; +} + +void 
+bdev_virtio_scsi_dev_list(struct spdk_json_write_ctx *w) +{ + struct virtio_scsi_dev *svdev; + + spdk_json_write_array_begin(w); + + pthread_mutex_lock(&g_virtio_scsi_mutex); + TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, svdev->vdev.name); + + virtio_dev_dump_json_info(&svdev->vdev, w); + + spdk_json_write_object_end(w); + } + pthread_mutex_unlock(&g_virtio_scsi_mutex); + + spdk_json_write_array_end(w); +} + +SPDK_LOG_REGISTER_COMPONENT("virtio", SPDK_LOG_VIRTIO) diff --git a/src/spdk/lib/bdev/vtune.c b/src/spdk/lib/bdev/vtune.c new file mode 100644 index 00000000..2cb48826 --- /dev/null +++ b/src/spdk/lib/bdev/vtune.c @@ -0,0 +1,49 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/config.h" +#if SPDK_CONFIG_VTUNE + +/* Disable warnings triggered by the VTune code */ +#if defined(__GNUC__) && \ + __GNUC__ > 4 || \ + (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic ignored "-Wsign-compare" +#if __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif +#endif + +#include "ittnotify_static.c" + +#endif diff --git a/src/spdk/lib/blob/Makefile b/src/spdk/lib/blob/Makefile new file mode 100644 index 00000000..996155bf --- /dev/null +++ b/src/spdk/lib/blob/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = blobstore.c request.c zeroes.c blob_bs_dev.c +LIBNAME = blob + +DIRS-y += bdev + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/blob/bdev/Makefile b/src/spdk/lib/blob/bdev/Makefile new file mode 100644 index 00000000..dbc25dfb --- /dev/null +++ b/src/spdk/lib/blob/bdev/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = blob_bdev.c +LIBNAME = blob_bdev + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/blob/bdev/blob_bdev.c b/src/spdk/lib/blob/bdev/blob_bdev.c new file mode 100644 index 00000000..42293142 --- /dev/null +++ b/src/spdk/lib/blob/bdev/blob_bdev.c @@ -0,0 +1,357 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/bdev_module.h" + +struct blob_bdev { + struct spdk_bs_dev bs_dev; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + bool claimed; +}; + +struct blob_resubmit { + struct spdk_bdev_io_wait_entry bdev_io_wait; + enum spdk_bdev_io_type io_type; + struct spdk_bs_dev *dev; + struct spdk_io_channel *channel; + void *payload; + int iovcnt; + uint64_t lba; + uint32_t lba_count; + struct spdk_bs_dev_cb_args *cb_args; +}; +static void bdev_blob_resubmit(void *); + +static inline struct spdk_bdev_desc * +__get_desc(struct spdk_bs_dev *dev) +{ + return ((struct blob_bdev *)dev)->desc; +} + +static inline struct spdk_bdev * +__get_bdev(struct spdk_bs_dev *dev) +{ + return ((struct blob_bdev *)dev)->bdev; +} + +static void +bdev_blob_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg) +{ + struct spdk_bs_dev_cb_args *cb_args = arg; + int bserrno; + + if (success) { + bserrno = 0; + } else { + bserrno = -EIO; + } + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno); + spdk_bdev_free_io(bdev_io); +} + +static void +bdev_blob_queue_io(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + int iovcnt, + uint64_t lba, uint32_t lba_count, enum spdk_bdev_io_type io_type, + struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + struct spdk_bdev *bdev = __get_bdev(dev); + struct blob_resubmit *ctx; + + ctx = calloc(1, sizeof(struct blob_resubmit)); + + if (ctx == NULL) { + SPDK_ERRLOG("Not enough memory to queue io\n"); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -ENOMEM); + return; + } + + ctx->io_type = io_type; + ctx->dev = dev; + ctx->channel = channel; + ctx->payload = payload; + ctx->iovcnt = iovcnt; + ctx->lba = lba; + ctx->lba_count = lba_count; + ctx->cb_args = cb_args; + ctx->bdev_io_wait.bdev = bdev; + ctx->bdev_io_wait.cb_fn = bdev_blob_resubmit; + ctx->bdev_io_wait.cb_arg = 
ctx; + + rc = spdk_bdev_queue_io_wait(bdev, channel, &ctx->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed, rc=%d\n", rc); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + free(ctx); + assert(false); + } +} + +static void +bdev_blob_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + + rc = spdk_bdev_read_blocks(__get_desc(dev), channel, payload, lba, + lba_count, bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, payload, 0, lba, + lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } +} + +static void +bdev_blob_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + + rc = spdk_bdev_write_blocks(__get_desc(dev), channel, payload, lba, + lba_count, bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, payload, 0, lba, + lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } +} + +static void +bdev_blob_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + + rc = spdk_bdev_readv_blocks(__get_desc(dev), channel, iov, iovcnt, lba, + lba_count, bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, + lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } +} + +static void +bdev_blob_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + + rc = spdk_bdev_writev_blocks(__get_desc(dev), channel, iov, iovcnt, lba, + lba_count, bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, + lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } +} + +static void +bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, + uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int rc; + + rc = spdk_bdev_write_zeroes_blocks(__get_desc(dev), channel, lba, + lba_count, bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, NULL, 0, lba, + lba_count, SPDK_BDEV_IO_TYPE_WRITE_ZEROES, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } +} + +static void +bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, + uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct blob_bdev *blob_bdev = (struct blob_bdev *)dev; + int rc; + + if (spdk_bdev_io_type_supported(blob_bdev->bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + rc = spdk_bdev_unmap_blocks(__get_desc(dev), channel, lba, lba_count, + bdev_blob_io_complete, cb_args); + if (rc == -ENOMEM) { + bdev_blob_queue_io(dev, channel, NULL, 0, lba, + lba_count, SPDK_BDEV_IO_TYPE_UNMAP, cb_args); + } else if (rc != 0) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); + } + } else { + /* + * If the device doesn't support unmap, 
immediately complete + * the request. Blobstore does not rely on unmap zeroing + * data. + */ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); + } +} + +static void +bdev_blob_resubmit(void *arg) +{ + struct blob_resubmit *ctx = (struct blob_resubmit *) arg; + + switch (ctx->io_type) { + case SPDK_BDEV_IO_TYPE_READ: + if (ctx->iovcnt > 0) { + bdev_blob_readv(ctx->dev, ctx->channel, (struct iovec *)ctx->payload, ctx->iovcnt, + ctx->lba, ctx->lba_count, ctx->cb_args); + } else { + bdev_blob_read(ctx->dev, ctx->channel, ctx->payload, + ctx->lba, ctx->lba_count, ctx->cb_args); + } + break; + case SPDK_BDEV_IO_TYPE_WRITE: + if (ctx->iovcnt > 0) { + bdev_blob_writev(ctx->dev, ctx->channel, (struct iovec *)ctx->payload, ctx->iovcnt, + ctx->lba, ctx->lba_count, ctx->cb_args); + } else { + bdev_blob_write(ctx->dev, ctx->channel, ctx->payload, + ctx->lba, ctx->lba_count, ctx->cb_args); + } + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + bdev_blob_unmap(ctx->dev, ctx->channel, + ctx->lba, ctx->lba_count, ctx->cb_args); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + bdev_blob_write_zeroes(ctx->dev, ctx->channel, + ctx->lba, ctx->lba_count, ctx->cb_args); + break; + default: + SPDK_ERRLOG("Unsupported io type %d\n", ctx->io_type); + assert(false); + break; + } + free(ctx); +} + +int +spdk_bs_bdev_claim(struct spdk_bs_dev *bs_dev, struct spdk_bdev_module *module) +{ + struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev; + int rc; + + rc = spdk_bdev_module_claim_bdev(blob_bdev->bdev, NULL, module); + if (rc != 0) { + SPDK_ERRLOG("could not claim bs dev\n"); + return rc; + } + + blob_bdev->claimed = true; + + return rc; +} + +static struct spdk_io_channel * +bdev_blob_create_channel(struct spdk_bs_dev *dev) +{ + struct blob_bdev *blob_bdev = (struct blob_bdev *)dev; + + return spdk_bdev_get_io_channel(blob_bdev->desc); +} + +static void +bdev_blob_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +static void +bdev_blob_destroy(struct spdk_bs_dev *bs_dev) +{ + struct spdk_bdev_desc *desc = __get_desc(bs_dev); + struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev; + + if (blob_bdev->claimed) { + spdk_bdev_module_release_bdev(blob_bdev->bdev); + } + + spdk_bdev_close(desc); + free(bs_dev); +} + +struct spdk_bs_dev * +spdk_bdev_create_bs_dev(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb, void *remove_ctx) +{ + struct blob_bdev *b; + struct spdk_bdev_desc *desc; + int rc; + + b = calloc(1, sizeof(*b)); + + if (b == NULL) { + SPDK_ERRLOG("could not allocate blob_bdev\n"); + return NULL; + } + + rc = spdk_bdev_open(bdev, true, remove_cb, remove_ctx, &desc); + if (rc != 0) { + free(b); + return NULL; + } + + b->bdev = bdev; + b->desc = desc; + b->bs_dev.blockcnt = spdk_bdev_get_num_blocks(bdev); + b->bs_dev.blocklen = spdk_bdev_get_block_size(bdev); + b->bs_dev.create_channel = bdev_blob_create_channel; + b->bs_dev.destroy_channel = bdev_blob_destroy_channel; + b->bs_dev.destroy = bdev_blob_destroy; + b->bs_dev.read = bdev_blob_read; + b->bs_dev.write = bdev_blob_write; + b->bs_dev.readv = bdev_blob_readv; + b->bs_dev.writev = bdev_blob_writev; + b->bs_dev.write_zeroes = bdev_blob_write_zeroes; + b->bs_dev.unmap = bdev_blob_unmap; + + return &b->bs_dev; +} diff --git a/src/spdk/lib/blob/blob_bs_dev.c b/src/spdk/lib/blob/blob_bs_dev.c new file mode 100644 index 00000000..91084651 --- /dev/null +++ b/src/spdk/lib/blob/blob_bs_dev.c @@ -0,0 +1,150 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/blob.h" +#include "spdk/log.h" +#include "blobstore.h" + +static void +blob_bs_dev_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +blob_bs_dev_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +blob_bs_dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +blob_bs_dev_read_cpl(void *cb_arg, int bserrno) +{ + struct spdk_bs_dev_cb_args *cb_args = (struct spdk_bs_dev_cb_args *)cb_arg; + + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno); +} + +static inline void +blob_bs_dev_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; + + spdk_blob_io_read(b->blob, channel, payload, lba, lba_count, + blob_bs_dev_read_cpl, cb_args); +} + +static inline void +blob_bs_dev_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; + + 
spdk_blob_io_readv(b->blob, channel, iov, iovcnt, lba, lba_count, + blob_bs_dev_read_cpl, cb_args); +} + +static void +blob_bs_dev_destroy_cpl(void *cb_arg, int bserrno) +{ + if (bserrno != 0) { + SPDK_ERRLOG("Error on blob_bs_dev destroy: %d", bserrno); + } + + /* Free blob_bs_dev */ + free(cb_arg); +} + +static void +blob_bs_dev_destroy(struct spdk_bs_dev *bs_dev) +{ + struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)bs_dev; + + spdk_blob_close(b->blob, blob_bs_dev_destroy_cpl, b); +} + + +struct spdk_bs_dev * +spdk_bs_create_blob_bs_dev(struct spdk_blob *blob) +{ + struct spdk_blob_bs_dev *b; + + b = calloc(1, sizeof(*b)); + if (b == NULL) { + return NULL; + } + /* snapshot blob */ + b->bs_dev.blockcnt = blob->active.num_clusters * + blob->bs->pages_per_cluster * _spdk_bs_io_unit_per_page(blob->bs); + b->bs_dev.blocklen = spdk_bs_get_io_unit_size(blob->bs); + b->bs_dev.create_channel = NULL; + b->bs_dev.destroy_channel = NULL; + b->bs_dev.destroy = blob_bs_dev_destroy; + b->bs_dev.write = blob_bs_dev_write; + b->bs_dev.writev = blob_bs_dev_writev; + b->bs_dev.read = blob_bs_dev_read; + b->bs_dev.readv = blob_bs_dev_readv; + b->bs_dev.write_zeroes = blob_bs_dev_write_zeroes; + b->bs_dev.unmap = blob_bs_dev_unmap; + b->blob = blob; + + return &b->bs_dev; +} diff --git a/src/spdk/lib/blob/blobstore.c b/src/spdk/lib/blob/blobstore.c new file mode 100644 index 00000000..3b294180 --- /dev/null +++ b/src/spdk/lib/blob/blobstore.c @@ -0,0 +1,5720 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/blob.h" +#include "spdk/crc32.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/thread.h" +#include "spdk/bit_array.h" +#include "spdk/likely.h" + +#include "spdk_internal/assert.h" +#include "spdk_internal/log.h" + +#include "blobstore.h" + +#define BLOB_CRC32C_INITIAL 0xffffffffUL + +static int spdk_bs_register_md_thread(struct spdk_blob_store *bs); +static int spdk_bs_unregister_md_thread(struct spdk_blob_store *bs); +static void _spdk_blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno); +static void _spdk_blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, + uint64_t cluster, spdk_blob_op_complete cb_fn, void *cb_arg); + +static int _spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len, bool internal); +static int _spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len, bool internal); +static int _spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal); + +static void +_spdk_blob_verify_md_op(struct spdk_blob *blob) +{ + assert(blob != NULL); + assert(spdk_get_thread() == blob->bs->md_thread); + assert(blob->state != SPDK_BLOB_STATE_LOADING); +} + +static inline size_t +divide_round_up(size_t num, size_t divisor) +{ + return (num + divisor - 1) / divisor; +} + +static void +_spdk_bs_claim_cluster(struct spdk_blob_store *bs, uint32_t cluster_num) +{ + assert(cluster_num < spdk_bit_array_capacity(bs->used_clusters)); + assert(spdk_bit_array_get(bs->used_clusters, cluster_num) == false); + assert(bs->num_free_clusters > 0); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Claiming cluster %u\n", cluster_num); + + spdk_bit_array_set(bs->used_clusters, cluster_num); + bs->num_free_clusters--; +} + +static int +_spdk_blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster) +{ + uint64_t *cluster_lba = &blob->active.clusters[cluster_num]; + + _spdk_blob_verify_md_op(blob); + + if (*cluster_lba != 0) { + return -EEXIST; + } + + *cluster_lba = _spdk_bs_cluster_to_lba(blob->bs, cluster); + return 0; +} + +static int +_spdk_bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num, + uint64_t *lowest_free_cluster, bool update_map) +{ + pthread_mutex_lock(&blob->bs->used_clusters_mutex); + *lowest_free_cluster = spdk_bit_array_find_first_clear(blob->bs->used_clusters, + *lowest_free_cluster); + if (*lowest_free_cluster == UINT32_MAX) { + /* No more free clusters. 
Cannot satisfy the request */ + pthread_mutex_unlock(&blob->bs->used_clusters_mutex); + return -ENOSPC; + } + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Claiming cluster %lu for blob %lu\n", *lowest_free_cluster, blob->id); + _spdk_bs_claim_cluster(blob->bs, *lowest_free_cluster); + pthread_mutex_unlock(&blob->bs->used_clusters_mutex); + + if (update_map) { + _spdk_blob_insert_cluster(blob, cluster_num, *lowest_free_cluster); + } + + return 0; +} + +static void +_spdk_bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num) +{ + assert(cluster_num < spdk_bit_array_capacity(bs->used_clusters)); + assert(spdk_bit_array_get(bs->used_clusters, cluster_num) == true); + assert(bs->num_free_clusters < bs->total_clusters); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Releasing cluster %u\n", cluster_num); + + pthread_mutex_lock(&bs->used_clusters_mutex); + spdk_bit_array_clear(bs->used_clusters, cluster_num); + bs->num_free_clusters++; + pthread_mutex_unlock(&bs->used_clusters_mutex); +} + +static void +_spdk_blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs) +{ + xattrs->count = 0; + xattrs->names = NULL; + xattrs->ctx = NULL; + xattrs->get_value = NULL; +} + +void +spdk_blob_opts_init(struct spdk_blob_opts *opts) +{ + opts->num_clusters = 0; + opts->thin_provision = false; + _spdk_blob_xattrs_init(&opts->xattrs); +} + +static struct spdk_blob * +_spdk_blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id) +{ + struct spdk_blob *blob; + + blob = calloc(1, sizeof(*blob)); + if (!blob) { + return NULL; + } + + blob->id = id; + blob->bs = bs; + + blob->parent_id = SPDK_BLOBID_INVALID; + + blob->state = SPDK_BLOB_STATE_DIRTY; + blob->active.num_pages = 1; + blob->active.pages = calloc(1, sizeof(*blob->active.pages)); + if (!blob->active.pages) { + free(blob); + return NULL; + } + + blob->active.pages[0] = _spdk_bs_blobid_to_page(id); + + TAILQ_INIT(&blob->xattrs); + TAILQ_INIT(&blob->xattrs_internal); + + return blob; +} + +static void +_spdk_xattrs_free(struct spdk_xattr_tailq *xattrs) +{ + struct spdk_xattr *xattr, *xattr_tmp; + + TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) { + TAILQ_REMOVE(xattrs, xattr, link); + free(xattr->name); + free(xattr->value); + free(xattr); + } +} + +static void +_spdk_blob_free(struct spdk_blob *blob) +{ + assert(blob != NULL); + + free(blob->active.clusters); + free(blob->clean.clusters); + free(blob->active.pages); + free(blob->clean.pages); + + _spdk_xattrs_free(&blob->xattrs); + _spdk_xattrs_free(&blob->xattrs_internal); + + if (blob->back_bs_dev) { + blob->back_bs_dev->destroy(blob->back_bs_dev); + } + + free(blob); +} + +struct freeze_io_ctx { + struct spdk_bs_cpl cpl; + struct spdk_blob *blob; +}; + +static void +_spdk_blob_io_sync(struct spdk_io_channel_iter *i) +{ + spdk_for_each_channel_continue(i, 0); +} + +static void +_spdk_blob_execute_queued_io(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch); + struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_bs_request_set *set; + struct spdk_bs_user_op_args *args; + spdk_bs_user_op_t *op, *tmp; + + TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) { + set = (struct spdk_bs_request_set *)op; + args = &set->u.user_op; + + if (args->blob == ctx->blob) { + TAILQ_REMOVE(&ch->queued_io, op, link); + spdk_bs_user_op_execute(op); + } + } + + spdk_for_each_channel_continue(i, 0); +} + +static void +_spdk_blob_io_cpl(struct spdk_io_channel_iter *i, int status) +{ + struct freeze_io_ctx 
*ctx = spdk_io_channel_iter_get_ctx(i); + + ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0); + + free(ctx); +} + +static void +_spdk_blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct freeze_io_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + ctx->cpl.u.blob_basic.cb_fn = cb_fn; + ctx->cpl.u.blob_basic.cb_arg = cb_arg; + ctx->blob = blob; + + /* Freeze I/O on blob */ + blob->frozen_refcnt++; + + if (blob->frozen_refcnt == 1) { + spdk_for_each_channel(blob->bs, _spdk_blob_io_sync, ctx, _spdk_blob_io_cpl); + } else { + cb_fn(cb_arg, 0); + free(ctx); + } +} + +static void +_spdk_blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct freeze_io_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + ctx->cpl.u.blob_basic.cb_fn = cb_fn; + ctx->cpl.u.blob_basic.cb_arg = cb_arg; + ctx->blob = blob; + + assert(blob->frozen_refcnt > 0); + + blob->frozen_refcnt--; + + if (blob->frozen_refcnt == 0) { + spdk_for_each_channel(blob->bs, _spdk_blob_execute_queued_io, ctx, _spdk_blob_io_cpl); + } else { + cb_fn(cb_arg, 0); + free(ctx); + } +} + +static int +_spdk_blob_mark_clean(struct spdk_blob *blob) +{ + uint64_t *clusters = NULL; + uint32_t *pages = NULL; + + assert(blob != NULL); + + if (blob->active.num_clusters) { + assert(blob->active.clusters); + clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters)); + if (!clusters) { + return -ENOMEM; + } + memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*clusters)); + } + + if (blob->active.num_pages) { + assert(blob->active.pages); + pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages)); + if (!pages) { + free(clusters); + return -ENOMEM; + } + memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*pages)); + } + + free(blob->clean.clusters); + free(blob->clean.pages); + + blob->clean.num_clusters = blob->active.num_clusters; + blob->clean.clusters = blob->active.clusters; + blob->clean.num_pages = blob->active.num_pages; + blob->clean.pages = blob->active.pages; + + blob->active.clusters = clusters; + blob->active.pages = pages; + + /* If the metadata was dirtied again while the metadata was being written to disk, + * we do not want to revert the DIRTY state back to CLEAN here. 
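+	 * Only a blob that is still in the LOADING state is moved to CLEAN below; a
+	 * blob that was marked DIRTY again in the meantime keeps its DIRTY state.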
+ */ + if (blob->state == SPDK_BLOB_STATE_LOADING) { + blob->state = SPDK_BLOB_STATE_CLEAN; + } + + return 0; +} + +static int +_spdk_blob_deserialize_xattr(struct spdk_blob *blob, + struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal) +{ + struct spdk_xattr *xattr; + + if (desc_xattr->length != sizeof(desc_xattr->name_length) + + sizeof(desc_xattr->value_length) + + desc_xattr->name_length + desc_xattr->value_length) { + return -EINVAL; + } + + xattr = calloc(1, sizeof(*xattr)); + if (xattr == NULL) { + return -ENOMEM; + } + + xattr->name = malloc(desc_xattr->name_length + 1); + if (xattr->name == NULL) { + free(xattr); + return -ENOMEM; + } + memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length); + xattr->name[desc_xattr->name_length] = '\0'; + + xattr->value = malloc(desc_xattr->value_length); + if (xattr->value == NULL) { + free(xattr->name); + free(xattr); + return -ENOMEM; + } + xattr->value_len = desc_xattr->value_length; + memcpy(xattr->value, + (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length), + desc_xattr->value_length); + + TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link); + + return 0; +} + + +static int +_spdk_blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob) +{ + struct spdk_blob_md_descriptor *desc; + size_t cur_desc = 0; + void *tmp; + + desc = (struct spdk_blob_md_descriptor *)page->descriptors; + while (cur_desc < sizeof(page->descriptors)) { + if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) { + if (desc->length == 0) { + /* If padding and length are 0, this terminates the page */ + break; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) { + struct spdk_blob_md_descriptor_flags *desc_flags; + + desc_flags = (struct spdk_blob_md_descriptor_flags *)desc; + + if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) { + return -EINVAL; + } + + if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) != + SPDK_BLOB_INVALID_FLAGS_MASK) { + return -EINVAL; + } + + if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) != + SPDK_BLOB_DATA_RO_FLAGS_MASK) { + blob->data_ro = true; + blob->md_ro = true; + } + + if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) != + SPDK_BLOB_MD_RO_FLAGS_MASK) { + blob->md_ro = true; + } + + if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) { + blob->data_ro = true; + blob->md_ro = true; + } + + blob->invalid_flags = desc_flags->invalid_flags; + blob->data_ro_flags = desc_flags->data_ro_flags; + blob->md_ro_flags = desc_flags->md_ro_flags; + + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT) { + struct spdk_blob_md_descriptor_extent *desc_extent; + unsigned int i, j; + unsigned int cluster_count = blob->active.num_clusters; + + desc_extent = (struct spdk_blob_md_descriptor_extent *)desc; + + if (desc_extent->length == 0 || + (desc_extent->length % sizeof(desc_extent->extents[0]) != 0)) { + return -EINVAL; + } + + for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + for (j = 0; j < desc_extent->extents[i].length; j++) { + if (desc_extent->extents[i].cluster_idx != 0) { + if (!spdk_bit_array_get(blob->bs->used_clusters, + desc_extent->extents[i].cluster_idx + j)) { + return -EINVAL; + } + } + cluster_count++; + } + } + + if (cluster_count == 0) { + return -EINVAL; + } + tmp = realloc(blob->active.clusters, cluster_count * sizeof(uint64_t)); + if (tmp == NULL) { + return -ENOMEM; + } + blob->active.clusters = tmp; + blob->active.cluster_array_size = cluster_count; + + for (i 
= 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + for (j = 0; j < desc_extent->extents[i].length; j++) { + if (desc_extent->extents[i].cluster_idx != 0) { + blob->active.clusters[blob->active.num_clusters++] = _spdk_bs_cluster_to_lba(blob->bs, + desc_extent->extents[i].cluster_idx + j); + } else if (spdk_blob_is_thin_provisioned(blob)) { + blob->active.clusters[blob->active.num_clusters++] = 0; + } else { + return -EINVAL; + } + } + } + + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) { + int rc; + + rc = _spdk_blob_deserialize_xattr(blob, + (struct spdk_blob_md_descriptor_xattr *) desc, false); + if (rc != 0) { + return rc; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) { + int rc; + + rc = _spdk_blob_deserialize_xattr(blob, + (struct spdk_blob_md_descriptor_xattr *) desc, true); + if (rc != 0) { + return rc; + } + } else { + /* Unrecognized descriptor type. Do not fail - just continue to the + * next descriptor. If this descriptor is associated with some feature + * defined in a newer version of blobstore, that version of blobstore + * should create and set an associated feature flag to specify if this + * blob can be loaded or not. + */ + } + + /* Advance to the next descriptor */ + cur_desc += sizeof(*desc) + desc->length; + if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) { + break; + } + desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc); + } + + return 0; +} + +static int +_spdk_blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count, + struct spdk_blob *blob) +{ + const struct spdk_blob_md_page *page; + uint32_t i; + int rc; + + assert(page_count > 0); + assert(pages[0].sequence_num == 0); + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_LOADING); + assert(blob->active.clusters == NULL); + + /* The blobid provided doesn't match what's in the MD, this can + * happen for example if a bogus blobid is passed in through open. + */ + if (blob->id != pages[0].id) { + SPDK_ERRLOG("Blobid (%lu) doesn't match what's in metadata (%lu)\n", + blob->id, pages[0].id); + return -ENOENT; + } + + for (i = 0; i < page_count; i++) { + page = &pages[i]; + + assert(page->id == blob->id); + assert(page->sequence_num == i); + + rc = _spdk_blob_parse_page(page, blob); + if (rc != 0) { + return rc; + } + } + + return 0; +} + +static int +_spdk_blob_serialize_add_page(const struct spdk_blob *blob, + struct spdk_blob_md_page **pages, + uint32_t *page_count, + struct spdk_blob_md_page **last_page) +{ + struct spdk_blob_md_page *page; + + assert(pages != NULL); + assert(page_count != NULL); + + if (*page_count == 0) { + assert(*pages == NULL); + *page_count = 1; + *pages = spdk_dma_malloc(SPDK_BS_PAGE_SIZE, + SPDK_BS_PAGE_SIZE, + NULL); + } else { + assert(*pages != NULL); + (*page_count)++; + *pages = spdk_dma_realloc(*pages, + SPDK_BS_PAGE_SIZE * (*page_count), + SPDK_BS_PAGE_SIZE, + NULL); + } + + if (*pages == NULL) { + *page_count = 0; + *last_page = NULL; + return -ENOMEM; + } + + page = &(*pages)[*page_count - 1]; + memset(page, 0, sizeof(*page)); + page->id = blob->id; + page->sequence_num = *page_count - 1; + page->next = SPDK_INVALID_MD_PAGE; + *last_page = page; + + return 0; +} + +/* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor. + * Update required_sz on both success and failure. 
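+ * If buf_sz is smaller than the computed required_sz, nothing is written and
+ * -1 is returned so the caller can add a new metadata page and retry.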
+ * + */ +static int +_spdk_blob_serialize_xattr(const struct spdk_xattr *xattr, + uint8_t *buf, size_t buf_sz, + size_t *required_sz, bool internal) +{ + struct spdk_blob_md_descriptor_xattr *desc; + + *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) + + strlen(xattr->name) + + xattr->value_len; + + if (buf_sz < *required_sz) { + return -1; + } + + desc = (struct spdk_blob_md_descriptor_xattr *)buf; + + desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR; + desc->length = sizeof(desc->name_length) + + sizeof(desc->value_length) + + strlen(xattr->name) + + xattr->value_len; + desc->name_length = strlen(xattr->name); + desc->value_length = xattr->value_len; + + memcpy(desc->name, xattr->name, desc->name_length); + memcpy((void *)((uintptr_t)desc->name + desc->name_length), + xattr->value, + desc->value_length); + + return 0; +} + +static void +_spdk_blob_serialize_extent(const struct spdk_blob *blob, + uint64_t start_cluster, uint64_t *next_cluster, + uint8_t *buf, size_t buf_sz) +{ + struct spdk_blob_md_descriptor_extent *desc; + size_t cur_sz; + uint64_t i, extent_idx; + uint64_t lba, lba_per_cluster, lba_count; + + /* The buffer must have room for at least one extent */ + cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->extents[0]); + if (buf_sz < cur_sz) { + *next_cluster = start_cluster; + return; + } + + desc = (struct spdk_blob_md_descriptor_extent *)buf; + desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT; + + lba_per_cluster = _spdk_bs_cluster_to_lba(blob->bs, 1); + + lba = blob->active.clusters[start_cluster]; + lba_count = lba_per_cluster; + extent_idx = 0; + for (i = start_cluster + 1; i < blob->active.num_clusters; i++) { + if ((lba + lba_count) == blob->active.clusters[i]) { + lba_count += lba_per_cluster; + continue; + } else if (lba == 0 && blob->active.clusters[i] == 0) { + lba_count += lba_per_cluster; + continue; + } + desc->extents[extent_idx].cluster_idx = lba / lba_per_cluster; + desc->extents[extent_idx].length = lba_count / lba_per_cluster; + extent_idx++; + + cur_sz += sizeof(desc->extents[extent_idx]); + + if (buf_sz < cur_sz) { + /* If we ran out of buffer space, return */ + desc->length = sizeof(desc->extents[0]) * extent_idx; + *next_cluster = i; + return; + } + + lba = blob->active.clusters[i]; + lba_count = lba_per_cluster; + } + + desc->extents[extent_idx].cluster_idx = lba / lba_per_cluster; + desc->extents[extent_idx].length = lba_count / lba_per_cluster; + extent_idx++; + + desc->length = sizeof(desc->extents[0]) * extent_idx; + *next_cluster = blob->active.num_clusters; + + return; +} + +static void +_spdk_blob_serialize_flags(const struct spdk_blob *blob, + uint8_t *buf, size_t *buf_sz) +{ + struct spdk_blob_md_descriptor_flags *desc; + + /* + * Flags get serialized first, so we should always have room for the flags + * descriptor. 
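+	 * The caller hands in a freshly added page here, so *buf_sz still covers the
+	 * page's entire descriptor area.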
+ */ + assert(*buf_sz >= sizeof(*desc)); + + desc = (struct spdk_blob_md_descriptor_flags *)buf; + desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS; + desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor); + desc->invalid_flags = blob->invalid_flags; + desc->data_ro_flags = blob->data_ro_flags; + desc->md_ro_flags = blob->md_ro_flags; + + *buf_sz -= sizeof(*desc); +} + +static int +_spdk_blob_serialize_xattrs(const struct spdk_blob *blob, + const struct spdk_xattr_tailq *xattrs, bool internal, + struct spdk_blob_md_page **pages, + struct spdk_blob_md_page *cur_page, + uint32_t *page_count, uint8_t **buf, + size_t *remaining_sz) +{ + const struct spdk_xattr *xattr; + int rc; + + TAILQ_FOREACH(xattr, xattrs, link) { + size_t required_sz = 0; + + rc = _spdk_blob_serialize_xattr(xattr, + *buf, *remaining_sz, + &required_sz, internal); + if (rc < 0) { + /* Need to add a new page to the chain */ + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, + &cur_page); + if (rc < 0) { + spdk_dma_free(*pages); + *pages = NULL; + *page_count = 0; + return rc; + } + + *buf = (uint8_t *)cur_page->descriptors; + *remaining_sz = sizeof(cur_page->descriptors); + + /* Try again */ + required_sz = 0; + rc = _spdk_blob_serialize_xattr(xattr, + *buf, *remaining_sz, + &required_sz, internal); + + if (rc < 0) { + spdk_dma_free(*pages); + *pages = NULL; + *page_count = 0; + return rc; + } + } + + *remaining_sz -= required_sz; + *buf += required_sz; + } + + return 0; +} + +static int +_spdk_blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages, + uint32_t *page_count) +{ + struct spdk_blob_md_page *cur_page; + int rc; + uint8_t *buf; + size_t remaining_sz; + uint64_t last_cluster; + + assert(pages != NULL); + assert(page_count != NULL); + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_DIRTY); + + *pages = NULL; + *page_count = 0; + + /* A blob always has at least 1 page, even if it has no descriptors */ + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, &cur_page); + if (rc < 0) { + return rc; + } + + buf = (uint8_t *)cur_page->descriptors; + remaining_sz = sizeof(cur_page->descriptors); + + /* Serialize flags */ + _spdk_blob_serialize_flags(blob, buf, &remaining_sz); + buf += sizeof(struct spdk_blob_md_descriptor_flags); + + /* Serialize xattrs */ + rc = _spdk_blob_serialize_xattrs(blob, &blob->xattrs, false, + pages, cur_page, page_count, &buf, &remaining_sz); + if (rc < 0) { + return rc; + } + + /* Serialize internal xattrs */ + rc = _spdk_blob_serialize_xattrs(blob, &blob->xattrs_internal, true, + pages, cur_page, page_count, &buf, &remaining_sz); + if (rc < 0) { + return rc; + } + + /* Serialize extents */ + last_cluster = 0; + while (last_cluster < blob->active.num_clusters) { + _spdk_blob_serialize_extent(blob, last_cluster, &last_cluster, + buf, remaining_sz); + + if (last_cluster == blob->active.num_clusters) { + break; + } + + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, + &cur_page); + if (rc < 0) { + return rc; + } + + buf = (uint8_t *)cur_page->descriptors; + remaining_sz = sizeof(cur_page->descriptors); + } + + return 0; +} + +struct spdk_blob_load_ctx { + struct spdk_blob *blob; + + struct spdk_blob_md_page *pages; + uint32_t num_pages; + spdk_bs_sequence_t *seq; + + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; +}; + +static uint32_t +_spdk_blob_md_page_calc_crc(void *page) +{ + uint32_t crc; + + crc = BLOB_CRC32C_INITIAL; + crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc); + crc ^= BLOB_CRC32C_INITIAL; + + 
return crc; + +} + +static void +_spdk_blob_load_final(void *cb_arg, int bserrno) +{ + struct spdk_blob_load_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + + _spdk_blob_mark_clean(blob); + + ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno); + + /* Free the memory */ + spdk_dma_free(ctx->pages); + free(ctx); +} + +static void +_spdk_blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno) +{ + struct spdk_blob_load_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + + if (bserrno != 0) { + goto error; + } + + blob->back_bs_dev = spdk_bs_create_blob_bs_dev(snapshot); + + if (blob->back_bs_dev == NULL) { + bserrno = -ENOMEM; + goto error; + } + + _spdk_blob_load_final(ctx, bserrno); + return; + +error: + SPDK_ERRLOG("Snapshot fail\n"); + _spdk_blob_free(blob); + ctx->cb_fn(ctx->seq, NULL, bserrno); + spdk_dma_free(ctx->pages); + free(ctx); +} + +static void +_spdk_blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_load_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_md_page *page; + const void *value; + size_t len; + int rc; + uint32_t crc; + + page = &ctx->pages[ctx->num_pages - 1]; + crc = _spdk_blob_md_page_calc_crc(page); + if (crc != page->crc) { + SPDK_ERRLOG("Metadata page %d crc mismatch\n", ctx->num_pages); + _spdk_blob_free(blob); + ctx->cb_fn(seq, NULL, -EINVAL); + spdk_dma_free(ctx->pages); + free(ctx); + return; + } + + if (page->next != SPDK_INVALID_MD_PAGE) { + uint32_t next_page = page->next; + uint64_t next_lba = _spdk_bs_page_to_lba(blob->bs, blob->bs->md_start + next_page); + + + assert(next_lba < (blob->bs->md_start + blob->bs->md_len)); + + /* Read the next page */ + ctx->num_pages++; + ctx->pages = spdk_dma_realloc(ctx->pages, (sizeof(*page) * ctx->num_pages), + sizeof(*page), NULL); + if (ctx->pages == NULL) { + ctx->cb_fn(seq, ctx->cb_arg, -ENOMEM); + free(ctx); + return; + } + + spdk_bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1], + next_lba, + _spdk_bs_byte_to_lba(blob->bs, sizeof(*page)), + _spdk_blob_load_cpl, ctx); + return; + } + + /* Parse the pages */ + rc = _spdk_blob_parse(ctx->pages, ctx->num_pages, blob); + if (rc) { + _spdk_blob_free(blob); + ctx->cb_fn(seq, NULL, rc); + spdk_dma_free(ctx->pages); + free(ctx); + return; + } + ctx->seq = seq; + + + if (spdk_blob_is_thin_provisioned(blob)) { + rc = _spdk_blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true); + if (rc == 0) { + if (len != sizeof(spdk_blob_id)) { + _spdk_blob_free(blob); + ctx->cb_fn(seq, NULL, -EINVAL); + spdk_dma_free(ctx->pages); + free(ctx); + return; + } + /* open snapshot blob and continue in the callback function */ + blob->parent_id = *(spdk_blob_id *)value; + spdk_bs_open_blob(blob->bs, blob->parent_id, + _spdk_blob_load_snapshot_cpl, ctx); + return; + } else { + /* add zeroes_dev for thin provisioned blob */ + blob->back_bs_dev = spdk_bs_create_zeroes_dev(); + } + } else { + /* standard blob */ + blob->back_bs_dev = NULL; + } + _spdk_blob_load_final(ctx, bserrno); +} + +/* Load a blob from disk given a blobid */ +static void +_spdk_blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_blob_load_ctx *ctx; + struct spdk_blob_store *bs; + uint32_t page_num; + uint64_t lba; + + _spdk_blob_verify_md_op(blob); + + bs = blob->bs; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + + ctx->blob = blob; + ctx->pages = spdk_dma_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, + 
SPDK_BS_PAGE_SIZE, NULL); + if (!ctx->pages) { + free(ctx); + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + ctx->num_pages = 1; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + page_num = _spdk_bs_blobid_to_page(blob->id); + lba = _spdk_bs_page_to_lba(blob->bs, bs->md_start + page_num); + + blob->state = SPDK_BLOB_STATE_LOADING; + + spdk_bs_sequence_read_dev(seq, &ctx->pages[0], lba, + _spdk_bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE), + _spdk_blob_load_cpl, ctx); +} + +struct spdk_blob_persist_ctx { + struct spdk_blob *blob; + + struct spdk_bs_super_block *super; + + struct spdk_blob_md_page *pages; + + uint64_t idx; + + spdk_bs_sequence_t *seq; + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; +}; + +static void +_spdk_blob_persist_complete(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + + if (bserrno == 0) { + _spdk_blob_mark_clean(blob); + } + + /* Call user callback */ + ctx->cb_fn(seq, ctx->cb_arg, bserrno); + + /* Free the memory */ + spdk_dma_free(ctx->pages); + free(ctx); +} + +static void +_spdk_blob_persist_unmap_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + void *tmp; + size_t i; + + /* Release all clusters that were truncated */ + for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) { + uint32_t cluster_num = _spdk_bs_lba_to_cluster(bs, blob->active.clusters[i]); + + /* Nothing to release if it was not allocated */ + if (blob->active.clusters[i] != 0) { + _spdk_bs_release_cluster(bs, cluster_num); + } + } + + if (blob->active.num_clusters == 0) { + free(blob->active.clusters); + blob->active.clusters = NULL; + blob->active.cluster_array_size = 0; + } else { + tmp = realloc(blob->active.clusters, sizeof(uint64_t) * blob->active.num_clusters); + assert(tmp != NULL); + blob->active.clusters = tmp; + blob->active.cluster_array_size = blob->active.num_clusters; + } + + _spdk_blob_persist_complete(seq, ctx, bserrno); +} + +static void +_spdk_blob_persist_unmap_clusters(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + spdk_bs_batch_t *batch; + size_t i; + uint64_t lba; + uint32_t lba_count; + + /* Clusters don't move around in blobs. The list shrinks or grows + * at the end, but no changes ever occur in the middle of the list. + */ + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_unmap_clusters_cpl, ctx); + + /* Unmap all clusters that were truncated */ + lba = 0; + lba_count = 0; + for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) { + uint64_t next_lba = blob->active.clusters[i]; + uint32_t next_lba_count = _spdk_bs_cluster_to_lba(bs, 1); + + if (next_lba > 0 && (lba + lba_count) == next_lba) { + /* This cluster is contiguous with the previous one. */ + lba_count += next_lba_count; + continue; + } + + /* This cluster is not contiguous with the previous one. */ + + /* If a run of LBAs previously existing, send them + * as an unmap. 
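+		 * Runs are only built from allocated clusters; an unallocated cluster
+		 * (LBA 0) ends the current run without itself being unmapped.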
+ */ + if (lba_count > 0) { + spdk_bs_batch_unmap_dev(batch, lba, lba_count); + } + + /* Start building the next batch */ + lba = next_lba; + if (next_lba > 0) { + lba_count = next_lba_count; + } else { + lba_count = 0; + } + } + + /* If we ended with a contiguous set of LBAs, send the unmap now */ + if (lba_count > 0) { + spdk_bs_batch_unmap_dev(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static void +_spdk_blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + size_t i; + + /* This loop starts at 1 because the first page is special and handled + * below. The pages (except the first) are never written in place, + * so any pages in the clean list must be zeroed. + */ + for (i = 1; i < blob->clean.num_pages; i++) { + spdk_bit_array_clear(bs->used_md_pages, blob->clean.pages[i]); + } + + if (blob->active.num_pages == 0) { + uint32_t page_num; + + page_num = _spdk_bs_blobid_to_page(blob->id); + spdk_bit_array_clear(bs->used_md_pages, page_num); + } + + /* Move on to unmapping clusters */ + _spdk_blob_persist_unmap_clusters(seq, ctx, 0); +} + +static void +_spdk_blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + spdk_bs_batch_t *batch; + size_t i; + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_zero_pages_cpl, ctx); + + lba_count = _spdk_bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE); + + /* This loop starts at 1 because the first page is special and handled + * below. The pages (except the first) are never written in place, + * so any pages in the clean list must be zeroed. + */ + for (i = 1; i < blob->clean.num_pages; i++) { + lba = _spdk_bs_page_to_lba(bs, bs->md_start + blob->clean.pages[i]); + + spdk_bs_batch_write_zeroes_dev(batch, lba, lba_count); + } + + /* The first page will only be zeroed if this is a delete. 
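+	 * A persist of a live blob instead rewrites the first page in place via
+	 * _spdk_blob_persist_write_page_root().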
*/ + if (blob->active.num_pages == 0) { + uint32_t page_num; + + /* The first page in the metadata goes where the blobid indicates */ + page_num = _spdk_bs_blobid_to_page(blob->id); + lba = _spdk_bs_page_to_lba(bs, bs->md_start + page_num); + + spdk_bs_batch_write_zeroes_dev(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static void +_spdk_blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + struct spdk_blob_md_page *page; + + if (blob->active.num_pages == 0) { + /* Move on to the next step */ + _spdk_blob_persist_zero_pages(seq, ctx, 0); + return; + } + + lba_count = _spdk_bs_byte_to_lba(bs, sizeof(*page)); + + page = &ctx->pages[0]; + /* The first page in the metadata goes where the blobid indicates */ + lba = _spdk_bs_page_to_lba(bs, bs->md_start + _spdk_bs_blobid_to_page(blob->id)); + + spdk_bs_sequence_write_dev(seq, page, lba, lba_count, + _spdk_blob_persist_zero_pages, ctx); +} + +static void +_spdk_blob_persist_write_page_chain(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + struct spdk_blob_md_page *page; + spdk_bs_batch_t *batch; + size_t i; + + /* Clusters don't move around in blobs. The list shrinks or grows + * at the end, but no changes ever occur in the middle of the list. + */ + + lba_count = _spdk_bs_byte_to_lba(bs, sizeof(*page)); + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_write_page_root, ctx); + + /* This starts at 1. The root page is not written until + * all of the others are finished + */ + for (i = 1; i < blob->active.num_pages; i++) { + page = &ctx->pages[i]; + assert(page->sequence_num == i); + + lba = _spdk_bs_page_to_lba(bs, bs->md_start + blob->active.pages[i]); + + spdk_bs_batch_write_dev(batch, page, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static int +_spdk_blob_resize(struct spdk_blob *blob, uint64_t sz) +{ + uint64_t i; + uint64_t *tmp; + uint64_t lfc; /* lowest free cluster */ + uint64_t num_clusters; + struct spdk_blob_store *bs; + + bs = blob->bs; + + _spdk_blob_verify_md_op(blob); + + if (blob->active.num_clusters == sz) { + return 0; + } + + if (blob->active.num_clusters < blob->active.cluster_array_size) { + /* If this blob was resized to be larger, then smaller, then + * larger without syncing, then the cluster array already + * contains spare assigned clusters we can use. + */ + num_clusters = spdk_min(blob->active.cluster_array_size, + sz); + } else { + num_clusters = blob->active.num_clusters; + } + + /* Do two passes - one to verify that we can obtain enough clusters + * and another to actually claim them. + */ + + if (spdk_blob_is_thin_provisioned(blob) == false) { + lfc = 0; + for (i = num_clusters; i < sz; i++) { + lfc = spdk_bit_array_find_first_clear(bs->used_clusters, lfc); + if (lfc == UINT32_MAX) { + /* No more free clusters. Cannot satisfy the request */ + return -ENOSPC; + } + lfc++; + } + } + + if (sz > num_clusters) { + /* Expand the cluster array if necessary. + * We only shrink the array when persisting. 
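+		 * The newly grown tail of the array is zeroed below so that the added
+		 * clusters start out unallocated (LBA 0).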
+ */ + tmp = realloc(blob->active.clusters, sizeof(uint64_t) * sz); + if (sz > 0 && tmp == NULL) { + return -ENOMEM; + } + memset(tmp + blob->active.cluster_array_size, 0, + sizeof(uint64_t) * (sz - blob->active.cluster_array_size)); + blob->active.clusters = tmp; + blob->active.cluster_array_size = sz; + } + + blob->state = SPDK_BLOB_STATE_DIRTY; + + if (spdk_blob_is_thin_provisioned(blob) == false) { + lfc = 0; + for (i = num_clusters; i < sz; i++) { + _spdk_bs_allocate_cluster(blob, i, &lfc, true); + lfc++; + } + } + + blob->active.num_clusters = sz; + + return 0; +} + +static void +_spdk_blob_persist_start(struct spdk_blob_persist_ctx *ctx) +{ + spdk_bs_sequence_t *seq = ctx->seq; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t i; + uint32_t page_num; + void *tmp; + int rc; + + if (blob->active.num_pages == 0) { + /* This is the signal that the blob should be deleted. + * Immediately jump to the clean up routine. */ + assert(blob->clean.num_pages > 0); + ctx->idx = blob->clean.num_pages - 1; + blob->state = SPDK_BLOB_STATE_CLEAN; + _spdk_blob_persist_zero_pages(seq, ctx, 0); + return; + + } + + /* Generate the new metadata */ + rc = _spdk_blob_serialize(blob, &ctx->pages, &blob->active.num_pages); + if (rc < 0) { + _spdk_blob_persist_complete(seq, ctx, rc); + return; + } + + assert(blob->active.num_pages >= 1); + + /* Resize the cache of page indices */ + tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages)); + if (!tmp) { + _spdk_blob_persist_complete(seq, ctx, -ENOMEM); + return; + } + blob->active.pages = tmp; + + /* Assign this metadata to pages. This requires two passes - + * one to verify that there are enough pages and a second + * to actually claim them. */ + page_num = 0; + /* Note that this loop starts at one. The first page location is fixed by the blobid. */ + for (i = 1; i < blob->active.num_pages; i++) { + page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num); + if (page_num == UINT32_MAX) { + _spdk_blob_persist_complete(seq, ctx, -ENOMEM); + return; + } + page_num++; + } + + page_num = 0; + blob->active.pages[0] = _spdk_bs_blobid_to_page(blob->id); + for (i = 1; i < blob->active.num_pages; i++) { + page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num); + ctx->pages[i - 1].next = page_num; + /* Now that previous metadata page is complete, calculate the crc for it. 
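+		 * The crc is computed over the first SPDK_BS_PAGE_SIZE - 4 bytes of the
+		 * page (see _spdk_blob_md_page_calc_crc()).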
*/ + ctx->pages[i - 1].crc = _spdk_blob_md_page_calc_crc(&ctx->pages[i - 1]); + blob->active.pages[i] = page_num; + spdk_bit_array_set(bs->used_md_pages, page_num); + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Claiming page %u for blob %lu\n", page_num, blob->id); + page_num++; + } + ctx->pages[i - 1].crc = _spdk_blob_md_page_calc_crc(&ctx->pages[i - 1]); + /* Start writing the metadata from last page to first */ + ctx->idx = blob->active.num_pages - 1; + blob->state = SPDK_BLOB_STATE_CLEAN; + _spdk_blob_persist_write_page_chain(seq, ctx, 0); +} + +static void +_spdk_blob_persist_dirty_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + + ctx->blob->bs->clean = 0; + + spdk_dma_free(ctx->super); + + _spdk_blob_persist_start(ctx); +} + +static void +_spdk_bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs, + struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg); + + +static void +_spdk_blob_persist_dirty(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + + ctx->super->clean = 0; + if (ctx->super->size == 0) { + ctx->super->size = ctx->blob->bs->dev->blockcnt * ctx->blob->bs->dev->blocklen; + } + + _spdk_bs_write_super(seq, ctx->blob->bs, ctx->super, _spdk_blob_persist_dirty_cpl, ctx); +} + + +/* Write a blob to disk */ +static void +_spdk_blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_blob_persist_ctx *ctx; + + _spdk_blob_verify_md_op(blob); + + if (blob->state == SPDK_BLOB_STATE_CLEAN) { + cb_fn(seq, cb_arg, 0); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + ctx->blob = blob; + ctx->seq = seq; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + if (blob->bs->clean) { + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + cb_fn(seq, cb_arg, -ENOMEM); + free(ctx); + return; + } + + spdk_bs_sequence_read_dev(seq, ctx->super, _spdk_bs_page_to_lba(blob->bs, 0), + _spdk_bs_byte_to_lba(blob->bs, sizeof(*ctx->super)), + _spdk_blob_persist_dirty, ctx); + } else { + _spdk_blob_persist_start(ctx); + } +} + +struct spdk_blob_copy_cluster_ctx { + struct spdk_blob *blob; + uint8_t *buf; + uint64_t page; + uint64_t new_cluster; + spdk_bs_sequence_t *seq; +}; + +static void +_spdk_blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno) +{ + struct spdk_blob_copy_cluster_ctx *ctx = cb_arg; + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq; + TAILQ_HEAD(, spdk_bs_request_set) requests; + spdk_bs_user_op_t *op; + + TAILQ_INIT(&requests); + TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link); + + while (!TAILQ_EMPTY(&requests)) { + op = TAILQ_FIRST(&requests); + TAILQ_REMOVE(&requests, op, link); + if (bserrno == 0) { + spdk_bs_user_op_execute(op); + } else { + spdk_bs_user_op_abort(op); + } + } + + spdk_dma_free(ctx->buf); + free(ctx); +} + +static void +_spdk_blob_insert_cluster_cpl(void *cb_arg, int bserrno) +{ + struct spdk_blob_copy_cluster_ctx *ctx = cb_arg; + + if (bserrno) { + uint32_t cluster_number; + + if (bserrno == -EEXIST) { + /* The metadata insert failed because another thread + * allocated the cluster first. Free our cluster + * but continue without error. 
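+			 * The cluster inserted by the other thread already backs this
+			 * range, so the queued user operations can still execute.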
*/ + bserrno = 0; + } + + cluster_number = _spdk_bs_page_to_cluster(ctx->blob->bs, ctx->page); + _spdk_bs_release_cluster(ctx->blob->bs, cluster_number); + } + + spdk_bs_sequence_finish(ctx->seq, bserrno); +} + +static void +_spdk_blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_copy_cluster_ctx *ctx = cb_arg; + uint32_t cluster_number; + + if (bserrno) { + /* The write failed, so jump to the final completion handler */ + spdk_bs_sequence_finish(seq, bserrno); + return; + } + + cluster_number = _spdk_bs_page_to_cluster(ctx->blob->bs, ctx->page); + + _spdk_blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster, + _spdk_blob_insert_cluster_cpl, ctx); +} + +static void +_spdk_blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_copy_cluster_ctx *ctx = cb_arg; + + if (bserrno != 0) { + /* The read failed, so jump to the final completion handler */ + spdk_bs_sequence_finish(seq, bserrno); + return; + } + + /* Write whole cluster */ + spdk_bs_sequence_write_dev(seq, ctx->buf, + _spdk_bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster), + _spdk_bs_cluster_to_lba(ctx->blob->bs, 1), + _spdk_blob_write_copy_cpl, ctx); +} + +static void +_spdk_bs_allocate_and_copy_cluster(struct spdk_blob *blob, + struct spdk_io_channel *_ch, + uint64_t io_unit, spdk_bs_user_op_t *op) +{ + struct spdk_bs_cpl cpl; + struct spdk_bs_channel *ch; + struct spdk_blob_copy_cluster_ctx *ctx; + uint32_t cluster_start_page; + uint32_t cluster_number; + int rc; + + ch = spdk_io_channel_get_ctx(_ch); + + if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) { + /* There are already operations pending. Queue this user op + * and return because it will be re-executed when the outstanding + * cluster allocation completes. */ + TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link); + return; + } + + /* Round the io_unit offset down to the first page in the cluster */ + cluster_start_page = _spdk_bs_io_unit_to_cluster_start(blob, io_unit); + + /* Calculate which index in the metadata cluster array the corresponding + * cluster is supposed to be at. 
*/ + cluster_number = _spdk_bs_io_unit_to_cluster_number(blob, io_unit); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_bs_user_op_abort(op); + return; + } + + assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0); + + ctx->blob = blob; + ctx->page = cluster_start_page; + + if (blob->parent_id != SPDK_BLOBID_INVALID) { + ctx->buf = spdk_dma_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, NULL); + if (!ctx->buf) { + SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n", + blob->bs->cluster_sz); + free(ctx); + spdk_bs_user_op_abort(op); + return; + } + } + + rc = _spdk_bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, false); + if (rc != 0) { + spdk_dma_free(ctx->buf); + free(ctx); + spdk_bs_user_op_abort(op); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = _spdk_blob_allocate_and_copy_cluster_cpl; + cpl.u.blob_basic.cb_arg = ctx; + + ctx->seq = spdk_bs_sequence_start(_ch, &cpl); + if (!ctx->seq) { + _spdk_bs_release_cluster(blob->bs, ctx->new_cluster); + spdk_dma_free(ctx->buf); + free(ctx); + spdk_bs_user_op_abort(op); + return; + } + + /* Queue the user op to block other incoming operations */ + TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link); + + if (blob->parent_id != SPDK_BLOBID_INVALID) { + /* Read cluster from backing device */ + spdk_bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf, + _spdk_bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page), + _spdk_bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz), + _spdk_blob_write_copy, ctx); + } else { + _spdk_blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster, + _spdk_blob_insert_cluster_cpl, ctx); + } +} + +static void +_spdk_blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length, + uint64_t *lba, uint32_t *lba_count) +{ + *lba_count = length; + + if (!_spdk_bs_io_unit_is_allocated(blob, io_unit)) { + assert(blob->back_bs_dev != NULL); + *lba = _spdk_bs_io_unit_to_back_dev_lba(blob, io_unit); + *lba_count = _spdk_bs_io_unit_to_back_dev_lba(blob, *lba_count); + } else { + *lba = _spdk_bs_blob_io_unit_to_lba(blob, io_unit); + } +} + +struct op_split_ctx { + struct spdk_blob *blob; + struct spdk_io_channel *channel; + uint64_t io_unit_offset; + uint64_t io_units_remaining; + void *curr_payload; + enum spdk_blob_op_type op_type; + spdk_bs_sequence_t *seq; +}; + +static void +_spdk_blob_request_submit_op_split_next(void *cb_arg, int bserrno) +{ + struct op_split_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_io_channel *ch = ctx->channel; + enum spdk_blob_op_type op_type = ctx->op_type; + uint8_t *buf = ctx->curr_payload; + uint64_t offset = ctx->io_unit_offset; + uint64_t length = ctx->io_units_remaining; + uint64_t op_length; + + if (bserrno != 0 || ctx->io_units_remaining == 0) { + spdk_bs_sequence_finish(ctx->seq, bserrno); + free(ctx); + return; + } + + op_length = spdk_min(length, _spdk_bs_num_io_units_to_cluster_boundary(blob, + offset)); + + /* Update length and payload for next operation */ + ctx->io_units_remaining -= op_length; + ctx->io_unit_offset += op_length; + if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) { + ctx->curr_payload += op_length * blob->bs->io_unit_size; + } + + switch (op_type) { + case SPDK_BLOB_READ: + spdk_blob_io_read(blob, ch, buf, offset, op_length, + _spdk_blob_request_submit_op_split_next, ctx); + break; + case SPDK_BLOB_WRITE: + spdk_blob_io_write(blob, ch, buf, offset, op_length, + 
_spdk_blob_request_submit_op_split_next, ctx); + break; + case SPDK_BLOB_UNMAP: + spdk_blob_io_unmap(blob, ch, offset, op_length, + _spdk_blob_request_submit_op_split_next, ctx); + break; + case SPDK_BLOB_WRITE_ZEROES: + spdk_blob_io_write_zeroes(blob, ch, offset, op_length, + _spdk_blob_request_submit_op_split_next, ctx); + break; + case SPDK_BLOB_READV: + case SPDK_BLOB_WRITEV: + SPDK_ERRLOG("readv/write not valid for %s\n", __func__); + spdk_bs_sequence_finish(ctx->seq, -EINVAL); + free(ctx); + break; + } +} + +static void +_spdk_blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type) +{ + struct op_split_ctx *ctx; + spdk_bs_sequence_t *seq; + struct spdk_bs_cpl cpl; + + assert(blob != NULL); + + ctx = calloc(1, sizeof(struct op_split_ctx)); + if (ctx == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(ch, &cpl); + if (!seq) { + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->blob = blob; + ctx->channel = ch; + ctx->curr_payload = payload; + ctx->io_unit_offset = offset; + ctx->io_units_remaining = length; + ctx->op_type = op_type; + ctx->seq = seq; + + _spdk_blob_request_submit_op_split_next(ctx, 0); +} + +static void +_spdk_blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type) +{ + struct spdk_bs_cpl cpl; + uint64_t lba; + uint32_t lba_count; + + assert(blob != NULL); + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + _spdk_blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count); + + if (blob->frozen_refcnt) { + /* This blob I/O is frozen */ + spdk_bs_user_op_t *op; + struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch); + + op = spdk_bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length); + if (!op) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link); + + return; + } + + switch (op_type) { + case SPDK_BLOB_READ: { + spdk_bs_batch_t *batch; + + batch = spdk_bs_batch_open(_ch, &cpl); + if (!batch) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + if (_spdk_bs_io_unit_is_allocated(blob, offset)) { + /* Read from the blob */ + spdk_bs_batch_read_dev(batch, payload, lba, lba_count); + } else { + /* Read from the backing block device */ + spdk_bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count); + } + + spdk_bs_batch_close(batch); + break; + } + case SPDK_BLOB_WRITE: + case SPDK_BLOB_WRITE_ZEROES: { + if (_spdk_bs_io_unit_is_allocated(blob, offset)) { + /* Write to the blob */ + spdk_bs_batch_t *batch; + + if (lba_count == 0) { + cb_fn(cb_arg, 0); + return; + } + + batch = spdk_bs_batch_open(_ch, &cpl); + if (!batch) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + if (op_type == SPDK_BLOB_WRITE) { + spdk_bs_batch_write_dev(batch, payload, lba, lba_count); + } else { + spdk_bs_batch_write_zeroes_dev(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); + } else { + /* Queue this operation and allocate the cluster */ + spdk_bs_user_op_t *op; + + op = spdk_bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length); + if (!op) { + cb_fn(cb_arg, 
-ENOMEM); + return; + } + + _spdk_bs_allocate_and_copy_cluster(blob, _ch, offset, op); + } + break; + } + case SPDK_BLOB_UNMAP: { + spdk_bs_batch_t *batch; + + batch = spdk_bs_batch_open(_ch, &cpl); + if (!batch) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + if (_spdk_bs_io_unit_is_allocated(blob, offset)) { + spdk_bs_batch_unmap_dev(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); + break; + } + case SPDK_BLOB_READV: + case SPDK_BLOB_WRITEV: + SPDK_ERRLOG("readv/write not valid\n"); + cb_fn(cb_arg, -EINVAL); + break; + } +} + +static void +_spdk_blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type) +{ + assert(blob != NULL); + + if (blob->data_ro && op_type != SPDK_BLOB_READ) { + cb_fn(cb_arg, -EPERM); + return; + } + + if (offset + length > _spdk_bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) { + cb_fn(cb_arg, -EINVAL); + return; + } + if (length <= _spdk_bs_num_io_units_to_cluster_boundary(blob, offset)) { + _spdk_blob_request_submit_op_single(_channel, blob, payload, offset, length, + cb_fn, cb_arg, op_type); + } else { + _spdk_blob_request_submit_op_split(_channel, blob, payload, offset, length, + cb_fn, cb_arg, op_type); + } +} + +struct rw_iov_ctx { + struct spdk_blob *blob; + struct spdk_io_channel *channel; + spdk_blob_op_complete cb_fn; + void *cb_arg; + bool read; + int iovcnt; + struct iovec *orig_iov; + uint64_t io_unit_offset; + uint64_t io_units_remaining; + uint64_t io_units_done; + struct iovec iov[0]; +}; + +static void +_spdk_rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + assert(cb_arg == NULL); + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_rw_iov_split_next(void *cb_arg, int bserrno) +{ + struct rw_iov_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct iovec *iov, *orig_iov; + int iovcnt; + size_t orig_iovoff; + uint64_t io_units_count, io_units_to_boundary, io_unit_offset; + uint64_t byte_count; + + if (bserrno != 0 || ctx->io_units_remaining == 0) { + ctx->cb_fn(ctx->cb_arg, bserrno); + free(ctx); + return; + } + + io_unit_offset = ctx->io_unit_offset; + io_units_to_boundary = _spdk_bs_num_io_units_to_cluster_boundary(blob, io_unit_offset); + io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary); + /* + * Get index and offset into the original iov array for our current position in the I/O sequence. + * byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will + * point to the current position in the I/O sequence. + */ + byte_count = ctx->io_units_done * blob->bs->io_unit_size; + orig_iov = &ctx->orig_iov[0]; + orig_iovoff = 0; + while (byte_count > 0) { + if (byte_count >= orig_iov->iov_len) { + byte_count -= orig_iov->iov_len; + orig_iov++; + } else { + orig_iovoff = byte_count; + byte_count = 0; + } + } + + /* + * Build an iov array for the next I/O in the sequence. byte_count will keep track of how many + * bytes of this next I/O remain to be accounted for in the new iov array. 
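+	 * A sub-I/O never covers more of the original buffers than the full request,
+	 * so the iov array sized from the caller's iovcnt is large enough (asserted below).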
+ */ + byte_count = io_units_count * blob->bs->io_unit_size; + iov = &ctx->iov[0]; + iovcnt = 0; + while (byte_count > 0) { + assert(iovcnt < ctx->iovcnt); + iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff); + iov->iov_base = orig_iov->iov_base + orig_iovoff; + byte_count -= iov->iov_len; + orig_iovoff = 0; + orig_iov++; + iov++; + iovcnt++; + } + + ctx->io_unit_offset += io_units_count; + ctx->io_units_remaining -= io_units_count; + ctx->io_units_done += io_units_count; + iov = &ctx->iov[0]; + + if (ctx->read) { + spdk_blob_io_readv(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset, + io_units_count, _spdk_rw_iov_split_next, ctx); + } else { + spdk_blob_io_writev(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset, + io_units_count, _spdk_rw_iov_split_next, ctx); + } +} + +static void +_spdk_blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg, bool read) +{ + struct spdk_bs_cpl cpl; + + assert(blob != NULL); + + if (!read && blob->data_ro) { + cb_fn(cb_arg, -EPERM); + return; + } + + if (length == 0) { + cb_fn(cb_arg, 0); + return; + } + + if (offset + length > _spdk_bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) { + cb_fn(cb_arg, -EINVAL); + return; + } + + /* + * For now, we implement readv/writev using a sequence (instead of a batch) to account for having + * to split a request that spans a cluster boundary. For I/O that do not span a cluster boundary, + * there will be no noticeable difference compared to using a batch. For I/O that do span a cluster + * boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need + * to allocate a separate iov array and split the I/O such that none of the resulting + * smaller I/O cross a cluster boundary. These smaller I/O will be issued in sequence (not in parallel) + * but since this case happens very infrequently, any performance impact will be negligible. + * + * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs + * for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them + * in a batch. That would also require creating an intermediate spdk_bs_cpl that would get called + * when the batch was completed, to allow for freeing the memory for the iov arrays. 
+ */ + if (spdk_likely(length <= _spdk_bs_num_io_units_to_cluster_boundary(blob, offset))) { + uint32_t lba_count; + uint64_t lba; + + _spdk_blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count); + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + if (blob->frozen_refcnt) { + /* This blob I/O is frozen */ + spdk_bs_user_op_t *op; + struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel); + + op = spdk_bs_user_op_alloc(_channel, &cpl, read, blob, iov, iovcnt, offset, length); + if (!op) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link); + + return; + } + + if (read) { + spdk_bs_sequence_t *seq; + + seq = spdk_bs_sequence_start(_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + if (_spdk_bs_io_unit_is_allocated(blob, offset)) { + spdk_bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, _spdk_rw_iov_done, NULL); + } else { + spdk_bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count, + _spdk_rw_iov_done, NULL); + } + } else { + if (_spdk_bs_io_unit_is_allocated(blob, offset)) { + spdk_bs_sequence_t *seq; + + seq = spdk_bs_sequence_start(_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + spdk_bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, _spdk_rw_iov_done, NULL); + } else { + /* Queue this operation and allocate the cluster */ + spdk_bs_user_op_t *op; + + op = spdk_bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset, + length); + if (!op) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + _spdk_bs_allocate_and_copy_cluster(blob, _channel, offset, op); + } + } + } else { + struct rw_iov_ctx *ctx; + + ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec)); + if (ctx == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->blob = blob; + ctx->channel = _channel; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + ctx->read = read; + ctx->orig_iov = iov; + ctx->iovcnt = iovcnt; + ctx->io_unit_offset = offset; + ctx->io_units_remaining = length; + ctx->io_units_done = 0; + + _spdk_rw_iov_split_next(ctx, 0); + } +} + +static struct spdk_blob * +_spdk_blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid) +{ + struct spdk_blob *blob; + + TAILQ_FOREACH(blob, &bs->blobs, link) { + if (blob->id == blobid) { + return blob; + } + } + + return NULL; +} + +static int +_spdk_bs_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_blob_store *bs = io_device; + struct spdk_bs_channel *channel = ctx_buf; + struct spdk_bs_dev *dev; + uint32_t max_ops = bs->max_channel_ops; + uint32_t i; + + dev = bs->dev; + + channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set)); + if (!channel->req_mem) { + return -1; + } + + TAILQ_INIT(&channel->reqs); + + for (i = 0; i < max_ops; i++) { + TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link); + } + + channel->bs = bs; + channel->dev = dev; + channel->dev_channel = dev->create_channel(dev); + + if (!channel->dev_channel) { + SPDK_ERRLOG("Failed to create device channel.\n"); + free(channel->req_mem); + return -1; + } + + TAILQ_INIT(&channel->need_cluster_alloc); + TAILQ_INIT(&channel->queued_io); + + return 0; +} + +static void +_spdk_bs_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_bs_channel *channel = ctx_buf; + spdk_bs_user_op_t *op; + + while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) { + op = TAILQ_FIRST(&channel->need_cluster_alloc); + 
TAILQ_REMOVE(&channel->need_cluster_alloc, op, link); + spdk_bs_user_op_abort(op); + } + + while (!TAILQ_EMPTY(&channel->queued_io)) { + op = TAILQ_FIRST(&channel->queued_io); + TAILQ_REMOVE(&channel->queued_io, op, link); + spdk_bs_user_op_abort(op); + } + + free(channel->req_mem); + channel->dev->destroy_channel(channel->dev, channel->dev_channel); +} + +static void +_spdk_bs_dev_destroy(void *io_device) +{ + struct spdk_blob_store *bs = io_device; + struct spdk_blob *blob, *blob_tmp; + + bs->dev->destroy(bs->dev); + + TAILQ_FOREACH_SAFE(blob, &bs->blobs, link, blob_tmp) { + TAILQ_REMOVE(&bs->blobs, blob, link); + _spdk_blob_free(blob); + } + + pthread_mutex_destroy(&bs->used_clusters_mutex); + + spdk_bit_array_free(&bs->used_blobids); + spdk_bit_array_free(&bs->used_md_pages); + spdk_bit_array_free(&bs->used_clusters); + /* + * If this function is called for any reason except a successful unload, + * the unload_cpl type will be NONE and this will be a nop. + */ + spdk_bs_call_cpl(&bs->unload_cpl, bs->unload_err); + + free(bs); +} + +static int +_spdk_bs_blob_list_add(struct spdk_blob *blob) +{ + spdk_blob_id snapshot_id; + struct spdk_blob_list *snapshot_entry = NULL; + struct spdk_blob_list *clone_entry = NULL; + + assert(blob != NULL); + + snapshot_id = blob->parent_id; + if (snapshot_id == SPDK_BLOBID_INVALID) { + return 0; + } + + TAILQ_FOREACH(snapshot_entry, &blob->bs->snapshots, link) { + if (snapshot_entry->id == snapshot_id) { + break; + } + } + + if (snapshot_entry == NULL) { + /* Snapshot not found */ + snapshot_entry = calloc(1, sizeof(struct spdk_blob_list)); + if (snapshot_entry == NULL) { + return -ENOMEM; + } + snapshot_entry->id = snapshot_id; + TAILQ_INIT(&snapshot_entry->clones); + TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link); + } else { + TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) { + if (clone_entry->id == blob->id) { + break; + } + } + } + + if (clone_entry == NULL) { + /* Clone not found */ + clone_entry = calloc(1, sizeof(struct spdk_blob_list)); + if (clone_entry == NULL) { + return -ENOMEM; + } + clone_entry->id = blob->id; + TAILQ_INIT(&clone_entry->clones); + TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link); + snapshot_entry->clone_count++; + } + + return 0; +} + +static int +_spdk_bs_blob_list_remove(struct spdk_blob *blob) +{ + struct spdk_blob_list *snapshot_entry = NULL; + struct spdk_blob_list *clone_entry = NULL; + spdk_blob_id snapshot_id; + + assert(blob != NULL); + + snapshot_id = blob->parent_id; + if (snapshot_id == SPDK_BLOBID_INVALID) { + return 0; + } + + TAILQ_FOREACH(snapshot_entry, &blob->bs->snapshots, link) { + if (snapshot_entry->id == snapshot_id) { + break; + } + } + + assert(snapshot_entry != NULL); + + TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) { + if (clone_entry->id == blob->id) { + break; + } + } + + assert(clone_entry != NULL); + + blob->parent_id = SPDK_BLOBID_INVALID; + TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link); + free(clone_entry); + + snapshot_entry->clone_count--; + if (snapshot_entry->clone_count == 0) { + /* Snapshot have no more clones */ + TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link); + free(snapshot_entry); + } + + return 0; +} + +static int +_spdk_bs_blob_list_free(struct spdk_blob_store *bs) +{ + struct spdk_blob_list *snapshot_entry; + struct spdk_blob_list *snapshot_entry_tmp; + struct spdk_blob_list *clone_entry; + struct spdk_blob_list *clone_entry_tmp; + + TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, 
snapshot_entry_tmp) { + TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) { + TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link); + free(clone_entry); + } + TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link); + free(snapshot_entry); + } + + return 0; +} + +static void +_spdk_bs_free(struct spdk_blob_store *bs) +{ + _spdk_bs_blob_list_free(bs); + + spdk_bs_unregister_md_thread(bs); + spdk_io_device_unregister(bs, _spdk_bs_dev_destroy); +} + +void +spdk_bs_opts_init(struct spdk_bs_opts *opts) +{ + opts->cluster_sz = SPDK_BLOB_OPTS_CLUSTER_SZ; + opts->num_md_pages = SPDK_BLOB_OPTS_NUM_MD_PAGES; + opts->max_md_ops = SPDK_BLOB_OPTS_MAX_MD_OPS; + opts->max_channel_ops = SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS; + memset(&opts->bstype, 0, sizeof(opts->bstype)); + opts->iter_cb_fn = NULL; + opts->iter_cb_arg = NULL; +} + +static int +_spdk_bs_opts_verify(struct spdk_bs_opts *opts) +{ + if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 || + opts->max_channel_ops == 0) { + SPDK_ERRLOG("Blobstore options cannot be set to 0\n"); + return -1; + } + + return 0; +} + +static int +_spdk_bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs) +{ + struct spdk_blob_store *bs; + uint64_t dev_size; + int rc; + + dev_size = dev->blocklen * dev->blockcnt; + if (dev_size < opts->cluster_sz) { + /* Device size cannot be smaller than cluster size of blobstore */ + SPDK_INFOLOG(SPDK_LOG_BLOB, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n", + dev_size, opts->cluster_sz); + return -ENOSPC; + } + if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) { + /* Cluster size cannot be smaller than page size */ + SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n", + opts->cluster_sz, SPDK_BS_PAGE_SIZE); + return -EINVAL; + } + bs = calloc(1, sizeof(struct spdk_blob_store)); + if (!bs) { + return -ENOMEM; + } + + TAILQ_INIT(&bs->blobs); + TAILQ_INIT(&bs->snapshots); + bs->dev = dev; + bs->md_thread = spdk_get_thread(); + assert(bs->md_thread != NULL); + + /* + * Do not use _spdk_bs_lba_to_cluster() here since blockcnt may not be an + * even multiple of the cluster size. 
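+ *
+ * The plain integer division below truncates any partial trailing cluster.
+ * For example (illustrative numbers), a 1 MiB cluster on a 512-byte-block
+ * device spans 2048 blocks, so a 10000-block device yields total_clusters = 4
+ * and the trailing 1808 blocks are never used.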
+ */ + bs->cluster_sz = opts->cluster_sz; + bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen); + bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE; + bs->num_free_clusters = bs->total_clusters; + bs->used_clusters = spdk_bit_array_create(bs->total_clusters); + bs->io_unit_size = dev->blocklen; + if (bs->used_clusters == NULL) { + free(bs); + return -ENOMEM; + } + + bs->max_channel_ops = opts->max_channel_ops; + bs->super_blob = SPDK_BLOBID_INVALID; + memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype)); + + /* The metadata is assumed to be at least 1 page */ + bs->used_md_pages = spdk_bit_array_create(1); + bs->used_blobids = spdk_bit_array_create(0); + + pthread_mutex_init(&bs->used_clusters_mutex, NULL); + + spdk_io_device_register(bs, _spdk_bs_channel_create, _spdk_bs_channel_destroy, + sizeof(struct spdk_bs_channel), "blobstore"); + rc = spdk_bs_register_md_thread(bs); + if (rc == -1) { + spdk_io_device_unregister(bs, NULL); + pthread_mutex_destroy(&bs->used_clusters_mutex); + spdk_bit_array_free(&bs->used_blobids); + spdk_bit_array_free(&bs->used_md_pages); + spdk_bit_array_free(&bs->used_clusters); + free(bs); + /* FIXME: this is a lie but don't know how to get a proper error code here */ + return -ENOMEM; + } + + *_bs = bs; + return 0; +} + +/* START spdk_bs_load, spdk_bs_load_ctx will used for both load and unload. */ + +struct spdk_bs_load_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; + + struct spdk_bs_md_mask *mask; + bool in_page_chain; + uint32_t page_index; + uint32_t cur_page; + struct spdk_blob_md_page *page; + bool is_load; + + spdk_bs_sequence_t *seq; + spdk_blob_op_with_handle_complete iter_cb_fn; + void *iter_cb_arg; +}; + +static void +_spdk_bs_load_ctx_fail(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno) +{ + assert(bserrno != 0); + + spdk_dma_free(ctx->super); + spdk_bs_sequence_finish(seq, bserrno); + /* + * Only free the blobstore when a load fails. If an unload fails (for some reason) + * we want to keep the blobstore in case the caller wants to try again. 
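+ *
+ * The two cases are distinguished by ctx->is_load: spdk_bs_load() sets it to
+ * true, spdk_bs_unload() sets it to false.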
+ */ + if (ctx->is_load) { + _spdk_bs_free(ctx->bs); + } + free(ctx); +} + +static void +_spdk_bs_set_mask(struct spdk_bit_array *array, struct spdk_bs_md_mask *mask) +{ + uint32_t i = 0; + + while (true) { + i = spdk_bit_array_find_first_set(array, i); + if (i >= mask->length) { + break; + } + mask->mask[i / 8] |= 1U << (i % 8); + i++; + } +} + +static int +_spdk_bs_load_mask(struct spdk_bit_array **array_ptr, struct spdk_bs_md_mask *mask) +{ + struct spdk_bit_array *array; + uint32_t i; + + if (spdk_bit_array_resize(array_ptr, mask->length) < 0) { + return -ENOMEM; + } + + array = *array_ptr; + for (i = 0; i < mask->length; i++) { + if (mask->mask[i / 8] & (1U << (i % 8))) { + spdk_bit_array_set(array, i); + } + } + + return 0; +} + +static void +_spdk_bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs, + struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + /* Update the values in the super block */ + super->super_blob = bs->super_blob; + memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype)); + super->crc = _spdk_blob_md_page_calc_crc(super); + spdk_bs_sequence_write_dev(seq, super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*super)), + cb_fn, cb_arg); +} + +static void +_spdk_bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn) +{ + struct spdk_bs_load_ctx *ctx = arg; + uint64_t mask_size, lba, lba_count; + + /* Write out the used clusters mask */ + mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS; + ctx->mask->length = ctx->bs->total_clusters; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_clusters)); + + _spdk_bs_set_mask(ctx->bs->used_clusters, ctx->mask); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len); + spdk_bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg); +} + +static void +_spdk_bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn) +{ + struct spdk_bs_load_ctx *ctx = arg; + uint64_t mask_size, lba, lba_count; + + mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES; + ctx->mask->length = ctx->super->md_len; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages)); + + _spdk_bs_set_mask(ctx->bs->used_md_pages, ctx->mask); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len); + spdk_bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg); +} + +static void +_spdk_bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn) +{ + struct spdk_bs_load_ctx *ctx = arg; + uint64_t mask_size, lba, lba_count; + + if (ctx->super->used_blobid_mask_len == 0) { + /* + * This is a pre-v3 on-disk format where the blobid mask does not get + * written to disk. 
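+ * In that case there is nothing to persist, so complete immediately and let
+ * the rest of the write-out sequence continue.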
+ */ + cb_fn(seq, arg, 0); + return; + } + + mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS; + ctx->mask->length = ctx->super->md_len; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids)); + + _spdk_bs_set_mask(ctx->bs->used_blobids, ctx->mask); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len); + spdk_bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg); +} + +static void +_spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = arg; + + if (bserrno == 0) { + if (ctx->iter_cb_fn) { + ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0); + } + _spdk_bs_blob_list_add(blob); + spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx); + return; + } + + if (bserrno == -ENOENT) { + bserrno = 0; + } else { + /* + * This case needs to be looked at further. Same problem + * exists with applications that rely on explicit blob + * iteration. We should just skip the blob that failed + * to load and continue on to the next one. + */ + SPDK_ERRLOG("Error in iterating blobs\n"); + } + + ctx->iter_cb_fn = NULL; + + spdk_dma_free(ctx->super); + spdk_dma_free(ctx->mask); + spdk_bs_sequence_finish(ctx->seq, bserrno); + free(ctx); +} + +static void +_spdk_bs_load_complete(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno) +{ + ctx->seq = seq; + spdk_bs_iter_first(ctx->bs, _spdk_bs_load_iter, ctx); +} + +static void +_spdk_bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS); + + /* The length of the mask (in bits) must not be greater than + * the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8)); + + /* The length of the mask must be exactly equal to the size + * (in pages) of the metadata region */ + assert(ctx->mask->length == ctx->super->md_len); + + rc = _spdk_bs_load_mask(&ctx->bs->used_blobids, ctx->mask); + if (rc < 0) { + spdk_dma_free(ctx->mask); + _spdk_bs_load_ctx_fail(seq, ctx, rc); + return; + } + + _spdk_bs_load_complete(seq, ctx, bserrno); +} + +static void +_spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count, mask_size; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS); + /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof( + struct spdk_blob_md_page) * 8)); + /* The length of the mask must be exactly equal to the total number of clusters */ + assert(ctx->mask->length == ctx->bs->total_clusters); + + rc = _spdk_bs_load_mask(&ctx->bs->used_clusters, ctx->mask); + if (rc < 0) { + spdk_dma_free(ctx->mask); + _spdk_bs_load_ctx_fail(seq, ctx, rc); + return; + } + + ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->bs->used_clusters); + assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters); + + spdk_dma_free(ctx->mask); + + /* Read the used blobids 
mask */ + mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len); + spdk_bs_sequence_read_dev(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_blobids_cpl, ctx); +} + +static void +_spdk_bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count, mask_size; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES); + /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE * + 8)); + /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */ + assert(ctx->mask->length == ctx->super->md_len); + + rc = _spdk_bs_load_mask(&ctx->bs->used_md_pages, ctx->mask); + if (rc < 0) { + spdk_dma_free(ctx->mask); + _spdk_bs_load_ctx_fail(seq, ctx, rc); + return; + } + + spdk_dma_free(ctx->mask); + + /* Read the used clusters mask */ + mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len); + spdk_bs_sequence_read_dev(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_clusters_cpl, ctx); +} + +static void +_spdk_bs_load_read_used_pages(spdk_bs_sequence_t *seq, void *cb_arg) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count, mask_size; + + /* Read the used pages mask */ + mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len); + spdk_bs_sequence_read_dev(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_pages_cpl, ctx); +} + +static int +_spdk_bs_load_replay_md_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob_store *bs) +{ + struct spdk_blob_md_descriptor *desc; + size_t cur_desc = 0; + + desc = (struct spdk_blob_md_descriptor *)page->descriptors; + while (cur_desc < sizeof(page->descriptors)) { + if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) { + if (desc->length == 0) { + /* If padding and length are 0, this terminates the page */ + break; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT) { + struct spdk_blob_md_descriptor_extent *desc_extent; + unsigned int i, j; + unsigned int cluster_count = 0; + uint32_t cluster_idx; + + desc_extent = (struct spdk_blob_md_descriptor_extent *)desc; + + for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + for (j = 0; j < desc_extent->extents[i].length; j++) { + cluster_idx = desc_extent->extents[i].cluster_idx; + /* + * cluster_idx = 0 means an unallocated cluster - don't mark that + * in the used cluster map. 
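+ * For example, an extent entry of { cluster_idx = 5, length = 3 } marks
+ * clusters 5, 6 and 7 as used, while { cluster_idx = 0, length = 3 } counts
+ * three thin-provisioned clusters and leaves the used-cluster map untouched.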
+ */ + if (cluster_idx != 0) { + spdk_bit_array_set(bs->used_clusters, cluster_idx + j); + if (bs->num_free_clusters == 0) { + return -ENOSPC; + } + bs->num_free_clusters--; + } + cluster_count++; + } + } + if (cluster_count == 0) { + return -EINVAL; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) { + /* Skip this item */ + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) { + /* Skip this item */ + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) { + /* Skip this item */ + } else { + /* Error */ + return -EINVAL; + } + /* Advance to the next descriptor */ + cur_desc += sizeof(*desc) + desc->length; + if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) { + break; + } + desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc); + } + return 0; +} + +static bool _spdk_bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx) +{ + uint32_t crc; + + crc = _spdk_blob_md_page_calc_crc(ctx->page); + if (crc != ctx->page->crc) { + return false; + } + + if (_spdk_bs_page_to_blobid(ctx->cur_page) != ctx->page->id) { + return false; + } + return true; +} + +static void +_spdk_bs_load_replay_cur_md_page(spdk_bs_sequence_t *seq, void *cb_arg); + +static void +_spdk_bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + _spdk_bs_load_complete(seq, ctx, bserrno); +} + +static void +_spdk_bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_load_write_used_clusters_cpl); +} + +static void +_spdk_bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_load_write_used_blobids_cpl); +} + +static void +_spdk_bs_load_write_used_md(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + _spdk_bs_write_used_md(seq, cb_arg, _spdk_bs_load_write_used_pages_cpl); +} + +static void +_spdk_bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t num_md_clusters; + uint64_t i; + uint32_t page_num; + + if (bserrno != 0) { + _spdk_bs_load_ctx_fail(seq, ctx, bserrno); + return; + } + + page_num = ctx->cur_page; + if (_spdk_bs_load_cur_md_page_valid(ctx) == true) { + if (ctx->page->sequence_num == 0 || ctx->in_page_chain == true) { + spdk_bit_array_set(ctx->bs->used_md_pages, page_num); + if (ctx->page->sequence_num == 0) { + spdk_bit_array_set(ctx->bs->used_blobids, page_num); + } + if (_spdk_bs_load_replay_md_parse_page(ctx->page, ctx->bs)) { + _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); + return; + } + if (ctx->page->next != SPDK_INVALID_MD_PAGE) { + ctx->in_page_chain = true; + ctx->cur_page = ctx->page->next; + _spdk_bs_load_replay_cur_md_page(seq, cb_arg); + return; + } + } + } + + ctx->in_page_chain = false; + + do { + ctx->page_index++; + } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true); + + if (ctx->page_index < ctx->super->md_len) { + ctx->cur_page = ctx->page_index; + _spdk_bs_load_replay_cur_md_page(seq, cb_arg); + } else { + /* Claim all of the clusters used by the metadata */ + num_md_clusters = divide_round_up(ctx->super->md_len, ctx->bs->pages_per_cluster); + for (i = 0; i < num_md_clusters; i++) { + 
_spdk_bs_claim_cluster(ctx->bs, i); + } + spdk_dma_free(ctx->page); + _spdk_bs_load_write_used_md(seq, ctx, bserrno); + } +} + +static void +_spdk_bs_load_replay_cur_md_page(spdk_bs_sequence_t *seq, void *cb_arg) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba; + + assert(ctx->cur_page < ctx->super->md_len); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page); + spdk_bs_sequence_read_dev(seq, ctx->page, lba, + _spdk_bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE), + _spdk_bs_load_replay_md_cpl, ctx); +} + +static void +_spdk_bs_load_replay_md(spdk_bs_sequence_t *seq, void *cb_arg) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + ctx->page_index = 0; + ctx->cur_page = 0; + ctx->page = spdk_dma_zmalloc(SPDK_BS_PAGE_SIZE, + SPDK_BS_PAGE_SIZE, + NULL); + if (!ctx->page) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + _spdk_bs_load_replay_cur_md_page(seq, cb_arg); +} + +static void +_spdk_bs_recover(spdk_bs_sequence_t *seq, void *cb_arg) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + int rc; + + rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len); + if (rc < 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len); + if (rc < 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + rc = spdk_bit_array_resize(&ctx->bs->used_clusters, ctx->bs->total_clusters); + if (rc < 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + ctx->bs->num_free_clusters = ctx->bs->total_clusters; + _spdk_bs_load_replay_md(seq, cb_arg); +} + +static void +_spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint32_t crc; + int rc; + static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH]; + + if (ctx->super->version > SPDK_BS_VERSION || + ctx->super->version < SPDK_BS_INITIAL_VERSION) { + _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); + return; + } + + if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG, + sizeof(ctx->super->signature)) != 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); + return; + } + + crc = _spdk_blob_md_page_calc_crc(ctx->super); + if (crc != ctx->super->crc) { + _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); + return; + } + + if (memcmp(&ctx->bs->bstype, &ctx->super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Bstype matched - loading blobstore\n"); + } else if (memcmp(&ctx->bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Bstype wildcard used - loading blobstore regardless bstype\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Unexpected bstype\n"); + SPDK_TRACEDUMP(SPDK_LOG_BLOB, "Expected:", ctx->bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH); + SPDK_TRACEDUMP(SPDK_LOG_BLOB, "Found:", ctx->super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH); + _spdk_bs_load_ctx_fail(seq, ctx, -ENXIO); + return; + } + + if (ctx->super->size > ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen) { + SPDK_NOTICELOG("Size mismatch, dev size: %lu, blobstore size: %lu\n", + ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen, ctx->super->size); + _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); + return; + } + + if (ctx->super->size == 0) { + ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen; + } + + if (ctx->super->io_unit_size == 0) { + ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE; + } + + /* Parse the super block */ + ctx->bs->clean = 1; + ctx->bs->cluster_sz = ctx->super->cluster_size; + ctx->bs->total_clusters 
= ctx->super->size / ctx->super->cluster_size; + ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE; + ctx->bs->io_unit_size = ctx->super->io_unit_size; + rc = spdk_bit_array_resize(&ctx->bs->used_clusters, ctx->bs->total_clusters); + if (rc < 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + ctx->bs->md_start = ctx->super->md_start; + ctx->bs->md_len = ctx->super->md_len; + ctx->bs->total_data_clusters = ctx->bs->total_clusters - divide_round_up( + ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster); + ctx->bs->super_blob = ctx->super->super_blob; + memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype)); + + if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) { + _spdk_bs_recover(seq, ctx); + } else { + _spdk_bs_load_read_used_pages(seq, ctx); + } +} + +void +spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_blob_store *bs; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_load_ctx *ctx; + struct spdk_bs_opts opts = {}; + int err; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Loading blobstore from dev %p\n", dev); + + if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "unsupported dev block length of %d\n", dev->blocklen); + dev->destroy(dev); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + if (o) { + opts = *o; + } else { + spdk_bs_opts_init(&opts); + } + + if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) { + dev->destroy(dev); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + err = _spdk_bs_alloc(dev, &opts, &bs); + if (err) { + dev->destroy(dev); + cb_fn(cb_arg, NULL, err); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->bs = bs; + ctx->is_load = true; + ctx->iter_cb_fn = opts.iter_cb_fn; + ctx->iter_cb_arg = opts.iter_cb_arg; + + /* Allocate memory for the super block */ + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE; + cpl.u.bs_handle.cb_fn = cb_fn; + cpl.u.bs_handle.cb_arg = cb_arg; + cpl.u.bs_handle.bs = bs; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_dma_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + /* Read the super block */ + spdk_bs_sequence_read_dev(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_load_super_cpl, ctx); +} + +/* END spdk_bs_load */ + +/* START spdk_bs_dump */ + +struct spdk_bs_dump_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; + uint32_t cur_page; + struct spdk_blob_md_page *page; + spdk_bs_sequence_t *seq; + FILE *fp; + spdk_bs_dump_print_xattr print_xattr_fn; + char xattr_name[4096]; +}; + +static void +_spdk_bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_dump_ctx *ctx, int bserrno) +{ + spdk_dma_free(ctx->super); + + /* + * We need to defer calling spdk_bs_call_cpl() until after + * dev destruction, so tuck these away for later use. 
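+ * _spdk_bs_dev_destroy() will invoke the saved unload_cpl with unload_err
+ * once the underlying bs_dev has actually been destroyed.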
+ */ + ctx->bs->unload_err = bserrno; + memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl)); + seq->cpl.type = SPDK_BS_CPL_TYPE_NONE; + + spdk_bs_sequence_finish(seq, 0); + _spdk_bs_free(ctx->bs); + free(ctx); +} + +static void _spdk_bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg); + +static void +_spdk_bs_dump_print_md_page(struct spdk_bs_dump_ctx *ctx) +{ + uint32_t page_idx = ctx->cur_page; + struct spdk_blob_md_page *page = ctx->page; + struct spdk_blob_md_descriptor *desc; + size_t cur_desc = 0; + uint32_t crc; + + fprintf(ctx->fp, "=========\n"); + fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx); + fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id); + + crc = _spdk_blob_md_page_calc_crc(page); + fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch"); + + desc = (struct spdk_blob_md_descriptor *)page->descriptors; + while (cur_desc < sizeof(page->descriptors)) { + if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) { + if (desc->length == 0) { + /* If padding and length are 0, this terminates the page */ + break; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT) { + struct spdk_blob_md_descriptor_extent *desc_extent; + unsigned int i; + + desc_extent = (struct spdk_blob_md_descriptor_extent *)desc; + + for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + if (desc_extent->extents[i].cluster_idx != 0) { + fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32, + desc_extent->extents[i].cluster_idx); + } else { + fprintf(ctx->fp, "Unallocated Extent - "); + } + fprintf(ctx->fp, " Length: %" PRIu32, desc_extent->extents[i].length); + fprintf(ctx->fp, "\n"); + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) { + struct spdk_blob_md_descriptor_xattr *desc_xattr; + uint32_t i; + + desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc; + + if (desc_xattr->length != + sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) + + desc_xattr->name_length + desc_xattr->value_length) { + } + + memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length); + ctx->xattr_name[desc_xattr->name_length] = '\0'; + fprintf(ctx->fp, "XATTR: name = \"%s\"\n", ctx->xattr_name); + fprintf(ctx->fp, " value = \""); + ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name, + (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length), + desc_xattr->value_length); + fprintf(ctx->fp, "\"\n"); + for (i = 0; i < desc_xattr->value_length; i++) { + if (i % 16 == 0) { + fprintf(ctx->fp, " "); + } + fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i)); + if ((i + 1) % 16 == 0) { + fprintf(ctx->fp, "\n"); + } + } + if (i % 16 != 0) { + fprintf(ctx->fp, "\n"); + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) { + /* TODO */ + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) { + /* TODO */ + } else { + /* Error */ + } + /* Advance to the next descriptor */ + cur_desc += sizeof(*desc) + desc->length; + if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) { + break; + } + desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc); + } +} + +static void +_spdk_bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_dump_ctx *ctx = cb_arg; + + if (bserrno != 0) { + _spdk_bs_dump_finish(seq, ctx, bserrno); + return; + } + + if (ctx->page->id != 0) { + _spdk_bs_dump_print_md_page(ctx); + } + 
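+ /* Advance to the next metadata page; once every page in the metadata
+  * region has been dumped, release the page buffer and finish the dump. */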
+ ctx->cur_page++; + + if (ctx->cur_page < ctx->super->md_len) { + _spdk_bs_dump_read_md_page(seq, cb_arg); + } else { + spdk_dma_free(ctx->page); + _spdk_bs_dump_finish(seq, ctx, 0); + } +} + +static void +_spdk_bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg) +{ + struct spdk_bs_dump_ctx *ctx = cb_arg; + uint64_t lba; + + assert(ctx->cur_page < ctx->super->md_len); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page); + spdk_bs_sequence_read_dev(seq, ctx->page, lba, + _spdk_bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE), + _spdk_bs_dump_read_md_page_cpl, ctx); +} + +static void +_spdk_bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_dump_ctx *ctx = cb_arg; + + fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature); + if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG, + sizeof(ctx->super->signature)) != 0) { + fprintf(ctx->fp, "(Mismatch)\n"); + _spdk_bs_dump_finish(seq, ctx, bserrno); + return; + } else { + fprintf(ctx->fp, "(OK)\n"); + } + fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version); + fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc, + (ctx->super->crc == _spdk_blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch"); + fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype); + fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size); + fprintf(ctx->fp, "Super Blob ID: "); + if (ctx->super->super_blob == SPDK_BLOBID_INVALID) { + fprintf(ctx->fp, "(None)\n"); + } else { + fprintf(ctx->fp, "%" PRIu64 "\n", ctx->super->super_blob); + } + fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean); + fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start); + fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len); + fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start); + fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len); + fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start); + fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len); + fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start); + fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len); + + ctx->cur_page = 0; + ctx->page = spdk_dma_zmalloc(SPDK_BS_PAGE_SIZE, + SPDK_BS_PAGE_SIZE, + NULL); + if (!ctx->page) { + _spdk_bs_dump_finish(seq, ctx, -ENOMEM); + return; + } + _spdk_bs_dump_read_md_page(seq, cb_arg); +} + +void +spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn, + spdk_bs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_blob_store *bs; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_dump_ctx *ctx; + struct spdk_bs_opts opts = {}; + int err; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Dumping blobstore from dev %p\n", dev); + + spdk_bs_opts_init(&opts); + + err = _spdk_bs_alloc(dev, &opts, &bs); + if (err) { + dev->destroy(dev); + cb_fn(cb_arg, err); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + _spdk_bs_free(bs); + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->bs = bs; + ctx->fp = fp; + ctx->print_xattr_fn = print_xattr_fn; + + /* Allocate memory for the super block */ + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, 
-ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + cpl.u.bs_basic.cb_fn = cb_fn; + cpl.u.bs_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_dma_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, -ENOMEM); + return; + } + + /* Read the super block */ + spdk_bs_sequence_read_dev(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_dump_super_cpl, ctx); +} + +/* END spdk_bs_dump */ + +/* START spdk_bs_init */ + +struct spdk_bs_init_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; +}; + +static void +_spdk_bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_init_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->super); + free(ctx); + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_init_ctx *ctx = cb_arg; + + /* Write super block */ + spdk_bs_sequence_write_dev(seq, ctx->super, _spdk_bs_page_to_lba(ctx->bs, 0), + _spdk_bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)), + _spdk_bs_init_persist_super_cpl, ctx); +} + +void +spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_init_ctx *ctx; + struct spdk_blob_store *bs; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + spdk_bs_batch_t *batch; + uint64_t num_md_lba; + uint64_t num_md_pages; + uint64_t num_md_clusters; + uint32_t i; + struct spdk_bs_opts opts = {}; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Initializing blobstore on dev %p\n", dev); + + if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) { + SPDK_ERRLOG("unsupported dev block length of %d\n", + dev->blocklen); + dev->destroy(dev); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + if (o) { + opts = *o; + } else { + spdk_bs_opts_init(&opts); + } + + if (_spdk_bs_opts_verify(&opts) != 0) { + dev->destroy(dev); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + rc = _spdk_bs_alloc(dev, &opts, &bs); + if (rc) { + dev->destroy(dev); + cb_fn(cb_arg, NULL, rc); + return; + } + + if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) { + /* By default, allocate 1 page per cluster. + * Technically, this over-allocates metadata + * because more metadata will reduce the number + * of usable clusters. This can be addressed with + * more complex math in the future. 
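+ *
+ * For example (illustrative), a 10 GiB device with 1 MiB clusters has 10240
+ * clusters, so the default reserves 10240 metadata pages (40 MiB at the
+ * 4 KiB metadata page size).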
+ */ + bs->md_len = bs->total_clusters; + } else { + bs->md_len = opts.num_md_pages; + } + rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len); + if (rc < 0) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len); + if (rc < 0) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->bs = bs; + + /* Allocate memory for the super block */ + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG, + sizeof(ctx->super->signature)); + ctx->super->version = SPDK_BS_VERSION; + ctx->super->length = sizeof(*ctx->super); + ctx->super->super_blob = bs->super_blob; + ctx->super->clean = 0; + ctx->super->cluster_size = bs->cluster_sz; + ctx->super->io_unit_size = bs->io_unit_size; + memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype)); + + /* Calculate how many pages the metadata consumes at the front + * of the disk. + */ + + /* The super block uses 1 page */ + num_md_pages = 1; + + /* The used_md_pages mask requires 1 bit per metadata page, rounded + * up to the nearest page, plus a header. + */ + ctx->super->used_page_mask_start = num_md_pages; + ctx->super->used_page_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->md_len, 8), + SPDK_BS_PAGE_SIZE); + num_md_pages += ctx->super->used_page_mask_len; + + /* The used_clusters mask requires 1 bit per cluster, rounded + * up to the nearest page, plus a header. + */ + ctx->super->used_cluster_mask_start = num_md_pages; + ctx->super->used_cluster_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->total_clusters, 8), + SPDK_BS_PAGE_SIZE); + num_md_pages += ctx->super->used_cluster_mask_len; + + /* The used_blobids mask requires 1 bit per metadata page, rounded + * up to the nearest page, plus a header. 
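+ * For example, with md_len = 10240 metadata pages the bitmask itself is 1280
+ * bytes, which together with the spdk_bs_md_mask header still fits in one
+ * page, so used_blobid_mask_len comes out to 1.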
+ */ + ctx->super->used_blobid_mask_start = num_md_pages; + ctx->super->used_blobid_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->md_len, 8), + SPDK_BS_PAGE_SIZE); + num_md_pages += ctx->super->used_blobid_mask_len; + + /* The metadata region size was chosen above */ + ctx->super->md_start = bs->md_start = num_md_pages; + ctx->super->md_len = bs->md_len; + num_md_pages += bs->md_len; + + num_md_lba = _spdk_bs_page_to_lba(bs, num_md_pages); + + ctx->super->size = dev->blockcnt * dev->blocklen; + + ctx->super->crc = _spdk_blob_md_page_calc_crc(ctx->super); + + num_md_clusters = divide_round_up(num_md_pages, bs->pages_per_cluster); + if (num_md_clusters > bs->total_clusters) { + SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, " + "please decrease number of pages reserved for metadata " + "or increase cluster size.\n"); + spdk_dma_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + /* Claim all of the clusters used by the metadata */ + for (i = 0; i < num_md_clusters; i++) { + _spdk_bs_claim_cluster(bs, i); + } + + bs->total_data_clusters = bs->num_free_clusters; + + cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE; + cpl.u.bs_handle.cb_fn = cb_fn; + cpl.u.bs_handle.cb_arg = cb_arg; + cpl.u.bs_handle.bs = bs; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_dma_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + batch = spdk_bs_sequence_to_batch(seq, _spdk_bs_init_trim_cpl, ctx); + + /* Clear metadata space */ + spdk_bs_batch_write_zeroes_dev(batch, 0, num_md_lba); + /* Trim data clusters */ + spdk_bs_batch_unmap_dev(batch, num_md_lba, ctx->bs->dev->blockcnt - num_md_lba); + + spdk_bs_batch_close(batch); +} + +/* END spdk_bs_init */ + +/* START spdk_bs_destroy */ + +static void +_spdk_bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_init_ctx *ctx = cb_arg; + struct spdk_blob_store *bs = ctx->bs; + + /* + * We need to defer calling spdk_bs_call_cpl() until after + * dev destruction, so tuck these away for later use. 
+ */ + bs->unload_err = bserrno; + memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl)); + seq->cpl.type = SPDK_BS_CPL_TYPE_NONE; + + spdk_bs_sequence_finish(seq, bserrno); + + _spdk_bs_free(bs); + free(ctx); +} + +void +spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, + void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_init_ctx *ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Destroying blobstore\n"); + + if (!TAILQ_EMPTY(&bs->blobs)) { + SPDK_ERRLOG("Blobstore still has open blobs\n"); + cb_fn(cb_arg, -EBUSY); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + cpl.u.bs_basic.cb_fn = cb_fn; + cpl.u.bs_basic.cb_arg = cb_arg; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->bs = bs; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + /* Write zeroes to the super block */ + spdk_bs_sequence_write_zeroes_dev(seq, + _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)), + _spdk_bs_destroy_trim_cpl, ctx); +} + +/* END spdk_bs_destroy */ + +/* START spdk_bs_unload */ + +static void +_spdk_bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->super); + + /* + * We need to defer calling spdk_bs_call_cpl() until after + * dev destruction, so tuck these away for later use. + */ + ctx->bs->unload_err = bserrno; + memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl)); + seq->cpl.type = SPDK_BS_CPL_TYPE_NONE; + + spdk_bs_sequence_finish(seq, bserrno); + + _spdk_bs_free(ctx->bs); + free(ctx); +} + +static void +_spdk_bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->super->clean = 1; + + _spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_unload_write_super_cpl, ctx); +} + +static void +_spdk_bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_unload_write_used_clusters_cpl); +} + +static void +_spdk_bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_unload_write_used_blobids_cpl); +} + +static void +_spdk_bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + _spdk_bs_write_used_md(seq, cb_arg, _spdk_bs_unload_write_used_pages_cpl); +} + +void +spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_load_ctx *ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Syncing blobstore\n"); + + if (!TAILQ_EMPTY(&bs->blobs)) { + SPDK_ERRLOG("Blobstore still has open blobs\n"); + cb_fn(cb_arg, -EBUSY); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->bs = bs; + ctx->is_load = false; + + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + cpl.u.bs_basic.cb_fn = cb_fn; + cpl.u.bs_basic.cb_arg = cb_arg; + + 
seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_dma_free(ctx->super); + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + /* Read super block */ + spdk_bs_sequence_read_dev(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_unload_read_super_cpl, ctx); +} + +/* END spdk_bs_unload */ + +/* START spdk_bs_set_super */ + +struct spdk_bs_set_super_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; +}; + +static void +_spdk_bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_set_super_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("Unable to write to super block of blobstore\n"); + } + + spdk_dma_free(ctx->super); + + spdk_bs_sequence_finish(seq, bserrno); + + free(ctx); +} + +static void +_spdk_bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_set_super_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("Unable to read super block of blobstore\n"); + spdk_dma_free(ctx->super); + spdk_bs_sequence_finish(seq, bserrno); + free(ctx); + return; + } + + _spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_set_super_write_cpl, ctx); +} + +void +spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_bs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_set_super_ctx *ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Setting super blob id on blobstore\n"); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->bs = bs; + + ctx->super = spdk_dma_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + cpl.u.bs_basic.cb_fn = cb_fn; + cpl.u.bs_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_dma_free(ctx->super); + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + bs->super_blob = blobid; + + /* Read super block */ + spdk_bs_sequence_read_dev(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_set_super_read_cpl, ctx); +} + +/* END spdk_bs_set_super */ + +void +spdk_bs_get_super(struct spdk_blob_store *bs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + if (bs->super_blob == SPDK_BLOBID_INVALID) { + cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT); + } else { + cb_fn(cb_arg, bs->super_blob, 0); + } +} + +uint64_t +spdk_bs_get_cluster_size(struct spdk_blob_store *bs) +{ + return bs->cluster_sz; +} + +uint64_t +spdk_bs_get_page_size(struct spdk_blob_store *bs) +{ + return SPDK_BS_PAGE_SIZE; +} + +uint64_t +spdk_bs_get_io_unit_size(struct spdk_blob_store *bs) +{ + return bs->io_unit_size; +} + +uint64_t +spdk_bs_free_cluster_count(struct spdk_blob_store *bs) +{ + return bs->num_free_clusters; +} + +uint64_t +spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs) +{ + return bs->total_data_clusters; +} + +static int +spdk_bs_register_md_thread(struct spdk_blob_store *bs) +{ + bs->md_channel = spdk_get_io_channel(bs); + if (!bs->md_channel) { + SPDK_ERRLOG("Failed to get IO channel.\n"); + return -1; + } + + return 0; +} + +static int +spdk_bs_unregister_md_thread(struct spdk_blob_store *bs) +{ + spdk_put_io_channel(bs->md_channel); + + return 0; +} + +spdk_blob_id spdk_blob_get_id(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return blob->id; +} + +uint64_t 
spdk_blob_get_num_pages(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return _spdk_bs_cluster_to_page(blob->bs, blob->active.num_clusters); +} + +uint64_t spdk_blob_get_num_io_units(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return spdk_blob_get_num_pages(blob) * _spdk_bs_io_unit_per_page(blob->bs); +} + +uint64_t spdk_blob_get_num_clusters(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return blob->active.num_clusters; +} + +/* START spdk_bs_create_blob */ + +static void +_spdk_bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + _spdk_blob_free(blob); + + spdk_bs_sequence_finish(seq, bserrno); +} + +static int +_spdk_blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs, + bool internal) +{ + uint64_t i; + size_t value_len = 0; + int rc; + const void *value = NULL; + if (xattrs->count > 0 && xattrs->get_value == NULL) { + return -EINVAL; + } + for (i = 0; i < xattrs->count; i++) { + xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len); + if (value == NULL || value_len == 0) { + return -EINVAL; + } + rc = _spdk_blob_set_xattr(blob, xattrs->names[i], value, value_len, internal); + if (rc < 0) { + return rc; + } + } + return 0; +} + +static void +_spdk_blob_set_thin_provision(struct spdk_blob *blob) +{ + _spdk_blob_verify_md_op(blob); + blob->invalid_flags |= SPDK_BLOB_THIN_PROV; + blob->state = SPDK_BLOB_STATE_DIRTY; +} + +static void +_spdk_bs_create_blob(struct spdk_blob_store *bs, + const struct spdk_blob_opts *opts, + const struct spdk_blob_xattr_opts *internal_xattrs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob; + uint32_t page_idx; + struct spdk_bs_cpl cpl; + struct spdk_blob_opts opts_default; + struct spdk_blob_xattr_opts internal_xattrs_default; + spdk_bs_sequence_t *seq; + spdk_blob_id id; + int rc; + + assert(spdk_get_thread() == bs->md_thread); + + page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0); + if (page_idx == UINT32_MAX) { + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + spdk_bit_array_set(bs->used_blobids, page_idx); + spdk_bit_array_set(bs->used_md_pages, page_idx); + + id = _spdk_bs_page_to_blobid(page_idx); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Creating blob with id %lu at page %u\n", id, page_idx); + + blob = _spdk_blob_alloc(bs, id); + if (!blob) { + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + + if (!opts) { + spdk_blob_opts_init(&opts_default); + opts = &opts_default; + } + if (!internal_xattrs) { + _spdk_blob_xattrs_init(&internal_xattrs_default); + internal_xattrs = &internal_xattrs_default; + } + + rc = _spdk_blob_set_xattrs(blob, &opts->xattrs, false); + if (rc < 0) { + _spdk_blob_free(blob); + cb_fn(cb_arg, 0, rc); + return; + } + + rc = _spdk_blob_set_xattrs(blob, internal_xattrs, true); + if (rc < 0) { + _spdk_blob_free(blob); + cb_fn(cb_arg, 0, rc); + return; + } + + if (opts->thin_provision) { + _spdk_blob_set_thin_provision(blob); + } + + rc = _spdk_blob_resize(blob, opts->num_clusters); + if (rc < 0) { + _spdk_blob_free(blob); + cb_fn(cb_arg, 0, rc); + return; + } + cpl.type = SPDK_BS_CPL_TYPE_BLOBID; + cpl.u.blobid.cb_fn = cb_fn; + cpl.u.blobid.cb_arg = cb_arg; + cpl.u.blobid.blobid = blob->id; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + _spdk_blob_free(blob); + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + + _spdk_blob_persist(seq, blob, _spdk_bs_create_blob_cpl, blob); +} + +void spdk_bs_create_blob(struct spdk_blob_store *bs, + 
spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + _spdk_bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg); +} + +void spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + _spdk_bs_create_blob(bs, opts, NULL, cb_fn, cb_arg); +} + +/* END spdk_bs_create_blob */ + +/* START blob_cleanup */ + +struct spdk_clone_snapshot_ctx { + struct spdk_bs_cpl cpl; + int bserrno; + bool frozen; + + struct spdk_io_channel *channel; + + /* Current cluster for inflate operation */ + uint64_t cluster; + + /* For inflation force allocation of all unallocated clusters and remove + * thin-provisioning. Otherwise only decouple parent and keep clone thin. */ + bool allocate_all; + + struct { + spdk_blob_id id; + struct spdk_blob *blob; + } original; + struct { + spdk_blob_id id; + struct spdk_blob *blob; + } new; + + /* xattrs specified for snapshot/clones only. They have no impact on + * the original blobs xattrs. */ + const struct spdk_blob_xattr_opts *xattrs; +}; + +static void +_spdk_bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = cb_arg; + struct spdk_bs_cpl *cpl = &ctx->cpl; + + if (bserrno != 0) { + if (ctx->bserrno != 0) { + SPDK_ERRLOG("Cleanup error %d\n", bserrno); + } else { + ctx->bserrno = bserrno; + } + } + + switch (cpl->type) { + case SPDK_BS_CPL_TYPE_BLOBID: + cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOB_BASIC: + cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno); + break; + default: + SPDK_UNREACHABLE(); + break; + } + + free(ctx); +} + +static void +_spdk_bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + + if (bserrno != 0) { + if (ctx->bserrno != 0) { + SPDK_ERRLOG("Unfreeze error %d\n", bserrno); + } else { + ctx->bserrno = bserrno; + } + } + + ctx->original.id = origblob->id; + spdk_blob_close(origblob, _spdk_bs_clone_snapshot_cleanup_finish, ctx); +} + +static void +_spdk_bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + + if (bserrno != 0) { + if (ctx->bserrno != 0) { + SPDK_ERRLOG("Cleanup error %d\n", bserrno); + } else { + ctx->bserrno = bserrno; + } + } + + if (ctx->frozen) { + /* Unfreeze any outstanding I/O */ + _spdk_blob_unfreeze_io(origblob, _spdk_bs_snapshot_unfreeze_cpl, ctx); + } else { + _spdk_bs_snapshot_unfreeze_cpl(ctx, 0); + } + +} + +static void +_spdk_bs_clone_snapshot_newblob_cleanup(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *newblob = ctx->new.blob; + + if (bserrno != 0) { + if (ctx->bserrno != 0) { + SPDK_ERRLOG("Cleanup error %d\n", bserrno); + } else { + ctx->bserrno = bserrno; + } + } + + ctx->new.id = newblob->id; + spdk_blob_close(newblob, _spdk_bs_clone_snapshot_origblob_cleanup, ctx); +} + +/* END blob_cleanup */ + +/* START spdk_bs_create_snapshot */ + +static void +_spdk_bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *newblob = ctx->new.blob; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + + /* 
Remove metadata descriptor SNAPSHOT_IN_PROGRESS */ + bserrno = _spdk_blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true); + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + _spdk_bs_blob_list_add(ctx->original.blob); + + spdk_blob_set_read_only(newblob); + + /* sync snapshot metadata */ + spdk_blob_sync_md(newblob, _spdk_bs_clone_snapshot_origblob_cleanup, cb_arg); +} + +static void +_spdk_bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + struct spdk_blob *newblob = ctx->new.blob; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + + /* Set internal xattr for snapshot id */ + bserrno = _spdk_blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true); + if (bserrno != 0) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + + _spdk_bs_blob_list_remove(origblob); + origblob->parent_id = newblob->id; + + /* Create new back_bs_dev for snapshot */ + origblob->back_bs_dev = spdk_bs_create_blob_bs_dev(newblob); + if (origblob->back_bs_dev == NULL) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL); + return; + } + + /* set clone blob as thin provisioned */ + _spdk_blob_set_thin_provision(origblob); + + _spdk_bs_blob_list_add(newblob); + + /* Zero out origblob cluster map */ + memset(origblob->active.clusters, 0, + origblob->active.num_clusters * sizeof(origblob->active.clusters)); + + /* sync clone metadata */ + spdk_blob_sync_md(origblob, _spdk_bs_snapshot_origblob_sync_cpl, ctx); +} + +static void +_spdk_bs_snapshot_freeze_cpl(void *cb_arg, int rc) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + struct spdk_blob *newblob = ctx->new.blob; + int bserrno; + + if (rc != 0) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, rc); + return; + } + + ctx->frozen = true; + + /* set new back_bs_dev for snapshot */ + newblob->back_bs_dev = origblob->back_bs_dev; + /* Set invalid flags from origblob */ + newblob->invalid_flags = origblob->invalid_flags; + + /* inherit parent from original blob if set */ + newblob->parent_id = origblob->parent_id; + if (origblob->parent_id != SPDK_BLOBID_INVALID) { + /* Set internal xattr for snapshot id */ + bserrno = _spdk_blob_set_xattr(newblob, BLOB_SNAPSHOT, + &origblob->parent_id, sizeof(spdk_blob_id), true); + if (bserrno != 0) { + _spdk_bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + } + + /* Copy cluster map to snapshot */ + memcpy(newblob->active.clusters, origblob->active.clusters, + origblob->active.num_clusters * sizeof(origblob->active.clusters)); + + /* sync snapshot metadata */ + spdk_blob_sync_md(newblob, _spdk_bs_snapshot_newblob_sync_cpl, ctx); +} + +static void +_spdk_bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + struct spdk_blob *newblob = _blob; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + ctx->new.blob = newblob; + + _spdk_blob_freeze_io(origblob, _spdk_bs_snapshot_freeze_cpl, ctx); +} + +static void +_spdk_bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct 
spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *origblob = ctx->original.blob; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + ctx->new.id = blobid; + ctx->cpl.u.blobid.blobid = blobid; + + spdk_bs_open_blob(origblob->bs, ctx->new.id, _spdk_bs_snapshot_newblob_open_cpl, ctx); +} + + +static void +_spdk_bs_xattr_snapshot(void *arg, const char *name, + const void **value, size_t *value_len) +{ + assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0); + + struct spdk_blob *blob = (struct spdk_blob *)arg; + *value = &blob->id; + *value_len = sizeof(blob->id); +} + +static void +_spdk_bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob_opts opts; + struct spdk_blob_xattr_opts internal_xattrs; + char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS }; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_cleanup_finish(ctx, bserrno); + return; + } + + ctx->original.blob = _blob; + + if (_blob->data_ro || _blob->md_ro) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Cannot create snapshot from read only blob with id %lu\n", + _blob->id); + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL); + return; + } + + spdk_blob_opts_init(&opts); + _spdk_blob_xattrs_init(&internal_xattrs); + + /* Change the size of new blob to the same as in original blob, + * but do not allocate clusters */ + opts.thin_provision = true; + opts.num_clusters = spdk_blob_get_num_clusters(_blob); + + /* If there are any xattrs specified for snapshot, set them now */ + if (ctx->xattrs) { + memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs)); + } + /* Set internal xattr SNAPSHOT_IN_PROGRESS */ + internal_xattrs.count = 1; + internal_xattrs.ctx = _blob; + internal_xattrs.names = xattrs_names; + internal_xattrs.get_value = _spdk_bs_xattr_snapshot; + + _spdk_bs_create_blob(_blob->bs, &opts, &internal_xattrs, + _spdk_bs_snapshot_newblob_create_cpl, ctx); +} + +void spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid, + const struct spdk_blob_xattr_opts *snapshot_xattrs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx)); + + if (!ctx) { + cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM); + return; + } + ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID; + ctx->cpl.u.blobid.cb_fn = cb_fn; + ctx->cpl.u.blobid.cb_arg = cb_arg; + ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID; + ctx->bserrno = 0; + ctx->frozen = false; + ctx->original.id = blobid; + ctx->xattrs = snapshot_xattrs; + + spdk_bs_open_blob(bs, ctx->original.id, _spdk_bs_snapshot_origblob_open_cpl, ctx); +} +/* END spdk_bs_create_snapshot */ + +/* START spdk_bs_create_clone */ + +static void +_spdk_bs_xattr_clone(void *arg, const char *name, + const void **value, size_t *value_len) +{ + assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0); + + struct spdk_blob *blob = (struct spdk_blob *)arg; + *value = &blob->id; + *value_len = sizeof(blob->id); +} + +static void +_spdk_bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *clone = _blob; + + ctx->new.blob = clone; + _spdk_bs_blob_list_add(clone); + + spdk_blob_close(clone, _spdk_bs_clone_snapshot_origblob_cleanup, ctx); +} + +static void +_spdk_bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno) 
+{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + + ctx->cpl.u.blobid.blobid = blobid; + spdk_bs_open_blob(ctx->original.blob->bs, blobid, _spdk_bs_clone_newblob_open_cpl, ctx); +} + +static void +_spdk_bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob_opts opts; + struct spdk_blob_xattr_opts internal_xattrs; + char *xattr_names[] = { BLOB_SNAPSHOT }; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_cleanup_finish(ctx, bserrno); + return; + } + + ctx->original.blob = _blob; + + if (!_blob->data_ro || !_blob->md_ro) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Clone not from read-only blob\n"); + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL); + return; + } + + spdk_blob_opts_init(&opts); + _spdk_blob_xattrs_init(&internal_xattrs); + + opts.thin_provision = true; + opts.num_clusters = spdk_blob_get_num_clusters(_blob); + if (ctx->xattrs) { + memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs)); + } + + /* Set internal xattr BLOB_SNAPSHOT */ + internal_xattrs.count = 1; + internal_xattrs.ctx = _blob; + internal_xattrs.names = xattr_names; + internal_xattrs.get_value = _spdk_bs_xattr_clone; + + _spdk_bs_create_blob(_blob->bs, &opts, &internal_xattrs, + _spdk_bs_clone_newblob_create_cpl, ctx); +} + +void spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid, + const struct spdk_blob_xattr_opts *clone_xattrs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx)); + + if (!ctx) { + cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM); + return; + } + + ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID; + ctx->cpl.u.blobid.cb_fn = cb_fn; + ctx->cpl.u.blobid.cb_arg = cb_arg; + ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID; + ctx->bserrno = 0; + ctx->xattrs = clone_xattrs; + ctx->original.id = blobid; + + spdk_bs_open_blob(bs, ctx->original.id, _spdk_bs_clone_origblob_open_cpl, ctx); +} + +/* END spdk_bs_create_clone */ + +/* START spdk_bs_inflate_blob */ + +static void +_spdk_bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *_blob = ctx->original.blob; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + assert(_parent != NULL); + + _spdk_bs_blob_list_remove(_blob); + _blob->parent_id = _parent->id; + _spdk_blob_set_xattr(_blob, BLOB_SNAPSHOT, &_blob->parent_id, + sizeof(spdk_blob_id), true); + + _blob->back_bs_dev->destroy(_blob->back_bs_dev); + _blob->back_bs_dev = spdk_bs_create_blob_bs_dev(_parent); + _spdk_bs_blob_list_add(_blob); + + spdk_blob_sync_md(_blob, _spdk_bs_clone_snapshot_origblob_cleanup, ctx); +} + +static void +_spdk_bs_inflate_blob_done(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *_blob = ctx->original.blob; + struct spdk_blob *_parent; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + if (ctx->allocate_all) { + /* remove thin provisioning */ + _spdk_bs_blob_list_remove(_blob); + _spdk_blob_remove_xattr(_blob, BLOB_SNAPSHOT, true); + _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV; + _blob->back_bs_dev->destroy(_blob->back_bs_dev); + _blob->back_bs_dev = NULL; + _blob->parent_id = SPDK_BLOBID_INVALID; + } else { 
+ _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob; + if (_parent->parent_id != SPDK_BLOBID_INVALID) { + /* We must change the parent of the inflated blob */ + spdk_bs_open_blob(_blob->bs, _parent->parent_id, + _spdk_bs_inflate_blob_set_parent_cpl, ctx); + return; + } + + _spdk_bs_blob_list_remove(_blob); + _spdk_blob_remove_xattr(_blob, BLOB_SNAPSHOT, true); + _blob->parent_id = SPDK_BLOBID_INVALID; + _blob->back_bs_dev->destroy(_blob->back_bs_dev); + _blob->back_bs_dev = spdk_bs_create_zeroes_dev(); + } + + _blob->state = SPDK_BLOB_STATE_DIRTY; + spdk_blob_sync_md(_blob, _spdk_bs_clone_snapshot_origblob_cleanup, ctx); +} + +/* Check if cluster needs allocation */ +static inline bool +_spdk_bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all) +{ + struct spdk_blob_bs_dev *b; + + assert(blob != NULL); + + if (blob->active.clusters[cluster] != 0) { + /* Cluster is already allocated */ + return false; + } + + if (blob->parent_id == SPDK_BLOBID_INVALID) { + /* Blob have no parent blob */ + return allocate_all; + } + + b = (struct spdk_blob_bs_dev *)blob->back_bs_dev; + return (allocate_all || b->blob->active.clusters[cluster] != 0); +} + +static void +_spdk_bs_inflate_blob_touch_next(void *cb_arg, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + struct spdk_blob *_blob = ctx->original.blob; + uint64_t offset; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, bserrno); + return; + } + + for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) { + if (_spdk_bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) { + break; + } + } + + if (ctx->cluster < _blob->active.num_clusters) { + offset = _spdk_bs_cluster_to_lba(_blob->bs, ctx->cluster); + + /* We may safely increment a cluster before write */ + ctx->cluster++; + + /* Use zero length write to touch a cluster */ + spdk_blob_io_write(_blob, ctx->channel, NULL, offset, 0, + _spdk_bs_inflate_blob_touch_next, ctx); + } else { + _spdk_bs_inflate_blob_done(cb_arg, bserrno); + } +} + +static void +_spdk_bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg; + uint64_t lfc; /* lowest free cluster */ + uint64_t i; + + if (bserrno != 0) { + _spdk_bs_clone_snapshot_cleanup_finish(ctx, bserrno); + return; + } + ctx->original.blob = _blob; + + if (!ctx->allocate_all && _blob->parent_id == SPDK_BLOBID_INVALID) { + /* This blob have no parent, so we cannot decouple it. */ + SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n"); + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL); + return; + } + + if (spdk_blob_is_thin_provisioned(_blob) == false) { + /* This is not thin provisioned blob. No need to inflate. */ + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, 0); + return; + } + + /* Do two passes - one to verify that we can obtain enough clusters + * and another to actually claim them. + */ + lfc = 0; + for (i = 0; i < _blob->active.num_clusters; i++) { + if (_spdk_bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) { + lfc = spdk_bit_array_find_first_clear(_blob->bs->used_clusters, lfc); + if (lfc == UINT32_MAX) { + /* No more free clusters. 
Cannot satisfy the request */ + _spdk_bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC); + return; + } + lfc++; + } + } + + ctx->cluster = 0; + _spdk_bs_inflate_blob_touch_next(ctx, 0); +} + +static void +_spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel, + spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx)); + + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + ctx->cpl.u.bs_basic.cb_fn = cb_fn; + ctx->cpl.u.bs_basic.cb_arg = cb_arg; + ctx->bserrno = 0; + ctx->original.id = blobid; + ctx->channel = channel; + ctx->allocate_all = allocate_all; + + spdk_bs_open_blob(bs, ctx->original.id, _spdk_bs_inflate_blob_open_cpl, ctx); +} + +void +spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel, + spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg); +} + +void +spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel, + spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg); +} +/* END spdk_bs_inflate_blob */ + +/* START spdk_blob_resize */ +struct spdk_bs_resize_ctx { + spdk_blob_op_complete cb_fn; + void *cb_arg; + struct spdk_blob *blob; + uint64_t sz; + int rc; +}; + +static void +_spdk_bs_resize_unfreeze_cpl(void *cb_arg, int rc) +{ + struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg; + + if (rc != 0) { + SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc); + } + + if (ctx->rc != 0) { + SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc); + rc = ctx->rc; + } + + ctx->blob->resize_in_progress = false; + + ctx->cb_fn(ctx->cb_arg, rc); + free(ctx); +} + +static void +_spdk_bs_resize_freeze_cpl(void *cb_arg, int rc) +{ + struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg; + + if (rc != 0) { + ctx->blob->resize_in_progress = false; + ctx->cb_fn(ctx->cb_arg, rc); + free(ctx); + return; + } + + ctx->rc = _spdk_blob_resize(ctx->blob, ctx->sz); + + _spdk_blob_unfreeze_io(ctx->blob, _spdk_bs_resize_unfreeze_cpl, ctx); +} + +void +spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_resize_ctx *ctx; + + _spdk_blob_verify_md_op(blob); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Resizing blob %lu to %lu clusters\n", blob->id, sz); + + if (blob->md_ro) { + cb_fn(cb_arg, -EPERM); + return; + } + + if (sz == blob->active.num_clusters) { + cb_fn(cb_arg, 0); + return; + } + + if (blob->resize_in_progress) { + cb_fn(cb_arg, -EBUSY); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + blob->resize_in_progress = true; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + ctx->blob = blob; + ctx->sz = sz; + _spdk_blob_freeze_io(blob, _spdk_bs_resize_freeze_cpl, ctx); +} + +/* END spdk_blob_resize */ + + +/* START spdk_bs_delete_blob */ + +static void +_spdk_bs_delete_close_cpl(void *cb_arg, int bserrno) +{ + spdk_bs_sequence_t *seq = cb_arg; + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + if (bserrno != 0) { + /* + * We already removed this blob from the blobstore tailq, so + * we need to free it here since this is the last reference + * to it. 
+ */ + _spdk_blob_free(blob); + _spdk_bs_delete_close_cpl(seq, bserrno); + return; + } + + /* + * This will immediately decrement the ref_count and call + * the completion routine since the metadata state is clean. + * By calling spdk_blob_close, we reduce the number of call + * points into code that touches the blob->open_ref count + * and the blobstore's blob list. + */ + spdk_blob_close(blob, _spdk_bs_delete_close_cpl, seq); +} + +static void +_spdk_bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + spdk_bs_sequence_t *seq = cb_arg; + uint32_t page_num; + + if (bserrno != 0) { + spdk_bs_sequence_finish(seq, bserrno); + return; + } + + _spdk_blob_verify_md_op(blob); + + if (blob->open_ref > 1) { + /* + * Someone has this blob open (besides this delete context). + * Decrement the ref count directly and return -EBUSY. + */ + blob->open_ref--; + spdk_bs_sequence_finish(seq, -EBUSY); + return; + } + + bserrno = _spdk_bs_blob_list_remove(blob); + if (bserrno != 0) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Remove blob #%" PRIu64 " from a list\n", blob->id); + spdk_bs_sequence_finish(seq, bserrno); + return; + } + + /* + * Remove the blob from the blob_store list now, to ensure it does not + * get returned after this point by _spdk_blob_lookup(). + */ + TAILQ_REMOVE(&blob->bs->blobs, blob, link); + page_num = _spdk_bs_blobid_to_page(blob->id); + spdk_bit_array_clear(blob->bs->used_blobids, page_num); + blob->state = SPDK_BLOB_STATE_DIRTY; + blob->active.num_pages = 0; + _spdk_blob_resize(blob, 0); + + _spdk_blob_persist(seq, blob, _spdk_bs_delete_persist_cpl, blob); +} + +void +spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_blob_list *snapshot_entry = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Deleting blob %lu\n", blobid); + + assert(spdk_get_thread() == bs->md_thread); + + /* Check if this is a snapshot with clones */ + TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) { + if (snapshot_entry->id == blobid) { + break; + } + } + if (snapshot_entry != NULL) { + /* If snapshot have clones, we cannot remove it */ + if (!TAILQ_EMPTY(&snapshot_entry->clones)) { + SPDK_ERRLOG("Cannot remove snapshot with clones\n"); + cb_fn(cb_arg, -EBUSY); + return; + } + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + spdk_bs_open_blob(bs, blobid, _spdk_bs_delete_open_cpl, seq); +} + +/* END spdk_bs_delete_blob */ + +/* START spdk_bs_open_blob */ + +static void +_spdk_bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + /* If the blob have crc error, we just return NULL. 
*/ + if (blob == NULL) { + seq->cpl.u.blob_handle.blob = NULL; + spdk_bs_sequence_finish(seq, bserrno); + return; + } + + blob->open_ref++; + + TAILQ_INSERT_HEAD(&blob->bs->blobs, blob, link); + + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + uint32_t page_num; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Opening blob %lu\n", blobid); + assert(spdk_get_thread() == bs->md_thread); + + page_num = _spdk_bs_blobid_to_page(blobid); + if (spdk_bit_array_get(bs->used_blobids, page_num) == false) { + /* Invalid blobid */ + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + + blob = _spdk_blob_lookup(bs, blobid); + if (blob) { + blob->open_ref++; + cb_fn(cb_arg, blob, 0); + return; + } + + blob = _spdk_blob_alloc(bs, blobid); + if (!blob) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE; + cpl.u.blob_handle.cb_fn = cb_fn; + cpl.u.blob_handle.cb_arg = cb_arg; + cpl.u.blob_handle.blob = blob; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + _spdk_blob_free(blob); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + _spdk_blob_load(seq, blob, _spdk_bs_open_blob_cpl, blob); +} +/* END spdk_bs_open_blob */ + +/* START spdk_blob_set_read_only */ +int spdk_blob_set_read_only(struct spdk_blob *blob) +{ + _spdk_blob_verify_md_op(blob); + + blob->data_ro_flags |= SPDK_BLOB_READ_ONLY; + + blob->state = SPDK_BLOB_STATE_DIRTY; + return 0; +} +/* END spdk_blob_set_read_only */ + +/* START spdk_blob_sync_md */ + +static void +_spdk_blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) { + blob->data_ro = true; + blob->md_ro = true; + } + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(blob->bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + _spdk_blob_persist(seq, blob, _spdk_blob_sync_md_cpl, blob); +} + +void +spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_verify_md_op(blob); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Syncing blob %lu\n", blob->id); + + if (blob->md_ro) { + assert(blob->state == SPDK_BLOB_STATE_CLEAN); + cb_fn(cb_arg, 0); + return; + } + + _spdk_blob_sync_md(blob, cb_fn, cb_arg); +} + +/* END spdk_blob_sync_md */ + +struct spdk_blob_insert_cluster_ctx { + struct spdk_thread *thread; + struct spdk_blob *blob; + uint32_t cluster_num; /* cluster index in blob */ + uint32_t cluster; /* cluster on disk */ + int rc; + spdk_blob_op_complete cb_fn; + void *cb_arg; +}; + +static void +_spdk_blob_insert_cluster_msg_cpl(void *arg) +{ + struct spdk_blob_insert_cluster_ctx *ctx = arg; + + ctx->cb_fn(ctx->cb_arg, ctx->rc); + free(ctx); +} + +static void +_spdk_blob_insert_cluster_msg_cb(void *arg, int bserrno) +{ + struct spdk_blob_insert_cluster_ctx *ctx = arg; + + ctx->rc = bserrno; + spdk_thread_send_msg(ctx->thread, _spdk_blob_insert_cluster_msg_cpl, ctx); +} + +static void +_spdk_blob_insert_cluster_msg(void *arg) +{ + struct spdk_blob_insert_cluster_ctx *ctx = 
arg; + + ctx->rc = _spdk_blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster); + if (ctx->rc != 0) { + spdk_thread_send_msg(ctx->thread, _spdk_blob_insert_cluster_msg_cpl, ctx); + return; + } + + ctx->blob->state = SPDK_BLOB_STATE_DIRTY; + _spdk_blob_sync_md(ctx->blob, _spdk_blob_insert_cluster_msg_cb, ctx); +} + +static void +_spdk_blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, + uint64_t cluster, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_blob_insert_cluster_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->thread = spdk_get_thread(); + ctx->blob = blob; + ctx->cluster_num = cluster_num; + ctx->cluster = cluster; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_thread_send_msg(blob->bs->md_thread, _spdk_blob_insert_cluster_msg, ctx); +} + +/* START spdk_blob_close */ + +static void +_spdk_blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + if (bserrno == 0) { + blob->open_ref--; + if (blob->open_ref == 0) { + /* + * Blobs with active.num_pages == 0 are deleted blobs. + * these blobs are removed from the blob_store list + * when the deletion process starts - so don't try to + * remove them again. + */ + if (blob->active.num_pages > 0) { + TAILQ_REMOVE(&blob->bs->blobs, blob, link); + } + _spdk_blob_free(blob); + } + } + + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + + _spdk_blob_verify_md_op(blob); + + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Closing blob %lu\n", blob->id); + + if (blob->open_ref == 0) { + cb_fn(cb_arg, -EBADF); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(blob->bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + /* Sync metadata */ + _spdk_blob_persist(seq, blob, _spdk_blob_close_cpl, blob); +} + +/* END spdk_blob_close */ + +struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs) +{ + return spdk_get_io_channel(bs); +} + +void spdk_bs_free_io_channel(struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +void spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel, + uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg, + SPDK_BLOB_UNMAP); +} + +void spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel, + uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg, + SPDK_BLOB_WRITE_ZEROES); +} + +void spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg, + SPDK_BLOB_WRITE); +} + +void spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg, + SPDK_BLOB_READ); +} + +void spdk_blob_io_writev(struct spdk_blob *blob, 
struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false); +} + +void spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true); +} + +struct spdk_bs_iter_ctx { + int64_t page_num; + struct spdk_blob_store *bs; + + spdk_blob_op_with_handle_complete cb_fn; + void *cb_arg; +}; + +static void +_spdk_bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) +{ + struct spdk_bs_iter_ctx *ctx = cb_arg; + struct spdk_blob_store *bs = ctx->bs; + spdk_blob_id id; + + if (bserrno == 0) { + ctx->cb_fn(ctx->cb_arg, _blob, bserrno); + free(ctx); + return; + } + + ctx->page_num++; + ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num); + if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) { + ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT); + free(ctx); + return; + } + + id = _spdk_bs_page_to_blobid(ctx->page_num); + + spdk_bs_open_blob(bs, id, _spdk_bs_iter_cpl, ctx); +} + +void +spdk_bs_iter_first(struct spdk_blob_store *bs, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_iter_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->page_num = -1; + ctx->bs = bs; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + _spdk_bs_iter_cpl(ctx, NULL, -1); +} + +static void +_spdk_bs_iter_close_cpl(void *cb_arg, int bserrno) +{ + struct spdk_bs_iter_ctx *ctx = cb_arg; + + _spdk_bs_iter_cpl(ctx, NULL, -1); +} + +void +spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_iter_ctx *ctx; + + assert(blob != NULL); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->page_num = _spdk_bs_blobid_to_page(blob->id); + ctx->bs = bs; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + /* Close the existing blob */ + spdk_blob_close(blob, _spdk_bs_iter_close_cpl, ctx); +} + +static int +_spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len, bool internal) +{ + struct spdk_xattr_tailq *xattrs; + struct spdk_xattr *xattr; + + _spdk_blob_verify_md_op(blob); + + if (blob->md_ro) { + return -EPERM; + } + + if (internal) { + xattrs = &blob->xattrs_internal; + blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR; + } else { + xattrs = &blob->xattrs; + } + + TAILQ_FOREACH(xattr, xattrs, link) { + if (!strcmp(name, xattr->name)) { + free(xattr->value); + xattr->value_len = value_len; + xattr->value = malloc(value_len); + memcpy(xattr->value, value, value_len); + + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; + } + } + + xattr = calloc(1, sizeof(*xattr)); + if (!xattr) { + return -ENOMEM; + } + xattr->name = strdup(name); + xattr->value_len = value_len; + xattr->value = malloc(value_len); + memcpy(xattr->value, value, value_len); + TAILQ_INSERT_TAIL(xattrs, xattr, link); + + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; +} + +int +spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len) +{ + return _spdk_blob_set_xattr(blob, name, value, value_len, false); +} + 
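+/*
+ * Illustrative sketch of how the public xattr calls defined in this file are
+ * typically driven (the open blob handle, the "name" key, the value and the
+ * completion callback below are assumptions for the example, not part of this
+ * code). Setting or removing an xattr only marks the blob dirty; the change is
+ * persisted by a later spdk_blob_sync_md() or spdk_blob_close().
+ *
+ *	const char *val = "bdev0";
+ *	const void *out;
+ *	size_t out_len;
+ *
+ *	spdk_blob_set_xattr(blob, "name", val, strlen(val) + 1);
+ *	if (spdk_blob_get_xattr_value(blob, "name", &out, &out_len) == 0) {
+ *		out points at the blob's internal copy and stays valid until
+ *		the xattr is overwritten or removed;
+ *	}
+ *	spdk_blob_remove_xattr(blob, "name");
+ *	spdk_blob_sync_md(blob, sync_done_cb, cb_arg);
+ */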
+static int +_spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal) +{ + struct spdk_xattr_tailq *xattrs; + struct spdk_xattr *xattr; + + _spdk_blob_verify_md_op(blob); + + if (blob->md_ro) { + return -EPERM; + } + xattrs = internal ? &blob->xattrs_internal : &blob->xattrs; + + TAILQ_FOREACH(xattr, xattrs, link) { + if (!strcmp(name, xattr->name)) { + TAILQ_REMOVE(xattrs, xattr, link); + free(xattr->value); + free(xattr->name); + free(xattr); + + if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) { + blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR; + } + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; + } + } + + return -ENOENT; +} + +int +spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name) +{ + return _spdk_blob_remove_xattr(blob, name, false); +} + +static int +_spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len, bool internal) +{ + struct spdk_xattr *xattr; + struct spdk_xattr_tailq *xattrs; + + xattrs = internal ? &blob->xattrs_internal : &blob->xattrs; + + TAILQ_FOREACH(xattr, xattrs, link) { + if (!strcmp(name, xattr->name)) { + *value = xattr->value; + *value_len = xattr->value_len; + return 0; + } + } + return -ENOENT; +} + +int +spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len) +{ + _spdk_blob_verify_md_op(blob); + + return _spdk_blob_get_xattr_value(blob, name, value, value_len, false); +} + +struct spdk_xattr_names { + uint32_t count; + const char *names[0]; +}; + +static int +_spdk_blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names) +{ + struct spdk_xattr *xattr; + int count = 0; + + TAILQ_FOREACH(xattr, xattrs, link) { + count++; + } + + *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *)); + if (*names == NULL) { + return -ENOMEM; + } + + TAILQ_FOREACH(xattr, xattrs, link) { + (*names)->names[(*names)->count++] = xattr->name; + } + + return 0; +} + +int +spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names) +{ + _spdk_blob_verify_md_op(blob); + + return _spdk_blob_get_xattr_names(&blob->xattrs, names); +} + +uint32_t +spdk_xattr_names_get_count(struct spdk_xattr_names *names) +{ + assert(names != NULL); + + return names->count; +} + +const char * +spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index) +{ + if (index >= names->count) { + return NULL; + } + + return names->names[index]; +} + +void +spdk_xattr_names_free(struct spdk_xattr_names *names) +{ + free(names); +} + +struct spdk_bs_type +spdk_bs_get_bstype(struct spdk_blob_store *bs) +{ + return bs->bstype; +} + +void +spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype) +{ + memcpy(&bs->bstype, &bstype, sizeof(bstype)); +} + +bool +spdk_blob_is_read_only(struct spdk_blob *blob) +{ + assert(blob != NULL); + return (blob->data_ro || blob->md_ro); +} + +bool +spdk_blob_is_snapshot(struct spdk_blob *blob) +{ + struct spdk_blob_list *snapshot_entry; + + assert(blob != NULL); + + TAILQ_FOREACH(snapshot_entry, &blob->bs->snapshots, link) { + if (snapshot_entry->id == blob->id) { + break; + } + } + + if (snapshot_entry == NULL) { + return false; + } + + return true; +} + +bool +spdk_blob_is_clone(struct spdk_blob *blob) +{ + assert(blob != NULL); + + if (blob->parent_id != SPDK_BLOBID_INVALID) { + assert(spdk_blob_is_thin_provisioned(blob)); + return true; + } + + return false; +} + +bool +spdk_blob_is_thin_provisioned(struct spdk_blob *blob) 
+{ + assert(blob != NULL); + return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV); +} + +spdk_blob_id +spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id) +{ + struct spdk_blob_list *snapshot_entry = NULL; + struct spdk_blob_list *clone_entry = NULL; + + TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) { + TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) { + if (clone_entry->id == blob_id) { + return snapshot_entry->id; + } + } + } + + return SPDK_BLOBID_INVALID; +} + +int +spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids, + size_t *count) +{ + struct spdk_blob_list *snapshot_entry, *clone_entry; + size_t n; + + TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) { + if (snapshot_entry->id == blobid) { + break; + } + } + if (snapshot_entry == NULL) { + *count = 0; + return 0; + } + + if (ids == NULL || *count < snapshot_entry->clone_count) { + *count = snapshot_entry->clone_count; + return -ENOMEM; + } + *count = snapshot_entry->clone_count; + + n = 0; + TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) { + ids[n++] = clone_entry->id; + } + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("blob", SPDK_LOG_BLOB) diff --git a/src/spdk/lib/blob/blobstore.h b/src/spdk/lib/blob/blobstore.h new file mode 100644 index 00000000..60df98d8 --- /dev/null +++ b/src/spdk/lib/blob/blobstore.h @@ -0,0 +1,572 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BLOBSTORE_H +#define SPDK_BLOBSTORE_H + +#include "spdk/assert.h" +#include "spdk/blob.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +#include "request.h" + +/* In Memory Data Structures + * + * The following data structures exist only in memory. 
+ */ + +#define SPDK_BLOB_OPTS_CLUSTER_SZ (1024 * 1024) +#define SPDK_BLOB_OPTS_NUM_MD_PAGES UINT32_MAX +#define SPDK_BLOB_OPTS_MAX_MD_OPS 32 +#define SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS 512 +#define SPDK_BLOB_BLOBID_HIGH_BIT (1ULL << 32) + +struct spdk_xattr { + uint32_t index; + uint16_t value_len; + char *name; + void *value; + TAILQ_ENTRY(spdk_xattr) link; +}; + +/* The mutable part of the blob data that is sync'd to + * disk. The data in here is both mutable and persistent. + */ +struct spdk_blob_mut_data { + /* Number of data clusters in the blob */ + uint64_t num_clusters; + + /* Array LBAs that are the beginning of a cluster, in + * the order they appear in the blob. + */ + uint64_t *clusters; + + /* The size of the clusters array. This is greater than or + * equal to 'num_clusters'. + */ + size_t cluster_array_size; + + /* Number of metadata pages */ + uint32_t num_pages; + + /* Array of page offsets into the metadata region, in + * the order of the metadata page sequence. + */ + uint32_t *pages; +}; + +enum spdk_blob_state { + /* The blob in-memory version does not match the on-disk + * version. + */ + SPDK_BLOB_STATE_DIRTY, + + /* The blob in memory version of the blob matches the on disk + * version. + */ + SPDK_BLOB_STATE_CLEAN, + + /* The in-memory state being synchronized with the on-disk + * blob state. */ + SPDK_BLOB_STATE_LOADING, +}; + +TAILQ_HEAD(spdk_xattr_tailq, spdk_xattr); + +struct spdk_blob_list { + spdk_blob_id id; + size_t clone_count; + TAILQ_HEAD(, spdk_blob_list) clones; + TAILQ_ENTRY(spdk_blob_list) link; +}; + +struct spdk_blob { + struct spdk_blob_store *bs; + + uint32_t open_ref; + + spdk_blob_id id; + spdk_blob_id parent_id; + + enum spdk_blob_state state; + + /* Two copies of the mutable data. One is a version + * that matches the last known data on disk (clean). + * The other (active) is the current data. Syncing + * a blob makes the clean match the active. + */ + struct spdk_blob_mut_data clean; + struct spdk_blob_mut_data active; + + bool invalid; + bool data_ro; + bool md_ro; + + uint64_t invalid_flags; + uint64_t data_ro_flags; + uint64_t md_ro_flags; + + struct spdk_bs_dev *back_bs_dev; + + /* TODO: The xattrs are mutable, but we don't want to be + * copying them unnecessarily. Figure this out. 
+ */ + struct spdk_xattr_tailq xattrs; + struct spdk_xattr_tailq xattrs_internal; + + TAILQ_ENTRY(spdk_blob) link; + + uint32_t frozen_refcnt; + bool resize_in_progress; +}; + +struct spdk_blob_store { + uint64_t md_start; /* Offset from beginning of disk, in pages */ + uint32_t md_len; /* Count, in pages */ + + struct spdk_io_channel *md_channel; + uint32_t max_channel_ops; + + struct spdk_thread *md_thread; + + struct spdk_bs_dev *dev; + + struct spdk_bit_array *used_md_pages; + struct spdk_bit_array *used_clusters; + struct spdk_bit_array *used_blobids; + + pthread_mutex_t used_clusters_mutex; + + uint32_t cluster_sz; + uint64_t total_clusters; + uint64_t total_data_clusters; + uint64_t num_free_clusters; + uint64_t pages_per_cluster; + uint32_t io_unit_size; + + spdk_blob_id super_blob; + struct spdk_bs_type bstype; + + struct spdk_bs_cpl unload_cpl; + int unload_err; + + TAILQ_HEAD(, spdk_blob) blobs; + TAILQ_HEAD(, spdk_blob_list) snapshots; + + bool clean; +}; + +struct spdk_bs_channel { + struct spdk_bs_request_set *req_mem; + TAILQ_HEAD(, spdk_bs_request_set) reqs; + + struct spdk_blob_store *bs; + + struct spdk_bs_dev *dev; + struct spdk_io_channel *dev_channel; + + TAILQ_HEAD(, spdk_bs_request_set) need_cluster_alloc; + TAILQ_HEAD(, spdk_bs_request_set) queued_io; +}; + +/** operation type */ +enum spdk_blob_op_type { + SPDK_BLOB_WRITE, + SPDK_BLOB_READ, + SPDK_BLOB_UNMAP, + SPDK_BLOB_WRITE_ZEROES, + SPDK_BLOB_WRITEV, + SPDK_BLOB_READV, +}; + +/* back bs_dev */ + +#define BLOB_SNAPSHOT "SNAP" +#define SNAPSHOT_IN_PROGRESS "SNAPTMP" + +struct spdk_blob_bs_dev { + struct spdk_bs_dev bs_dev; + struct spdk_blob *blob; +}; + +/* On-Disk Data Structures + * + * The following data structures exist on disk. + */ +#define SPDK_BS_INITIAL_VERSION 1 +#define SPDK_BS_VERSION 3 /* current version */ + +#pragma pack(push, 1) + +#define SPDK_MD_MASK_TYPE_USED_PAGES 0 +#define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1 +#define SPDK_MD_MASK_TYPE_USED_BLOBIDS 2 + +struct spdk_bs_md_mask { + uint8_t type; + uint32_t length; /* In bits */ + uint8_t mask[0]; +}; + +#define SPDK_MD_DESCRIPTOR_TYPE_PADDING 0 +#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT 1 +#define SPDK_MD_DESCRIPTOR_TYPE_XATTR 2 +#define SPDK_MD_DESCRIPTOR_TYPE_FLAGS 3 +#define SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL 4 + +struct spdk_blob_md_descriptor_xattr { + uint8_t type; + uint32_t length; + + uint16_t name_length; + uint16_t value_length; + + char name[0]; + /* String name immediately followed by string value. */ +}; + +struct spdk_blob_md_descriptor_extent { + uint8_t type; + uint32_t length; + + struct { + uint32_t cluster_idx; + uint32_t length; /* In units of clusters */ + } extents[0]; +}; + +#define SPDK_BLOB_THIN_PROV (1ULL << 0) +#define SPDK_BLOB_INTERNAL_XATTR (1ULL << 1) +#define SPDK_BLOB_INVALID_FLAGS_MASK (SPDK_BLOB_THIN_PROV | SPDK_BLOB_INTERNAL_XATTR) + +#define SPDK_BLOB_READ_ONLY (1ULL << 0) +#define SPDK_BLOB_DATA_RO_FLAGS_MASK SPDK_BLOB_READ_ONLY +#define SPDK_BLOB_MD_RO_FLAGS_MASK 0 + +struct spdk_blob_md_descriptor_flags { + uint8_t type; + uint32_t length; + + /* + * If a flag in invalid_flags is set that the application is not aware of, + * it will not allow the blob to be opened. + */ + uint64_t invalid_flags; + + /* + * If a flag in data_ro_flags is set that the application is not aware of, + * allow the blob to be opened in data_read_only and md_read_only mode. 
+ */ + uint64_t data_ro_flags; + + /* + * If a flag in md_ro_flags is set the the application is not aware of, + * allow the blob to be opened in md_read_only mode. + */ + uint64_t md_ro_flags; +}; + +struct spdk_blob_md_descriptor { + uint8_t type; + uint32_t length; +}; + +#define SPDK_INVALID_MD_PAGE UINT32_MAX + +struct spdk_blob_md_page { + spdk_blob_id id; + + uint32_t sequence_num; + uint32_t reserved0; + + /* Descriptors here */ + uint8_t descriptors[4072]; + + uint32_t next; + uint32_t crc; +}; +#define SPDK_BS_PAGE_SIZE 0x1000 +SPDK_STATIC_ASSERT(SPDK_BS_PAGE_SIZE == sizeof(struct spdk_blob_md_page), "Invalid md page size"); + +#define SPDK_BS_SUPER_BLOCK_SIG "SPDKBLOB" + +struct spdk_bs_super_block { + uint8_t signature[8]; + uint32_t version; + uint32_t length; + uint32_t clean; /* If there was a clean shutdown, this is 1. */ + spdk_blob_id super_blob; + + uint32_t cluster_size; /* In bytes */ + + uint32_t used_page_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_page_mask_len; /* Count, in pages */ + + uint32_t used_cluster_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_cluster_mask_len; /* Count, in pages */ + + uint32_t md_start; /* Offset from beginning of disk, in pages */ + uint32_t md_len; /* Count, in pages */ + + struct spdk_bs_type bstype; /* blobstore type */ + + uint32_t used_blobid_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_blobid_mask_len; /* Count, in pages */ + + uint64_t size; /* size of blobstore in bytes */ + uint32_t io_unit_size; /* Size of io unit in bytes */ + + uint8_t reserved[4000]; + uint32_t crc; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size"); + +#pragma pack(pop) + +struct spdk_bs_dev *spdk_bs_create_zeroes_dev(void); +struct spdk_bs_dev *spdk_bs_create_blob_bs_dev(struct spdk_blob *blob); + +/* Unit Conversions + * + * The blobstore works with several different units: + * - Byte: Self explanatory + * - LBA: The logical blocks on the backing storage device. + * - Page: The read/write units of blobs and metadata. This is + * an offset into a blob in units of 4KiB. + * - Cluster Index: The disk is broken into a sequential list of + * clusters. This is the offset from the beginning. + * + * NOTE: These conversions all act on simple magnitudes, not with any sort + * of knowledge about the blobs themselves. For instance, converting + * a page to an lba with the conversion function below simply converts + * a number of pages to an equivalent number of lbas, but that + * lba certainly isn't the right lba that corresponds to a page offset + * for a particular blob. 
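+ *
+ * A worked example with an assumed 512-byte device blocklen (the 4KiB page size
+ * and the 1MiB default cluster size come from this file): one page spans
+ * 4096 / 512 = 8 LBAs, so _spdk_bs_page_to_lba() maps page 10 to LBA 80; a
+ * cluster holds 1MiB / 4KiB = 256 pages, so cluster index 3 starts at page 768
+ * and, via _spdk_bs_cluster_to_lba(), at LBA 3 * (1048576 / 512) = 6144.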
+ */ +static inline uint64_t +_spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length) +{ + assert(length % bs->dev->blocklen == 0); + + return length / bs->dev->blocklen; +} + +static inline uint64_t +_spdk_bs_dev_byte_to_lba(struct spdk_bs_dev *bs_dev, uint64_t length) +{ + assert(length % bs_dev->blocklen == 0); + + return length / bs_dev->blocklen; +} + +static inline uint64_t +_spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page) +{ + return page * SPDK_BS_PAGE_SIZE / bs->dev->blocklen; +} + +static inline uint64_t +_spdk_bs_dev_page_to_lba(struct spdk_bs_dev *bs_dev, uint64_t page) +{ + return page * SPDK_BS_PAGE_SIZE / bs_dev->blocklen; +} + +static inline uint64_t +_spdk_bs_io_unit_per_page(struct spdk_blob_store *bs) +{ + return SPDK_BS_PAGE_SIZE / bs->io_unit_size; +} + +static inline uint64_t +_spdk_bs_io_unit_to_page(struct spdk_blob_store *bs, uint64_t io_unit) +{ + return io_unit / _spdk_bs_io_unit_per_page(bs); +} + +static inline uint64_t +_spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster) +{ + return (uint64_t)cluster * bs->pages_per_cluster; +} + +static inline uint32_t +_spdk_bs_page_to_cluster(struct spdk_blob_store *bs, uint64_t page) +{ + assert(page % bs->pages_per_cluster == 0); + + return page / bs->pages_per_cluster; +} + +static inline uint64_t +_spdk_bs_cluster_to_lba(struct spdk_blob_store *bs, uint32_t cluster) +{ + return (uint64_t)cluster * (bs->cluster_sz / bs->dev->blocklen); +} + +static inline uint32_t +_spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba) +{ + assert(lba % (bs->cluster_sz / bs->dev->blocklen) == 0); + + return lba / (bs->cluster_sz / bs->dev->blocklen); +} + +static inline uint64_t +_spdk_bs_io_unit_to_back_dev_lba(struct spdk_blob *blob, uint64_t io_unit) +{ + return io_unit * (blob->bs->io_unit_size / blob->back_bs_dev->blocklen); +} + +static inline uint64_t +_spdk_bs_back_dev_lba_to_io_unit(struct spdk_blob *blob, uint64_t lba) +{ + return lba * (blob->back_bs_dev->blocklen / blob->bs->io_unit_size); +} + +/* End basic conversions */ + +static inline uint64_t +_spdk_bs_blobid_to_page(spdk_blob_id id) +{ + return id & 0xFFFFFFFF; +} + +/* The blob id is a 64 bit number. The lower 32 bits are the page_idx. The upper + * 32 bits are not currently used. Stick a 1 there just to catch bugs where the + * code assumes blob id == page_idx. + */ +static inline spdk_blob_id +_spdk_bs_page_to_blobid(uint64_t page_idx) +{ + if (page_idx > UINT32_MAX) { + return SPDK_BLOBID_INVALID; + } + return SPDK_BLOB_BLOBID_HIGH_BIT | page_idx; +} + +/* Given an io unit offset into a blob, look up the LBA for the + * start of that io unit. + */ +static inline uint64_t +_spdk_bs_blob_io_unit_to_lba(struct spdk_blob *blob, uint64_t io_unit) +{ + uint64_t lba; + uint64_t pages_per_cluster; + uint64_t io_units_per_cluster; + uint64_t io_units_per_page; + uint64_t page; + + page = _spdk_bs_io_unit_to_page(blob->bs, io_unit); + + pages_per_cluster = blob->bs->pages_per_cluster; + io_units_per_page = _spdk_bs_io_unit_per_page(blob->bs); + io_units_per_cluster = io_units_per_page * pages_per_cluster; + + assert(page < blob->active.num_clusters * pages_per_cluster); + + lba = blob->active.clusters[page / pages_per_cluster]; + lba += io_unit % io_units_per_cluster; + return lba; +} + +/* Given an io_unit offset into a blob, look up the number of io_units until the + * next cluster boundary. 
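+ * For example (illustrative sizes): with a 512-byte io_unit there are 8 io_units
+ * per 4KiB page, and with 256 pages per cluster a cluster holds 2048 io_units,
+ * so io_unit offset 2050 lies 2 io_units into its cluster and the function
+ * returns 2048 - 2 = 2046.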
+ */ +static inline uint32_t +_spdk_bs_num_io_units_to_cluster_boundary(struct spdk_blob *blob, uint64_t io_unit) +{ + uint64_t io_units_per_cluster; + + io_units_per_cluster = _spdk_bs_io_unit_per_page(blob->bs) * blob->bs->pages_per_cluster; + + return io_units_per_cluster - (io_unit % io_units_per_cluster); +} + +/* Given a page offset into a blob, look up the number of pages until the + * next cluster boundary. + */ +static inline uint32_t +_spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob *blob, uint64_t page) +{ + uint64_t pages_per_cluster; + + pages_per_cluster = blob->bs->pages_per_cluster; + + return pages_per_cluster - (page % pages_per_cluster); +} + +/* Given an io_unit offset into a blob, look up the number of pages into blob to beginning of current cluster */ +static inline uint32_t +_spdk_bs_io_unit_to_cluster_start(struct spdk_blob *blob, uint64_t io_unit) +{ + uint64_t pages_per_cluster; + uint64_t page; + + pages_per_cluster = blob->bs->pages_per_cluster; + page = _spdk_bs_io_unit_to_page(blob->bs, io_unit); + + return page - (page % pages_per_cluster); +} + +/* Given an io_unit offset into a blob, look up the number of pages into blob to beginning of current cluster */ +static inline uint32_t +_spdk_bs_io_unit_to_cluster_number(struct spdk_blob *blob, uint64_t io_unit) +{ + return (io_unit / _spdk_bs_io_unit_per_page(blob->bs)) / blob->bs->pages_per_cluster; +} + +/* Given an io unit offset into a blob, look up if it is from allocated cluster. */ +static inline bool +_spdk_bs_io_unit_is_allocated(struct spdk_blob *blob, uint64_t io_unit) +{ + uint64_t lba; + uint64_t page; + uint64_t pages_per_cluster; + + pages_per_cluster = blob->bs->pages_per_cluster; + page = _spdk_bs_io_unit_to_page(blob->bs, io_unit); + + assert(page < blob->active.num_clusters * pages_per_cluster); + + lba = blob->active.clusters[page / pages_per_cluster]; + + if (lba == 0) { + assert(spdk_blob_is_thin_provisioned(blob)); + return false; + } else { + return true; + } +} + +#endif diff --git a/src/spdk/lib/blob/request.c b/src/spdk/lib/blob/request.c new file mode 100644 index 00000000..b66fa765 --- /dev/null +++ b/src/spdk/lib/blob/request.c @@ -0,0 +1,558 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "blobstore.h" +#include "request.h" + +#include "spdk/thread.h" +#include "spdk/queue.h" + +#include "spdk_internal/log.h" + +void +spdk_bs_call_cpl(struct spdk_bs_cpl *cpl, int bserrno) +{ + switch (cpl->type) { + case SPDK_BS_CPL_TYPE_BS_BASIC: + cpl->u.bs_basic.cb_fn(cpl->u.bs_basic.cb_arg, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BS_HANDLE: + cpl->u.bs_handle.cb_fn(cpl->u.bs_handle.cb_arg, + cpl->u.bs_handle.bs, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOB_BASIC: + cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOBID: + cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, + cpl->u.blobid.blobid, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOB_HANDLE: + cpl->u.blob_handle.cb_fn(cpl->u.blob_handle.cb_arg, + cpl->u.blob_handle.blob, + bserrno); + break; + case SPDK_BS_CPL_TYPE_NESTED_SEQUENCE: + cpl->u.nested_seq.cb_fn(cpl->u.nested_seq.cb_arg, + cpl->u.nested_seq.parent, + bserrno); + break; + case SPDK_BS_CPL_TYPE_NONE: + /* this completion's callback is handled elsewhere */ + break; + } +} + +static void +spdk_bs_request_set_complete(struct spdk_bs_request_set *set) +{ + struct spdk_bs_cpl cpl = set->cpl; + int bserrno = set->bserrno; + + TAILQ_INSERT_TAIL(&set->channel->reqs, set, link); + + spdk_bs_call_cpl(&cpl, bserrno); +} + +static void +spdk_bs_sequence_completion(struct spdk_io_channel *channel, void *cb_arg, int bserrno) +{ + struct spdk_bs_request_set *set = cb_arg; + + set->bserrno = bserrno; + set->u.sequence.cb_fn((spdk_bs_sequence_t *)set, set->u.sequence.cb_arg, bserrno); +} + +spdk_bs_sequence_t * +spdk_bs_sequence_start(struct spdk_io_channel *_channel, + struct spdk_bs_cpl *cpl) +{ + struct spdk_bs_channel *channel; + struct spdk_bs_request_set *set; + + channel = spdk_io_channel_get_ctx(_channel); + + set = TAILQ_FIRST(&channel->reqs); + if (!set) { + return NULL; + } + TAILQ_REMOVE(&channel->reqs, set, link); + + set->cpl = *cpl; + set->bserrno = 0; + set->channel = channel; + + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->cb_args.cb_arg = set; + set->cb_args.channel = channel->dev_channel; + + return (spdk_bs_sequence_t *)set; +} + +void +spdk_bs_sequence_read_bs_dev(spdk_bs_sequence_t *seq, struct spdk_bs_dev *bs_dev, + void *payload, uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + bs_dev->read(bs_dev, spdk_io_channel_from_ctx(channel), payload, lba, lba_count, &set->cb_args); +} + +void +spdk_bs_sequence_read_dev(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set 
*)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->read(channel->dev, channel->dev_channel, payload, lba, lba_count, &set->cb_args); +} + +void +spdk_bs_sequence_write_dev(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Writing %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->write(channel->dev, channel->dev_channel, payload, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_readv_bs_dev(spdk_bs_sequence_t *seq, struct spdk_bs_dev *bs_dev, + struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + bs_dev->readv(bs_dev, spdk_io_channel_from_ctx(channel), iov, iovcnt, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_readv_dev(spdk_bs_sequence_t *seq, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + channel->dev->readv(channel->dev, channel->dev_channel, iov, iovcnt, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_writev_dev(spdk_bs_sequence_t *seq, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Writing %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->writev(channel->dev, channel->dev_channel, iov, iovcnt, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_unmap_dev(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Unmapping %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->unmap(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "writing zeroes to %" 
PRIu32 " blocks at LBA %" PRIu64 "\n", + lba_count, lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno) +{ + if (bserrno != 0) { + seq->bserrno = bserrno; + } + spdk_bs_request_set_complete((struct spdk_bs_request_set *)seq); +} + +void +spdk_bs_user_op_sequence_finish(void *cb_arg, int bserrno) +{ + spdk_bs_sequence_t *seq = cb_arg; + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +spdk_bs_batch_completion(struct spdk_io_channel *_channel, + void *cb_arg, int bserrno) +{ + struct spdk_bs_request_set *set = cb_arg; + + set->u.batch.outstanding_ops--; + if (bserrno != 0) { + set->bserrno = bserrno; + } + + if (set->u.batch.outstanding_ops == 0 && set->u.batch.batch_closed) { + if (set->u.batch.cb_fn) { + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->u.batch.cb_fn((spdk_bs_sequence_t *)set, set->u.batch.cb_arg, bserrno); + } else { + spdk_bs_request_set_complete(set); + } + } +} + +spdk_bs_batch_t * +spdk_bs_batch_open(struct spdk_io_channel *_channel, + struct spdk_bs_cpl *cpl) +{ + struct spdk_bs_channel *channel; + struct spdk_bs_request_set *set; + + channel = spdk_io_channel_get_ctx(_channel); + + set = TAILQ_FIRST(&channel->reqs); + if (!set) { + return NULL; + } + TAILQ_REMOVE(&channel->reqs, set, link); + + set->cpl = *cpl; + set->bserrno = 0; + set->channel = channel; + + set->u.batch.cb_fn = NULL; + set->u.batch.cb_arg = NULL; + set->u.batch.outstanding_ops = 0; + set->u.batch.batch_closed = 0; + + set->cb_args.cb_fn = spdk_bs_batch_completion; + set->cb_args.cb_arg = set; + set->cb_args.channel = channel->dev_channel; + + return (spdk_bs_batch_t *)set; +} + +void +spdk_bs_batch_read_bs_dev(spdk_bs_batch_t *batch, struct spdk_bs_dev *bs_dev, + void *payload, uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.batch.outstanding_ops++; + bs_dev->read(bs_dev, spdk_io_channel_from_ctx(channel), payload, lba, lba_count, &set->cb_args); +} + +void +spdk_bs_batch_read_dev(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Reading %" PRIu32 " blocks from LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.batch.outstanding_ops++; + channel->dev->read(channel->dev, channel->dev_channel, payload, lba, lba_count, &set->cb_args); +} + +void +spdk_bs_batch_write_dev(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Writing %" PRIu32 " blocks to LBA %" PRIu64 "\n", lba_count, lba); + + set->u.batch.outstanding_ops++; + channel->dev->write(channel->dev, channel->dev_channel, payload, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_unmap_dev(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Unmapping %" PRIu32 
" blocks at LBA %" PRIu64 "\n", lba_count, + lba); + + set->u.batch.outstanding_ops++; + channel->dev->unmap(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_write_zeroes_dev(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_DEBUGLOG(SPDK_LOG_BLOB_RW, "Zeroing %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count, lba); + + set->u.batch.outstanding_ops++; + channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_close(spdk_bs_batch_t *batch) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + + set->u.batch.batch_closed = 1; + + if (set->u.batch.outstanding_ops == 0) { + if (set->u.batch.cb_fn) { + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->u.batch.cb_fn((spdk_bs_sequence_t *)set, set->u.batch.cb_arg, set->bserrno); + } else { + spdk_bs_request_set_complete(set); + } + } +} + +spdk_bs_batch_t * +spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq, spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + + set->u.batch.cb_fn = cb_fn; + set->u.batch.cb_arg = cb_arg; + set->u.batch.outstanding_ops = 0; + set->u.batch.batch_closed = 0; + + set->cb_args.cb_fn = spdk_bs_batch_completion; + + return set; +} + +spdk_bs_sequence_t * +spdk_bs_batch_to_sequence(spdk_bs_batch_t *batch) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + + set->u.batch.outstanding_ops++; + + set->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + set->cpl.u.blob_basic.cb_fn = spdk_bs_sequence_to_batch_completion; + set->cpl.u.blob_basic.cb_arg = set; + set->bserrno = 0; + + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->cb_args.cb_arg = set; + set->cb_args.channel = set->channel->dev_channel; + + return (spdk_bs_sequence_t *)set; +} + +spdk_bs_user_op_t * +spdk_bs_user_op_alloc(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl, + enum spdk_blob_op_type op_type, struct spdk_blob *blob, + void *payload, int iovcnt, uint64_t offset, uint64_t length) +{ + struct spdk_bs_channel *channel; + struct spdk_bs_request_set *set; + struct spdk_bs_user_op_args *args; + + channel = spdk_io_channel_get_ctx(_channel); + + set = TAILQ_FIRST(&channel->reqs); + if (!set) { + return NULL; + } + TAILQ_REMOVE(&channel->reqs, set, link); + + set->cpl = *cpl; + set->channel = channel; + + args = &set->u.user_op; + + args->type = op_type; + args->iovcnt = iovcnt; + args->blob = blob; + args->offset = offset; + args->length = length; + args->payload = payload; + + return (spdk_bs_user_op_t *)set; +} + +void +spdk_bs_user_op_execute(spdk_bs_user_op_t *op) +{ + struct spdk_bs_request_set *set; + struct spdk_bs_user_op_args *args; + struct spdk_io_channel *ch; + + set = (struct spdk_bs_request_set *)op; + args = &set->u.user_op; + ch = spdk_io_channel_from_ctx(set->channel); + + switch (args->type) { + case SPDK_BLOB_READ: + spdk_blob_io_read(args->blob, ch, args->payload, args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, set->cpl.u.blob_basic.cb_arg); + break; + case SPDK_BLOB_WRITE: + spdk_blob_io_write(args->blob, ch, args->payload, args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, set->cpl.u.blob_basic.cb_arg); + break; + case SPDK_BLOB_UNMAP: + spdk_blob_io_unmap(args->blob, ch, args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, 
set->cpl.u.blob_basic.cb_arg); + break; + case SPDK_BLOB_WRITE_ZEROES: + spdk_blob_io_write_zeroes(args->blob, ch, args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, set->cpl.u.blob_basic.cb_arg); + break; + case SPDK_BLOB_READV: + spdk_blob_io_readv(args->blob, ch, args->payload, args->iovcnt, + args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, set->cpl.u.blob_basic.cb_arg); + break; + case SPDK_BLOB_WRITEV: + spdk_blob_io_writev(args->blob, ch, args->payload, args->iovcnt, + args->offset, args->length, + set->cpl.u.blob_basic.cb_fn, set->cpl.u.blob_basic.cb_arg); + break; + } + TAILQ_INSERT_TAIL(&set->channel->reqs, set, link); +} + +void +spdk_bs_user_op_abort(spdk_bs_user_op_t *op) +{ + struct spdk_bs_request_set *set; + + set = (struct spdk_bs_request_set *)op; + + set->cpl.u.blob_basic.cb_fn(set->cpl.u.blob_basic.cb_arg, -EIO); + TAILQ_INSERT_TAIL(&set->channel->reqs, set, link); +} + +void +spdk_bs_sequence_to_batch_completion(void *cb_arg, int bserrno) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)cb_arg; + + set->u.batch.outstanding_ops--; + + if (set->u.batch.outstanding_ops == 0 && set->u.batch.batch_closed) { + if (set->cb_args.cb_fn) { + set->cb_args.cb_fn(set->cb_args.channel, set->cb_args.cb_arg, bserrno); + } + } +} + +SPDK_LOG_REGISTER_COMPONENT("blob_rw", SPDK_LOG_BLOB_RW) diff --git a/src/spdk/lib/blob/request.h b/src/spdk/lib/blob/request.h new file mode 100644 index 00000000..4efb5cd5 --- /dev/null +++ b/src/spdk/lib/blob/request.h @@ -0,0 +1,223 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BS_REQUEST_H +#define SPDK_BS_REQUEST_H + +#include "spdk/stdinc.h" + +#include "spdk/blob.h" + +enum spdk_bs_cpl_type { + SPDK_BS_CPL_TYPE_NONE, + SPDK_BS_CPL_TYPE_BS_BASIC, + SPDK_BS_CPL_TYPE_BS_HANDLE, + SPDK_BS_CPL_TYPE_BLOB_BASIC, + SPDK_BS_CPL_TYPE_BLOBID, + SPDK_BS_CPL_TYPE_BLOB_HANDLE, + SPDK_BS_CPL_TYPE_NESTED_SEQUENCE, +}; + +enum spdk_blob_op_type; + +struct spdk_bs_request_set; + +/* Use a sequence to submit a set of requests serially */ +typedef struct spdk_bs_request_set spdk_bs_sequence_t; + +/* Use a batch to submit a set of requests in parallel */ +typedef struct spdk_bs_request_set spdk_bs_batch_t; + +/* Use a user_op to queue a user operation for later execution */ +typedef struct spdk_bs_request_set spdk_bs_user_op_t; + +typedef void (*spdk_bs_nested_seq_complete)(void *cb_arg, spdk_bs_sequence_t *parent, int bserrno); + +struct spdk_bs_cpl { + enum spdk_bs_cpl_type type; + union { + struct { + spdk_bs_op_complete cb_fn; + void *cb_arg; + } bs_basic; + + struct { + spdk_bs_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_blob_store *bs; + } bs_handle; + + struct { + spdk_blob_op_complete cb_fn; + void *cb_arg; + } blob_basic; + + struct { + spdk_blob_op_with_id_complete cb_fn; + void *cb_arg; + spdk_blob_id blobid; + } blobid; + + struct { + spdk_blob_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_blob *blob; + } blob_handle; + + struct { + spdk_bs_nested_seq_complete cb_fn; + void *cb_arg; + spdk_bs_sequence_t *parent; + } nested_seq; + } u; +}; + +typedef void (*spdk_bs_sequence_cpl)(spdk_bs_sequence_t *sequence, + void *cb_arg, int bserrno); + +/* A generic request set. Can be a sequence, batch or a user_op. */ +struct spdk_bs_request_set { + struct spdk_bs_cpl cpl; + + int bserrno; + + struct spdk_bs_channel *channel; + + struct spdk_bs_dev_cb_args cb_args; + + union { + struct { + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; + } sequence; + + struct { + uint32_t outstanding_ops; + uint32_t batch_closed; + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; + } batch; + + struct spdk_bs_user_op_args { + int type; + int iovcnt; + struct spdk_blob *blob; + uint64_t offset; + uint64_t length; + spdk_blob_op_complete cb_fn; + void *cb_arg; + void *payload; /* cast to iov for readv/writev */ + } user_op; + } u; + + TAILQ_ENTRY(spdk_bs_request_set) link; +}; + +void spdk_bs_call_cpl(struct spdk_bs_cpl *cpl, int bserrno); + +spdk_bs_sequence_t *spdk_bs_sequence_start(struct spdk_io_channel *channel, + struct spdk_bs_cpl *cpl); + +void spdk_bs_sequence_read_bs_dev(spdk_bs_sequence_t *seq, struct spdk_bs_dev *bs_dev, + void *payload, uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_read_dev(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_write_dev(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_readv_bs_dev(spdk_bs_batch_t *batch, struct spdk_bs_dev *bs_dev, + struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_readv_dev(spdk_bs_batch_t *batch, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_writev_dev(spdk_bs_batch_t *batch, struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, 
void *cb_arg); + +void spdk_bs_sequence_unmap_dev(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno); + +void spdk_bs_user_op_sequence_finish(void *cb_arg, int bserrno); + +spdk_bs_batch_t *spdk_bs_batch_open(struct spdk_io_channel *channel, + struct spdk_bs_cpl *cpl); + +void spdk_bs_batch_read_bs_dev(spdk_bs_batch_t *batch, struct spdk_bs_dev *bs_dev, + void *payload, uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_read_dev(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_write_dev(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_unmap_dev(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_write_zeroes_dev(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_close(spdk_bs_batch_t *batch); + +spdk_bs_batch_t *spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq, + spdk_bs_sequence_cpl cb_fn, + void *cb_arg); + +spdk_bs_sequence_t *spdk_bs_batch_to_sequence(spdk_bs_batch_t *batch); + +spdk_bs_user_op_t *spdk_bs_user_op_alloc(struct spdk_io_channel *channel, struct spdk_bs_cpl *cpl, + enum spdk_blob_op_type op_type, struct spdk_blob *blob, + void *payload, int iovcnt, uint64_t offset, uint64_t length); + +void spdk_bs_user_op_execute(spdk_bs_user_op_t *op); + +void spdk_bs_user_op_abort(spdk_bs_user_op_t *op); + +void spdk_bs_sequence_to_batch_completion(void *cb_arg, int bserrno); + +#endif diff --git a/src/spdk/lib/blob/zeroes.c b/src/spdk/lib/blob/zeroes.c new file mode 100644 index 00000000..5b482417 --- /dev/null +++ b/src/spdk/lib/blob/zeroes.c @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/blob.h" + +#include "blobstore.h" + +static void +zeroes_destroy(struct spdk_bs_dev *bs_dev) +{ + return; +} + +static void +zeroes_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + memset(payload, 0, dev->blocklen * lba_count); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +zeroes_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +zeroes_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + memset(iov[i].iov_base, 0, iov[i].iov_len); + } + + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + + + +static void +zeroes_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); + assert(false); +} + +static void +zeroes_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +zeroes_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static struct spdk_bs_dev g_zeroes_bs_dev = { + .blockcnt = UINT64_MAX, + .blocklen = 512, + .create_channel = NULL, + .destroy_channel = NULL, + .destroy = zeroes_destroy, + .read = zeroes_read, + .write = zeroes_write, + .readv = zeroes_readv, + .writev = zeroes_writev, + .write_zeroes = zeroes_write_zeroes, + .unmap = zeroes_unmap, +}; + +struct spdk_bs_dev * +spdk_bs_create_zeroes_dev(void) +{ + return &g_zeroes_bs_dev; +} diff --git a/src/spdk/lib/blobfs/Makefile b/src/spdk/lib/blobfs/Makefile new file mode 100644 index 00000000..ea36b6ab --- /dev/null +++ b/src/spdk/lib/blobfs/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = blobfs.c tree.c +LIBNAME = blobfs + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/blobfs/blobfs.c b/src/spdk/lib/blobfs/blobfs.c new file mode 100644 index 00000000..48e9f481 --- /dev/null +++ b/src/spdk/lib/blobfs/blobfs.c @@ -0,0 +1,2617 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/blobfs.h" +#include "spdk/conf.h" +#include "blobfs_internal.h" + +#include "spdk/queue.h" +#include "spdk/thread.h" +#include "spdk/assert.h" +#include "spdk/env.h" +#include "spdk/util.h" +#include "spdk_internal/log.h" + +#define BLOBFS_TRACE(file, str, args...) \ + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s " str, file->name, ##args) + +#define BLOBFS_TRACE_RW(file, str, args...) 
\ + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS_RW, "file=%s " str, file->name, ##args) + +#define BLOBFS_DEFAULT_CACHE_SIZE (4ULL * 1024 * 1024 * 1024) +#define SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ (1024 * 1024) + +static uint64_t g_fs_cache_size = BLOBFS_DEFAULT_CACHE_SIZE; +static struct spdk_mempool *g_cache_pool; +static TAILQ_HEAD(, spdk_file) g_caches; +static int g_fs_count = 0; +static pthread_mutex_t g_cache_init_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_spinlock_t g_caches_lock; + +void +spdk_cache_buffer_free(struct cache_buffer *cache_buffer) +{ + spdk_mempool_put(g_cache_pool, cache_buffer->buf); + free(cache_buffer); +} + +#define CACHE_READAHEAD_THRESHOLD (128 * 1024) + +struct spdk_file { + struct spdk_filesystem *fs; + struct spdk_blob *blob; + char *name; + uint64_t length; + bool is_deleted; + bool open_for_writing; + uint64_t length_flushed; + uint64_t append_pos; + uint64_t seq_byte_count; + uint64_t next_seq_offset; + uint32_t priority; + TAILQ_ENTRY(spdk_file) tailq; + spdk_blob_id blobid; + uint32_t ref_count; + pthread_spinlock_t lock; + struct cache_buffer *last; + struct cache_tree *tree; + TAILQ_HEAD(open_requests_head, spdk_fs_request) open_requests; + TAILQ_HEAD(sync_requests_head, spdk_fs_request) sync_requests; + TAILQ_ENTRY(spdk_file) cache_tailq; +}; + +struct spdk_deleted_file { + spdk_blob_id id; + TAILQ_ENTRY(spdk_deleted_file) tailq; +}; + +struct spdk_filesystem { + struct spdk_blob_store *bs; + TAILQ_HEAD(, spdk_file) files; + struct spdk_bs_opts bs_opts; + struct spdk_bs_dev *bdev; + fs_send_request_fn send_request; + + struct { + uint32_t max_ops; + struct spdk_io_channel *sync_io_channel; + struct spdk_fs_channel *sync_fs_channel; + } sync_target; + + struct { + uint32_t max_ops; + struct spdk_io_channel *md_io_channel; + struct spdk_fs_channel *md_fs_channel; + } md_target; + + struct { + uint32_t max_ops; + } io_target; +}; + +struct spdk_fs_cb_args { + union { + spdk_fs_op_with_handle_complete fs_op_with_handle; + spdk_fs_op_complete fs_op; + spdk_file_op_with_handle_complete file_op_with_handle; + spdk_file_op_complete file_op; + spdk_file_stat_op_complete stat_op; + } fn; + void *arg; + sem_t *sem; + struct spdk_filesystem *fs; + struct spdk_file *file; + int rc; + bool from_request; + union { + struct { + TAILQ_HEAD(, spdk_deleted_file) deleted_files; + } fs_load; + struct { + uint64_t length; + } truncate; + struct { + struct spdk_io_channel *channel; + void *user_buf; + void *pin_buf; + int is_read; + off_t offset; + size_t length; + uint64_t start_lba; + uint64_t num_lba; + uint32_t blocklen; + } rw; + struct { + const char *old_name; + const char *new_name; + } rename; + struct { + struct cache_buffer *cache_buffer; + uint64_t length; + } flush; + struct { + struct cache_buffer *cache_buffer; + uint64_t length; + uint64_t offset; + } readahead; + struct { + uint64_t offset; + TAILQ_ENTRY(spdk_fs_request) tailq; + bool xattr_in_progress; + } sync; + struct { + uint32_t num_clusters; + } resize; + struct { + const char *name; + uint32_t flags; + TAILQ_ENTRY(spdk_fs_request) tailq; + } open; + struct { + const char *name; + struct spdk_blob *blob; + } create; + struct { + const char *name; + } delete; + struct { + const char *name; + } stat; + } op; +}; + +static void cache_free_buffers(struct spdk_file *file); + +void +spdk_fs_opts_init(struct spdk_blobfs_opts *opts) +{ + opts->cluster_sz = SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ; +} + +static void +__initialize_cache(void) +{ + assert(g_cache_pool == NULL); + + g_cache_pool = 
spdk_mempool_create("spdk_fs_cache", + g_fs_cache_size / CACHE_BUFFER_SIZE, + CACHE_BUFFER_SIZE, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!g_cache_pool) { + SPDK_ERRLOG("Create mempool failed, you may " + "increase the memory and try again\n"); + assert(false); + } + TAILQ_INIT(&g_caches); + pthread_spin_init(&g_caches_lock, 0); +} + +static void +__free_cache(void) +{ + assert(g_cache_pool != NULL); + + spdk_mempool_free(g_cache_pool); + g_cache_pool = NULL; +} + +static uint64_t +__file_get_blob_size(struct spdk_file *file) +{ + uint64_t cluster_sz; + + cluster_sz = file->fs->bs_opts.cluster_sz; + return cluster_sz * spdk_blob_get_num_clusters(file->blob); +} + +struct spdk_fs_request { + struct spdk_fs_cb_args args; + TAILQ_ENTRY(spdk_fs_request) link; + struct spdk_fs_channel *channel; +}; + +struct spdk_fs_channel { + struct spdk_fs_request *req_mem; + TAILQ_HEAD(, spdk_fs_request) reqs; + sem_t sem; + struct spdk_filesystem *fs; + struct spdk_io_channel *bs_channel; + fs_send_request_fn send_request; + bool sync; + pthread_spinlock_t lock; +}; + +static struct spdk_fs_request * +alloc_fs_request(struct spdk_fs_channel *channel) +{ + struct spdk_fs_request *req; + + if (channel->sync) { + pthread_spin_lock(&channel->lock); + } + + req = TAILQ_FIRST(&channel->reqs); + if (req) { + TAILQ_REMOVE(&channel->reqs, req, link); + } + + if (channel->sync) { + pthread_spin_unlock(&channel->lock); + } + + if (req == NULL) { + return NULL; + } + memset(req, 0, sizeof(*req)); + req->channel = channel; + req->args.from_request = true; + + return req; +} + +static void +free_fs_request(struct spdk_fs_request *req) +{ + struct spdk_fs_channel *channel = req->channel; + + if (channel->sync) { + pthread_spin_lock(&channel->lock); + } + + TAILQ_INSERT_HEAD(&req->channel->reqs, req, link); + + if (channel->sync) { + pthread_spin_unlock(&channel->lock); + } +} + +static int +_spdk_fs_channel_create(struct spdk_filesystem *fs, struct spdk_fs_channel *channel, + uint32_t max_ops) +{ + uint32_t i; + + channel->req_mem = calloc(max_ops, sizeof(struct spdk_fs_request)); + if (!channel->req_mem) { + return -1; + } + + TAILQ_INIT(&channel->reqs); + sem_init(&channel->sem, 0, 0); + + for (i = 0; i < max_ops; i++) { + TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link); + } + + channel->fs = fs; + + return 0; +} + +static int +_spdk_fs_md_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_filesystem *fs; + struct spdk_fs_channel *channel = ctx_buf; + + fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, md_target); + + return _spdk_fs_channel_create(fs, channel, fs->md_target.max_ops); +} + +static int +_spdk_fs_sync_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_filesystem *fs; + struct spdk_fs_channel *channel = ctx_buf; + + fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, sync_target); + + return _spdk_fs_channel_create(fs, channel, fs->sync_target.max_ops); +} + +static int +_spdk_fs_io_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_filesystem *fs; + struct spdk_fs_channel *channel = ctx_buf; + + fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, io_target); + + return _spdk_fs_channel_create(fs, channel, fs->io_target.max_ops); +} + +static void +_spdk_fs_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_fs_channel *channel = ctx_buf; + + free(channel->req_mem); + if (channel->bs_channel != NULL) { + spdk_bs_free_io_channel(channel->bs_channel); + } +} + +static void 
+__send_request_direct(fs_request_fn fn, void *arg) +{ + fn(arg); +} + +static void +common_fs_bs_init(struct spdk_filesystem *fs, struct spdk_blob_store *bs) +{ + fs->bs = bs; + fs->bs_opts.cluster_sz = spdk_bs_get_cluster_size(bs); + fs->md_target.md_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs); + fs->md_target.md_fs_channel->send_request = __send_request_direct; + fs->sync_target.sync_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs); + fs->sync_target.sync_fs_channel->send_request = __send_request_direct; + + pthread_mutex_lock(&g_cache_init_lock); + if (g_fs_count == 0) { + __initialize_cache(); + } + g_fs_count++; + pthread_mutex_unlock(&g_cache_init_lock); +} + +static void +init_cb(void *ctx, struct spdk_blob_store *bs, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + if (bserrno == 0) { + common_fs_bs_init(fs, bs); + } else { + free(fs); + fs = NULL; + } + + args->fn.fs_op_with_handle(args->arg, fs, bserrno); + free_fs_request(req); +} + +static void +fs_conf_parse(void) +{ + struct spdk_conf_section *sp; + + sp = spdk_conf_find_section(NULL, "Blobfs"); + if (sp == NULL) { + g_fs_cache_buffer_shift = CACHE_BUFFER_SHIFT_DEFAULT; + return; + } + + g_fs_cache_buffer_shift = spdk_conf_section_get_intval(sp, "CacheBufferShift"); + if (g_fs_cache_buffer_shift <= 0) { + g_fs_cache_buffer_shift = CACHE_BUFFER_SHIFT_DEFAULT; + } +} + +static struct spdk_filesystem * +fs_alloc(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn) +{ + struct spdk_filesystem *fs; + + fs = calloc(1, sizeof(*fs)); + if (fs == NULL) { + return NULL; + } + + fs->bdev = dev; + fs->send_request = send_request_fn; + TAILQ_INIT(&fs->files); + + fs->md_target.max_ops = 512; + spdk_io_device_register(&fs->md_target, _spdk_fs_md_channel_create, _spdk_fs_channel_destroy, + sizeof(struct spdk_fs_channel), "blobfs_md"); + fs->md_target.md_io_channel = spdk_get_io_channel(&fs->md_target); + fs->md_target.md_fs_channel = spdk_io_channel_get_ctx(fs->md_target.md_io_channel); + + fs->sync_target.max_ops = 512; + spdk_io_device_register(&fs->sync_target, _spdk_fs_sync_channel_create, _spdk_fs_channel_destroy, + sizeof(struct spdk_fs_channel), "blobfs_sync"); + fs->sync_target.sync_io_channel = spdk_get_io_channel(&fs->sync_target); + fs->sync_target.sync_fs_channel = spdk_io_channel_get_ctx(fs->sync_target.sync_io_channel); + + fs->io_target.max_ops = 512; + spdk_io_device_register(&fs->io_target, _spdk_fs_io_channel_create, _spdk_fs_channel_destroy, + sizeof(struct spdk_fs_channel), "blobfs_io"); + + return fs; +} + +static void +__wake_caller(void *arg, int fserrno) +{ + struct spdk_fs_cb_args *args = arg; + + args->rc = fserrno; + sem_post(args->sem); +} + +void +spdk_fs_init(struct spdk_bs_dev *dev, struct spdk_blobfs_opts *opt, + fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + struct spdk_bs_opts opts = {}; + + fs = fs_alloc(dev, send_request_fn); + if (fs == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + fs_conf_parse(); + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + spdk_put_io_channel(fs->md_target.md_io_channel); + spdk_io_device_unregister(&fs->md_target, NULL); + spdk_put_io_channel(fs->sync_target.sync_io_channel); + spdk_io_device_unregister(&fs->sync_target, NULL); + spdk_io_device_unregister(&fs->io_target, 
NULL); + free(fs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op_with_handle = cb_fn; + args->arg = cb_arg; + args->fs = fs; + + spdk_bs_opts_init(&opts); + snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), "BLOBFS"); + if (opt) { + opts.cluster_sz = opt->cluster_sz; + } + spdk_bs_init(dev, &opts, init_cb, req); +} + +static struct spdk_file * +file_alloc(struct spdk_filesystem *fs) +{ + struct spdk_file *file; + + file = calloc(1, sizeof(*file)); + if (file == NULL) { + return NULL; + } + + file->tree = calloc(1, sizeof(*file->tree)); + if (file->tree == NULL) { + free(file); + return NULL; + } + + file->fs = fs; + TAILQ_INIT(&file->open_requests); + TAILQ_INIT(&file->sync_requests); + pthread_spin_init(&file->lock, 0); + TAILQ_INSERT_TAIL(&fs->files, file, tailq); + file->priority = SPDK_FILE_PRIORITY_LOW; + return file; +} + +static void fs_load_done(void *ctx, int bserrno); + +static int +_handle_deleted_files(struct spdk_fs_request *req) +{ + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + if (!TAILQ_EMPTY(&args->op.fs_load.deleted_files)) { + struct spdk_deleted_file *deleted_file; + + deleted_file = TAILQ_FIRST(&args->op.fs_load.deleted_files); + TAILQ_REMOVE(&args->op.fs_load.deleted_files, deleted_file, tailq); + spdk_bs_delete_blob(fs->bs, deleted_file->id, fs_load_done, req); + free(deleted_file); + return 0; + } + + return 1; +} + +static void +fs_load_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + /* The filesystem has been loaded. Now check if there are any files that + * were marked for deletion before last unload. Do not complete the + * fs_load callback until all of them have been deleted on disk. + */ + if (_handle_deleted_files(req) == 0) { + /* We found a file that's been marked for deleting but not actually + * deleted yet. This function will get called again once the delete + * operation is completed. 
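+ * (_handle_deleted_files() removes one entry from the deleted_files list and
+ * calls spdk_bs_delete_blob() with fs_load_done as its completion callback,
+ * so this path repeats until the list is empty.)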
+ */ + return; + } + + args->fn.fs_op_with_handle(args->arg, fs, 0); + free_fs_request(req); + +} + +static void +iter_cb(void *ctx, struct spdk_blob *blob, int rc) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + uint64_t *length; + const char *name; + uint32_t *is_deleted; + size_t value_len; + + if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + + rc = spdk_blob_get_xattr_value(blob, "name", (const void **)&name, &value_len); + if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + + rc = spdk_blob_get_xattr_value(blob, "length", (const void **)&length, &value_len); + if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + + assert(value_len == 8); + + /* This file may have been marked for deletion but never closed (e.g. the app crashed before closing it), so finish deleting it now */ + rc = spdk_blob_get_xattr_value(blob, "is_deleted", (const void **)&is_deleted, &value_len); + if (rc < 0) { + struct spdk_file *f; + + f = file_alloc(fs); + if (f == NULL) { + args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM); + free_fs_request(req); + return; + } + + f->name = strdup(name); + f->blobid = spdk_blob_get_id(blob); + f->length = *length; + f->length_flushed = *length; + f->append_pos = *length; + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "added file %s length=%ju\n", f->name, f->length); + } else { + struct spdk_deleted_file *deleted_file; + + deleted_file = calloc(1, sizeof(*deleted_file)); + if (deleted_file == NULL) { + args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM); + free_fs_request(req); + return; + } + deleted_file->id = spdk_blob_get_id(blob); + TAILQ_INSERT_TAIL(&args->op.fs_load.deleted_files, deleted_file, tailq); + } +} + +static void +load_cb(void *ctx, struct spdk_blob_store *bs, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + struct spdk_bs_type bstype; + static const struct spdk_bs_type blobfs_type = {"BLOBFS"}; + static const struct spdk_bs_type zeros; + + if (bserrno != 0) { + args->fn.fs_op_with_handle(args->arg, NULL, bserrno); + free_fs_request(req); + free(fs); + return; + } + + bstype = spdk_bs_get_bstype(bs); + + if (!memcmp(&bstype, &zeros, sizeof(bstype))) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "assigning bstype\n"); + spdk_bs_set_bstype(bs, blobfs_type); + } else if (memcmp(&bstype, &blobfs_type, sizeof(bstype))) { + SPDK_DEBUGLOG(SPDK_LOG_BLOB, "not blobfs\n"); + SPDK_TRACEDUMP(SPDK_LOG_BLOB, "bstype", &bstype, sizeof(bstype)); + args->fn.fs_op_with_handle(args->arg, NULL, bserrno); + free_fs_request(req); + free(fs); + return; + } + + common_fs_bs_init(fs, bs); + fs_load_done(req, 0); +} + +static void +spdk_fs_io_device_unregister(struct spdk_filesystem *fs) +{ + assert(fs != NULL); + spdk_io_device_unregister(&fs->md_target, NULL); + spdk_io_device_unregister(&fs->sync_target, NULL); + spdk_io_device_unregister(&fs->io_target, NULL); + free(fs); +} + +static void +spdk_fs_free_io_channels(struct spdk_filesystem *fs) +{ + assert(fs != NULL); + spdk_fs_free_io_channel(fs->md_target.md_io_channel); + spdk_fs_free_io_channel(fs->sync_target.sync_io_channel); +} + +void +spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + struct spdk_fs_cb_args *args; + struct spdk_fs_request *req; +
struct spdk_bs_opts bs_opts; + + fs = fs_alloc(dev, send_request_fn); + if (fs == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + fs_conf_parse(); + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + spdk_fs_free_io_channels(fs); + spdk_fs_io_device_unregister(fs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op_with_handle = cb_fn; + args->arg = cb_arg; + args->fs = fs; + TAILQ_INIT(&args->op.fs_load.deleted_files); + spdk_bs_opts_init(&bs_opts); + bs_opts.iter_cb_fn = iter_cb; + bs_opts.iter_cb_arg = req; + spdk_bs_load(dev, &bs_opts, load_cb, req); +} + +static void +unload_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + struct spdk_file *file, *tmp; + + TAILQ_FOREACH_SAFE(file, &fs->files, tailq, tmp) { + TAILQ_REMOVE(&fs->files, file, tailq); + cache_free_buffers(file); + free(file->name); + free(file->tree); + free(file); + } + + pthread_mutex_lock(&g_cache_init_lock); + g_fs_count--; + if (g_fs_count == 0) { + __free_cache(); + } + pthread_mutex_unlock(&g_cache_init_lock); + + args->fn.fs_op(args->arg, bserrno); + free(req); + + spdk_fs_io_device_unregister(fs); +} + +void +spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + /* + * We must free the md_channel before unloading the blobstore, so just + * allocate this request from the general heap. + */ + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op = cb_fn; + args->arg = cb_arg; + args->fs = fs; + + spdk_fs_free_io_channels(fs); + spdk_bs_unload(fs->bs, unload_cb, req); +} + +static struct spdk_file * +fs_find_file(struct spdk_filesystem *fs, const char *name) +{ + struct spdk_file *file; + + TAILQ_FOREACH(file, &fs->files, tailq) { + if (!strncmp(name, file->name, SPDK_FILE_NAME_MAX)) { + return file; + } + } + + return NULL; +} + +void +spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name, + spdk_file_stat_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file_stat stat; + struct spdk_file *f = NULL; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, NULL, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f != NULL) { + stat.blobid = f->blobid; + stat.size = f->append_pos >= f->length ? 
f->append_pos : f->length; + cb_fn(cb_arg, &stat, 0); + return; + } + + cb_fn(cb_arg, NULL, -ENOENT); +} + +static void +__copy_stat(void *arg, struct spdk_file_stat *stat, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + if (fserrno == 0) { + memcpy(args->arg, stat, sizeof(*stat)); + } + sem_post(args->sem); +} + +static void +__file_stat(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_file_stat_async(args->fs, args->op.stat.name, + args->fn.stat_op, req); +} + +int +spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name, struct spdk_file_stat *stat) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + int rc; + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + req->args.fs = fs; + req->args.op.stat.name = name; + req->args.fn.stat_op = __copy_stat; + req->args.arg = stat; + req->args.sem = &channel->sem; + channel->send_request(__file_stat, req); + sem_wait(&channel->sem); + + rc = req->args.rc; + free_fs_request(req); + + return rc; +} + +static void +fs_create_blob_close_cb(void *ctx, int bserrno) +{ + int rc; + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + rc = args->rc ? args->rc : bserrno; + args->fn.file_op(args->arg, rc); + free_fs_request(req); +} + +static void +fs_create_blob_resize_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + struct spdk_blob *blob = args->op.create.blob; + uint64_t length = 0; + + args->rc = bserrno; + if (bserrno) { + spdk_blob_close(blob, fs_create_blob_close_cb, args); + return; + } + + spdk_blob_set_xattr(blob, "name", f->name, strlen(f->name) + 1); + spdk_blob_set_xattr(blob, "length", &length, sizeof(length)); + + spdk_blob_close(blob, fs_create_blob_close_cb, args); +} + +static void +fs_create_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + if (bserrno) { + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); + return; + } + + args->op.create.blob = blob; + spdk_blob_resize(blob, 1, fs_create_blob_resize_cb, req); +} + +static void +fs_create_blob_create_cb(void *ctx, spdk_blob_id blobid, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + + if (bserrno) { + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); + return; + } + + f->blobid = blobid; + spdk_bs_open_blob(f->fs->bs, blobid, fs_create_blob_open_cb, req); +} + +void +spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *file; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + file = fs_find_file(fs, name); + if (file != NULL) { + cb_fn(cb_arg, -EEXIST); + return; + } + + file = file_alloc(fs); + if (file == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->file = file; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + + 
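/* Keep an in-memory copy of the name; the blob's "name" xattr is written later, in fs_create_blob_resize_cb(). */ +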
file->name = strdup(name); + spdk_bs_create_blob(fs->bs, fs_create_blob_create_cb, args); +} + +static void +__fs_create_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + sem_post(args->sem); + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.create.name); +} + +static void +__fs_create_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.create.name); + spdk_fs_create_file_async(args->fs, args->op.create.name, __fs_create_file_done, req); +} + +int +spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, const char *name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name); + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + args->fs = fs; + args->op.create.name = name; + args->sem = &channel->sem; + fs->send_request(__fs_create_file, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + + return rc; +} + +static void +fs_open_blob_done(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + + f->blob = blob; + while (!TAILQ_EMPTY(&f->open_requests)) { + req = TAILQ_FIRST(&f->open_requests); + args = &req->args; + TAILQ_REMOVE(&f->open_requests, req, args.op.open.tailq); + args->fn.file_op_with_handle(args->arg, f, bserrno); + free_fs_request(req); + } +} + +static void +fs_open_blob_create_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + struct spdk_filesystem *fs = args->fs; + + if (file == NULL) { + /* + * This is from an open with CREATE flag - the file + * is now created so look it up in the file list for this + * filesystem. + */ + file = fs_find_file(fs, args->op.open.name); + assert(file != NULL); + args->file = file; + } + + file->ref_count++; + TAILQ_INSERT_TAIL(&file->open_requests, req, args.op.open.tailq); + if (file->ref_count == 1) { + assert(file->blob == NULL); + spdk_bs_open_blob(fs->bs, file->blobid, fs_open_blob_done, req); + } else if (file->blob != NULL) { + fs_open_blob_done(req, file->blob, 0); + } else { + /* + * The blob open for this file is in progress due to a previous + * open request. When that open completes, it will invoke the + * open callback for this request. 
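+ * (The request was queued on file->open_requests above; fs_open_blob_done()
+ * drains that list and completes every waiting open with the same blob.)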
+ */ + } +} + +void +spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags, + spdk_file_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f = NULL; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, NULL, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f == NULL && !(flags & SPDK_BLOBFS_OPEN_CREATE)) { + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + + if (f != NULL && f->is_deleted == true) { + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op_with_handle = cb_fn; + args->arg = cb_arg; + args->file = f; + args->fs = fs; + args->op.open.name = name; + + if (f == NULL) { + spdk_fs_create_file_async(fs, name, fs_open_blob_create_cb, req); + } else { + fs_open_blob_create_cb(req, 0); + } +} + +static void +__fs_open_file_done(void *arg, struct spdk_file *file, int bserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->file = file; + __wake_caller(args, bserrno); + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.open.name); +} + +static void +__fs_open_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", args->op.open.name); + spdk_fs_open_file_async(args->fs, args->op.open.name, args->op.open.flags, + __fs_open_file_done, req); +} + +int +spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name, uint32_t flags, struct spdk_file **file) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name); + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + args->fs = fs; + args->op.open.name = name; + args->op.open.flags = flags; + args->sem = &channel->sem; + fs->send_request(__fs_open_file, req); + sem_wait(&channel->sem); + rc = args->rc; + if (rc == 0) { + *file = args->file; + } else { + *file = NULL; + } + free_fs_request(req); + + return rc; +} + +static void +fs_rename_blob_close_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.fs_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +fs_rename_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + const char *new_name = args->op.rename.new_name; + + spdk_blob_set_xattr(blob, "name", new_name, strlen(new_name) + 1); + spdk_blob_close(blob, fs_rename_blob_close_cb, req); +} + +static void +__spdk_fs_md_rename_file(struct spdk_fs_request *req) +{ + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f; + + f = fs_find_file(args->fs, args->op.rename.old_name); + if (f == NULL) { + args->fn.fs_op(args->arg, -ENOENT); + free_fs_request(req); + return; + } + + free(f->name); + f->name = strdup(args->op.rename.new_name); + args->file = f; + spdk_bs_open_blob(args->fs->bs, f->blobid, fs_rename_blob_open_cb, req); +} + +static void +fs_rename_delete_done(void *arg, int fserrno) +{ + __spdk_fs_md_rename_file(arg); +} + +void +spdk_fs_rename_file_async(struct 
spdk_filesystem *fs, + const char *old_name, const char *new_name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "old=%s new=%s\n", old_name, new_name); + if (strnlen(new_name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op = cb_fn; + args->fs = fs; + args->arg = cb_arg; + args->op.rename.old_name = old_name; + args->op.rename.new_name = new_name; + + f = fs_find_file(fs, new_name); + if (f == NULL) { + __spdk_fs_md_rename_file(req); + return; + } + + /* + * The rename overwrites an existing file. So delete the existing file, then + * do the actual rename. + */ + spdk_fs_delete_file_async(fs, new_name, fs_rename_delete_done, req); +} + +static void +__fs_rename_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + __wake_caller(args, fserrno); +} + +static void +__fs_rename_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_rename_file_async(args->fs, args->op.rename.old_name, args->op.rename.new_name, + __fs_rename_file_done, req); +} + +int +spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *old_name, const char *new_name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + + args->fs = fs; + args->op.rename.old_name = old_name; + args->op.rename.new_name = new_name; + args->sem = &channel->sem; + fs->send_request(__fs_rename_file, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + return rc; +} + +static void +blob_delete_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +void +spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f; + spdk_blob_id blobid; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s\n", name); + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f == NULL) { + cb_fn(cb_arg, -ENOENT); + return; + } + + req = alloc_fs_request(fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + + if (f->ref_count > 0) { + /* If the ref > 0, we mark the file as deleted and delete it when we close it. 
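The "is_deleted" xattr is synced to the blob below so that, if the application exits before the file is closed, iter_cb() will find the flag on the next spdk_fs_load() and the blob will be deleted during load.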
*/ + f->is_deleted = true; + spdk_blob_set_xattr(f->blob, "is_deleted", &f->is_deleted, sizeof(bool)); + spdk_blob_sync_md(f->blob, blob_delete_cb, args); + return; + } + + TAILQ_REMOVE(&fs->files, f, tailq); + + cache_free_buffers(f); + + blobid = f->blobid; + + free(f->name); + free(f->tree); + free(f); + + spdk_bs_delete_blob(fs->bs, blobid, blob_delete_cb, req); +} + +static void +__fs_delete_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + __wake_caller(args, fserrno); +} + +static void +__fs_delete_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_delete_file_async(args->fs, args->op.delete.name, __fs_delete_file_done, req); +} + +int +spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + args->fs = fs; + args->op.delete.name = name; + args->sem = &channel->sem; + fs->send_request(__fs_delete_file, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + + return rc; +} + +spdk_fs_iter +spdk_fs_iter_first(struct spdk_filesystem *fs) +{ + struct spdk_file *f; + + f = TAILQ_FIRST(&fs->files); + return f; +} + +spdk_fs_iter +spdk_fs_iter_next(spdk_fs_iter iter) +{ + struct spdk_file *f = iter; + + if (f == NULL) { + return NULL; + } + + f = TAILQ_NEXT(f, tailq); + return f; +} + +const char * +spdk_file_get_name(struct spdk_file *file) +{ + return file->name; +} + +uint64_t +spdk_file_get_length(struct spdk_file *file) +{ + assert(file != NULL); + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s length=0x%jx\n", file->name, file->length); + return file->length; +} + +static void +fs_truncate_complete_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +fs_truncate_resize_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + uint64_t *length = &args->op.truncate.length; + + if (bserrno) { + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); + return; + } + + spdk_blob_set_xattr(file->blob, "length", length, sizeof(*length)); + + file->length = *length; + if (file->append_pos > file->length) { + file->append_pos = file->length; + } + + spdk_blob_sync_md(file->blob, fs_truncate_complete_cb, args); +} + +static uint64_t +__bytes_to_clusters(uint64_t length, uint64_t cluster_sz) +{ + return (length + cluster_sz - 1) / cluster_sz; +} + +void +spdk_file_truncate_async(struct spdk_file *file, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + size_t num_clusters; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s old=0x%jx new=0x%jx\n", file->name, file->length, length); + if (length == file->length) { + cb_fn(cb_arg, 0); + return; + } + + req = alloc_fs_request(file->fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + args->file = file; + args->op.truncate.length = length; + fs = file->fs; + + 
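/* Round the new length up to whole clusters before resizing the blob; fs_truncate_resize_cb() then updates the "length" xattr and the in-memory file length. */ +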
num_clusters = __bytes_to_clusters(length, fs->bs_opts.cluster_sz); + + spdk_blob_resize(file->blob, num_clusters, fs_truncate_resize_cb, req); +} + +static void +__truncate(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_file_truncate_async(args->file, args->op.truncate.length, + args->fn.file_op, args); +} + +int +spdk_file_truncate(struct spdk_file *file, struct spdk_io_channel *_channel, + uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + + args->file = file; + args->op.truncate.length = length; + args->fn.file_op = __wake_caller; + args->sem = &channel->sem; + + channel->send_request(__truncate, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + + return rc; +} + +static void +__rw_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + spdk_dma_free(args->op.rw.pin_buf); + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +__read_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + assert(req != NULL); + if (args->op.rw.is_read) { + memcpy(args->op.rw.user_buf, + args->op.rw.pin_buf + (args->op.rw.offset & (args->op.rw.blocklen - 1)), + args->op.rw.length); + __rw_done(req, 0); + } else { + memcpy(args->op.rw.pin_buf + (args->op.rw.offset & (args->op.rw.blocklen - 1)), + args->op.rw.user_buf, + args->op.rw.length); + spdk_blob_io_write(args->file->blob, args->op.rw.channel, + args->op.rw.pin_buf, + args->op.rw.start_lba, args->op.rw.num_lba, + __rw_done, req); + } +} + +static void +__do_blob_read(void *ctx, int fserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + if (fserrno) { + __rw_done(req, fserrno); + return; + } + spdk_blob_io_read(args->file->blob, args->op.rw.channel, + args->op.rw.pin_buf, + args->op.rw.start_lba, args->op.rw.num_lba, + __read_done, req); +} + +static void +__get_page_parameters(struct spdk_file *file, uint64_t offset, uint64_t length, + uint64_t *start_lba, uint32_t *lba_size, uint64_t *num_lba) +{ + uint64_t end_lba; + + *lba_size = spdk_bs_get_io_unit_size(file->fs->bs); + *start_lba = offset / *lba_size; + end_lba = (offset + length - 1) / *lba_size; + *num_lba = (end_lba - *start_lba + 1); +} + +static void +__readwrite(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg, int is_read) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + uint64_t start_lba, num_lba, pin_buf_length; + uint32_t lba_size; + + if (is_read && offset + length > file->length) { + cb_fn(cb_arg, -EINVAL); + return; + } + + req = alloc_fs_request(channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba); + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + args->file = file; + args->op.rw.channel = channel->bs_channel; + args->op.rw.user_buf = payload; + args->op.rw.is_read = is_read; + args->op.rw.offset = offset; + args->op.rw.length = length; + args->op.rw.blocklen = lba_size; + + 
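/* Allocate a block-aligned bounce buffer covering whole LBAs; __read_done() copies between it and the user buffer at the intra-block offset. */ +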
pin_buf_length = num_lba * lba_size; + args->op.rw.pin_buf = spdk_dma_malloc(pin_buf_length, lba_size, NULL); + if (args->op.rw.pin_buf == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "Failed to allocate buf for: file=%s offset=%jx length=%jx\n", + file->name, offset, length); + free_fs_request(req); + cb_fn(cb_arg, -ENOMEM); + return; + } + + args->op.rw.start_lba = start_lba; + args->op.rw.num_lba = num_lba; + + if (!is_read && file->length < offset + length) { + spdk_file_truncate_async(file, offset + length, __do_blob_read, req); + } else { + __do_blob_read(req, 0); + } +} + +void +spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 0); +} + +void +spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "file=%s offset=%jx length=%jx\n", + file->name, offset, length); + __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 1); +} + +struct spdk_io_channel * +spdk_fs_alloc_io_channel(struct spdk_filesystem *fs) +{ + struct spdk_io_channel *io_channel; + struct spdk_fs_channel *fs_channel; + + io_channel = spdk_get_io_channel(&fs->io_target); + fs_channel = spdk_io_channel_get_ctx(io_channel); + fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs); + fs_channel->send_request = __send_request_direct; + + return io_channel; +} + +struct spdk_io_channel * +spdk_fs_alloc_io_channel_sync(struct spdk_filesystem *fs) +{ + struct spdk_io_channel *io_channel; + struct spdk_fs_channel *fs_channel; + + io_channel = spdk_get_io_channel(&fs->io_target); + fs_channel = spdk_io_channel_get_ctx(io_channel); + fs_channel->send_request = fs->send_request; + fs_channel->sync = 1; + pthread_spin_init(&fs_channel->lock, 0); + + return io_channel; +} + +void +spdk_fs_free_io_channel(struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +void +spdk_fs_set_cache_size(uint64_t size_in_mb) +{ + g_fs_cache_size = size_in_mb * 1024 * 1024; +} + +uint64_t +spdk_fs_get_cache_size(void) +{ + return g_fs_cache_size / (1024 * 1024); +} + +static void __file_flush(void *_args); + +static void * +alloc_cache_memory_buffer(struct spdk_file *context) +{ + struct spdk_file *file; + void *buf; + + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if (!file->open_for_writing && + file->priority == SPDK_FILE_PRIORITY_LOW && + file != context) { + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if (!file->open_for_writing && file != context) { + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if (file != context) { + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; 
+ } + } + + return NULL; +} + +static struct cache_buffer * +cache_insert_buffer(struct spdk_file *file, uint64_t offset) +{ + struct cache_buffer *buf; + int count = 0; + + buf = calloc(1, sizeof(*buf)); + if (buf == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "calloc failed\n"); + return NULL; + } + + buf->buf = alloc_cache_memory_buffer(file); + while (buf->buf == NULL) { + /* + * TODO: alloc_cache_memory_buffer() should eventually free + * some buffers. Need a more sophisticated check here, instead + * of just bailing if 100 tries does not result in getting a + * free buffer. This will involve using the sync channel's + * semaphore to block until a buffer becomes available. + */ + if (count++ == 100) { + SPDK_ERRLOG("could not allocate cache buffer\n"); + assert(false); + free(buf); + return NULL; + } + buf->buf = alloc_cache_memory_buffer(file); + } + + buf->buf_size = CACHE_BUFFER_SIZE; + buf->offset = offset; + + pthread_spin_lock(&g_caches_lock); + if (file->tree->present_mask == 0) { + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + } + file->tree = spdk_tree_insert_buffer(file->tree, buf); + pthread_spin_unlock(&g_caches_lock); + + return buf; +} + +static struct cache_buffer * +cache_append_buffer(struct spdk_file *file) +{ + struct cache_buffer *last; + + assert(file->last == NULL || file->last->bytes_filled == file->last->buf_size); + assert((file->append_pos % CACHE_BUFFER_SIZE) == 0); + + last = cache_insert_buffer(file, file->append_pos); + if (last == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_BLOBFS, "cache_insert_buffer failed\n"); + return NULL; + } + + file->last = last; + + return last; +} + +static void __check_sync_reqs(struct spdk_file *file); + +static void +__file_cache_finish_sync(void *ctx, int bserrno) +{ + struct spdk_file *file = ctx; + struct spdk_fs_request *sync_req; + struct spdk_fs_cb_args *sync_args; + + pthread_spin_lock(&file->lock); + sync_req = TAILQ_FIRST(&file->sync_requests); + sync_args = &sync_req->args; + assert(sync_args->op.sync.offset <= file->length_flushed); + BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset); + TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq); + pthread_spin_unlock(&file->lock); + + sync_args->fn.file_op(sync_args->arg, bserrno); + __check_sync_reqs(file); + + pthread_spin_lock(&file->lock); + free_fs_request(sync_req); + pthread_spin_unlock(&file->lock); +} + +static void +__free_args(struct spdk_fs_cb_args *args) +{ + struct spdk_fs_request *req; + + if (!args->from_request) { + free(args); + } else { + /* Depends on args being at the start of the spdk_fs_request structure. 
*/ + req = (struct spdk_fs_request *)args; + free_fs_request(req); + } +} + +static void +__check_sync_reqs(struct spdk_file *file) +{ + struct spdk_fs_request *sync_req; + + pthread_spin_lock(&file->lock); + + TAILQ_FOREACH(sync_req, &file->sync_requests, args.op.sync.tailq) { + if (sync_req->args.op.sync.offset <= file->length_flushed) { + break; + } + } + + if (sync_req != NULL && !sync_req->args.op.sync.xattr_in_progress) { + BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed); + sync_req->args.op.sync.xattr_in_progress = true; + spdk_blob_set_xattr(file->blob, "length", &file->length_flushed, + sizeof(file->length_flushed)); + + pthread_spin_unlock(&file->lock); + spdk_blob_sync_md(file->blob, __file_cache_finish_sync, file); + } else { + pthread_spin_unlock(&file->lock); + } +} + +static void +__file_flush_done(void *arg, int bserrno) +{ + struct spdk_fs_cb_args *args = arg; + struct spdk_file *file = args->file; + struct cache_buffer *next = args->op.flush.cache_buffer; + + BLOBFS_TRACE(file, "length=%jx\n", args->op.flush.length); + + pthread_spin_lock(&file->lock); + next->in_progress = false; + next->bytes_flushed += args->op.flush.length; + file->length_flushed += args->op.flush.length; + if (file->length_flushed > file->length) { + file->length = file->length_flushed; + } + if (next->bytes_flushed == next->buf_size) { + BLOBFS_TRACE(file, "write buffer fully flushed 0x%jx\n", file->length_flushed); + next = spdk_tree_find_buffer(file->tree, file->length_flushed); + } + + /* + * Assert that there is no cached data that extends past the end of the underlying + * blob. + */ + assert(next == NULL || next->offset < __file_get_blob_size(file) || + next->bytes_filled == 0); + + pthread_spin_unlock(&file->lock); + + __check_sync_reqs(file); + + __file_flush(args); +} + +static void +__file_flush(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + struct cache_buffer *next; + uint64_t offset, length, start_lba, num_lba; + uint32_t lba_size; + + pthread_spin_lock(&file->lock); + next = spdk_tree_find_buffer(file->tree, file->length_flushed); + if (next == NULL || next->in_progress) { + /* + * There is either no data to flush, or a flush I/O is already in + * progress. So return immediately - if a flush I/O is in + * progress we will flush more data after that is completed. + */ + __free_args(args); + if (next == NULL) { + /* + * For cases where a file's cache was evicted, and then the + * file was later appended, we will write the data directly + * to disk and bypass cache. So just update length_flushed + * here to reflect that all data was already written to disk. + */ + file->length_flushed = file->append_pos; + } + pthread_spin_unlock(&file->lock); + if (next == NULL) { + /* + * There is no data to flush, but we still need to check for any + * outstanding sync requests to make sure metadata gets updated. 
+ */ + __check_sync_reqs(file); + } + return; + } + + offset = next->offset + next->bytes_flushed; + length = next->bytes_filled - next->bytes_flushed; + if (length == 0) { + __free_args(args); + pthread_spin_unlock(&file->lock); + return; + } + args->op.flush.length = length; + args->op.flush.cache_buffer = next; + + __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba); + + next->in_progress = true; + BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n", + offset, length, start_lba, num_lba); + pthread_spin_unlock(&file->lock); + spdk_blob_io_write(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel, + next->buf + (start_lba * lba_size) - next->offset, + start_lba, num_lba, __file_flush_done, args); +} + +static void +__file_extend_done(void *arg, int bserrno) +{ + struct spdk_fs_cb_args *args = arg; + + __wake_caller(args, bserrno); +} + +static void +__file_extend_resize_cb(void *_args, int bserrno) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + + if (bserrno) { + __wake_caller(args, bserrno); + return; + } + + spdk_blob_sync_md(file->blob, __file_extend_done, args); +} + +static void +__file_extend_blob(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + + spdk_blob_resize(file->blob, args->op.resize.num_clusters, __file_extend_resize_cb, args); +} + +static void +__rw_from_file_done(void *arg, int bserrno) +{ + struct spdk_fs_cb_args *args = arg; + + __wake_caller(args, bserrno); + __free_args(args); +} + +static void +__rw_from_file(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + + if (args->op.rw.is_read) { + spdk_file_read_async(file, file->fs->sync_target.sync_io_channel, args->op.rw.user_buf, + args->op.rw.offset, args->op.rw.length, + __rw_from_file_done, args); + } else { + spdk_file_write_async(file, file->fs->sync_target.sync_io_channel, args->op.rw.user_buf, + args->op.rw.offset, args->op.rw.length, + __rw_from_file_done, args); + } +} + +static int +__send_rw_from_file(struct spdk_file *file, sem_t *sem, void *payload, + uint64_t offset, uint64_t length, bool is_read) +{ + struct spdk_fs_cb_args *args; + + args = calloc(1, sizeof(*args)); + if (args == NULL) { + sem_post(sem); + return -ENOMEM; + } + + args->file = file; + args->sem = sem; + args->op.rw.user_buf = payload; + args->op.rw.offset = offset; + args->op.rw.length = length; + args->op.rw.is_read = is_read; + file->fs->send_request(__rw_from_file, args); + return 0; +} + +int +spdk_file_write(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_cb_args *args; + uint64_t rem_length, copy, blob_size, cluster_sz; + uint32_t cache_buffers_filled = 0; + uint8_t *cur_payload; + struct cache_buffer *last; + + BLOBFS_TRACE_RW(file, "offset=%jx length=%jx\n", offset, length); + + if (length == 0) { + return 0; + } + + if (offset != file->append_pos) { + BLOBFS_TRACE(file, " error offset=%jx append_pos=%jx\n", offset, file->append_pos); + return -EINVAL; + } + + pthread_spin_lock(&file->lock); + file->open_for_writing = true; + + if (file->last == NULL) { + if (file->append_pos % CACHE_BUFFER_SIZE == 0) { + cache_append_buffer(file); + } else { + int rc; + + file->append_pos += length; + pthread_spin_unlock(&file->lock); + rc = __send_rw_from_file(file, &channel->sem, payload, + offset, length, false); + 
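+ /*
+ * Cache-bypass path: the file has no active cache buffer and the
+ * append position is not cache-buffer aligned, so the data was
+ * handed directly to the sync I/O channel above. Block on the
+ * channel semaphore until that request completes.
+ */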
sem_wait(&channel->sem); + return rc; + } + } + + blob_size = __file_get_blob_size(file); + + if ((offset + length) > blob_size) { + struct spdk_fs_cb_args extend_args = {}; + + cluster_sz = file->fs->bs_opts.cluster_sz; + extend_args.sem = &channel->sem; + extend_args.op.resize.num_clusters = __bytes_to_clusters((offset + length), cluster_sz); + extend_args.file = file; + BLOBFS_TRACE(file, "start resize to %u clusters\n", extend_args.op.resize.num_clusters); + pthread_spin_unlock(&file->lock); + file->fs->send_request(__file_extend_blob, &extend_args); + sem_wait(&channel->sem); + if (extend_args.rc) { + return extend_args.rc; + } + } + + last = file->last; + rem_length = length; + cur_payload = payload; + while (rem_length > 0) { + copy = last->buf_size - last->bytes_filled; + if (copy > rem_length) { + copy = rem_length; + } + BLOBFS_TRACE_RW(file, " fill offset=%jx length=%jx\n", file->append_pos, copy); + memcpy(&last->buf[last->bytes_filled], cur_payload, copy); + file->append_pos += copy; + if (file->length < file->append_pos) { + file->length = file->append_pos; + } + cur_payload += copy; + last->bytes_filled += copy; + rem_length -= copy; + if (last->bytes_filled == last->buf_size) { + cache_buffers_filled++; + last = cache_append_buffer(file); + if (last == NULL) { + BLOBFS_TRACE(file, "nomem\n"); + pthread_spin_unlock(&file->lock); + return -ENOMEM; + } + } + } + + pthread_spin_unlock(&file->lock); + + if (cache_buffers_filled == 0) { + return 0; + } + + args = calloc(1, sizeof(*args)); + if (args == NULL) { + return -ENOMEM; + } + + args->file = file; + file->fs->send_request(__file_flush, args); + return 0; +} + +static void +__readahead_done(void *arg, int bserrno) +{ + struct spdk_fs_cb_args *args = arg; + struct cache_buffer *cache_buffer = args->op.readahead.cache_buffer; + struct spdk_file *file = args->file; + + BLOBFS_TRACE(file, "offset=%jx\n", cache_buffer->offset); + + pthread_spin_lock(&file->lock); + cache_buffer->bytes_filled = args->op.readahead.length; + cache_buffer->bytes_flushed = args->op.readahead.length; + cache_buffer->in_progress = false; + pthread_spin_unlock(&file->lock); + + __free_args(args); +} + +static void +__readahead(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + uint64_t offset, length, start_lba, num_lba; + uint32_t lba_size; + + offset = args->op.readahead.offset; + length = args->op.readahead.length; + assert(length > 0); + + __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba); + + BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n", + offset, length, start_lba, num_lba); + spdk_blob_io_read(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel, + args->op.readahead.cache_buffer->buf, + start_lba, num_lba, __readahead_done, args); +} + +static uint64_t +__next_cache_buffer_offset(uint64_t offset) +{ + return (offset + CACHE_BUFFER_SIZE) & ~(CACHE_TREE_LEVEL_MASK(0)); +} + +static void +check_readahead(struct spdk_file *file, uint64_t offset) +{ + struct spdk_fs_cb_args *args; + + offset = __next_cache_buffer_offset(offset); + if (spdk_tree_find_buffer(file->tree, offset) != NULL || file->length <= offset) { + return; + } + + args = calloc(1, sizeof(*args)); + if (args == NULL) { + return; + } + + BLOBFS_TRACE(file, "offset=%jx\n", offset); + + args->file = file; + args->op.readahead.offset = offset; + args->op.readahead.cache_buffer = cache_insert_buffer(file, offset); + if (!args->op.readahead.cache_buffer) { + BLOBFS_TRACE(file, "Cannot 
allocate buf for offset=%jx\n", offset); + free(args); + return; + } + + args->op.readahead.cache_buffer->in_progress = true; + if (file->length < (offset + CACHE_BUFFER_SIZE)) { + args->op.readahead.length = file->length & (CACHE_BUFFER_SIZE - 1); + } else { + args->op.readahead.length = CACHE_BUFFER_SIZE; + } + file->fs->send_request(__readahead, args); +} + +static int +__file_read(struct spdk_file *file, void *payload, uint64_t offset, uint64_t length, sem_t *sem) +{ + struct cache_buffer *buf; + int rc; + + buf = spdk_tree_find_filled_buffer(file->tree, offset); + if (buf == NULL) { + pthread_spin_unlock(&file->lock); + rc = __send_rw_from_file(file, sem, payload, offset, length, true); + pthread_spin_lock(&file->lock); + return rc; + } + + if ((offset + length) > (buf->offset + buf->bytes_filled)) { + length = buf->offset + buf->bytes_filled - offset; + } + BLOBFS_TRACE(file, "read %p offset=%ju length=%ju\n", payload, offset, length); + memcpy(payload, &buf->buf[offset - buf->offset], length); + if ((offset + length) % CACHE_BUFFER_SIZE == 0) { + pthread_spin_lock(&g_caches_lock); + spdk_tree_remove_buffer(file->tree, buf); + if (file->tree->present_mask == 0) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + } + pthread_spin_unlock(&g_caches_lock); + } + + sem_post(sem); + return 0; +} + +int64_t +spdk_file_read(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + uint64_t final_offset, final_length; + uint32_t sub_reads = 0; + int rc = 0; + + pthread_spin_lock(&file->lock); + + BLOBFS_TRACE_RW(file, "offset=%ju length=%ju\n", offset, length); + + file->open_for_writing = false; + + if (length == 0 || offset >= file->append_pos) { + pthread_spin_unlock(&file->lock); + return 0; + } + + if (offset + length > file->append_pos) { + length = file->append_pos - offset; + } + + if (offset != file->next_seq_offset) { + file->seq_byte_count = 0; + } + file->seq_byte_count += length; + file->next_seq_offset = offset + length; + if (file->seq_byte_count >= CACHE_READAHEAD_THRESHOLD) { + check_readahead(file, offset); + check_readahead(file, offset + CACHE_BUFFER_SIZE); + } + + final_length = 0; + final_offset = offset + length; + while (offset < final_offset) { + length = NEXT_CACHE_BUFFER_OFFSET(offset) - offset; + if (length > (final_offset - offset)) { + length = final_offset - offset; + } + rc = __file_read(file, payload, offset, length, &channel->sem); + if (rc == 0) { + final_length += length; + } else { + break; + } + payload += length; + offset += length; + sub_reads++; + } + pthread_spin_unlock(&file->lock); + while (sub_reads-- > 0) { + sem_wait(&channel->sem); + } + if (rc == 0) { + return final_length; + } else { + return rc; + } +} + +static void +_file_sync(struct spdk_file *file, struct spdk_fs_channel *channel, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *sync_req; + struct spdk_fs_request *flush_req; + struct spdk_fs_cb_args *sync_args; + struct spdk_fs_cb_args *flush_args; + + BLOBFS_TRACE(file, "offset=%jx\n", file->append_pos); + + pthread_spin_lock(&file->lock); + if (file->append_pos <= file->length_flushed) { + BLOBFS_TRACE(file, "done - no data to flush\n"); + pthread_spin_unlock(&file->lock); + cb_fn(cb_arg, 0); + return; + } + + sync_req = alloc_fs_request(channel); + if (!sync_req) { + pthread_spin_unlock(&file->lock); + cb_fn(cb_arg, -ENOMEM); + return; + } + sync_args = &sync_req->args; + + flush_req = 
alloc_fs_request(channel); + if (!flush_req) { + pthread_spin_unlock(&file->lock); + cb_fn(cb_arg, -ENOMEM); + return; + } + flush_args = &flush_req->args; + + sync_args->file = file; + sync_args->fn.file_op = cb_fn; + sync_args->arg = cb_arg; + sync_args->op.sync.offset = file->append_pos; + sync_args->op.sync.xattr_in_progress = false; + TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq); + pthread_spin_unlock(&file->lock); + + flush_args->file = file; + channel->send_request(__file_flush, flush_args); +} + +int +spdk_file_sync(struct spdk_file *file, struct spdk_io_channel *_channel) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_cb_args args = {}; + + args.sem = &channel->sem; + _file_sync(file, channel, __wake_caller, &args); + sem_wait(&channel->sem); + + return args.rc; +} + +void +spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *_channel, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + + _file_sync(file, channel, cb_fn, cb_arg); +} + +void +spdk_file_set_priority(struct spdk_file *file, uint32_t priority) +{ + BLOBFS_TRACE(file, "priority=%u\n", priority); + file->priority = priority; + +} + +/* + * Close routines + */ + +static void +__file_close_async_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + + if (file->is_deleted) { + spdk_fs_delete_file_async(file->fs, file->name, blob_delete_cb, ctx); + return; + } + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +__file_close_async(struct spdk_file *file, struct spdk_fs_request *req) +{ + struct spdk_blob *blob; + + pthread_spin_lock(&file->lock); + if (file->ref_count == 0) { + pthread_spin_unlock(&file->lock); + __file_close_async_done(req, -EBADF); + return; + } + + file->ref_count--; + if (file->ref_count > 0) { + pthread_spin_unlock(&file->lock); + req->args.fn.file_op(req->args.arg, 0); + free_fs_request(req); + return; + } + + pthread_spin_unlock(&file->lock); + + blob = file->blob; + file->blob = NULL; + spdk_blob_close(blob, __file_close_async_done, req); +} + +static void +__file_close_async__sync_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + __file_close_async(args->file, req); +} + +void +spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + req = alloc_fs_request(file->fs->md_target.md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->file = file; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + + spdk_file_sync_async(file, file->fs->md_target.md_io_channel, __file_close_async__sync_done, req); +} + +static void +__file_close(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + + __file_close_async(file, req); +} + +int +spdk_file_close(struct spdk_file *file, struct spdk_io_channel *_channel) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + req = alloc_fs_request(channel); + if (req == NULL) { + return -ENOMEM; + } + + args = &req->args; + + spdk_file_sync(file, _channel); + BLOBFS_TRACE(file, "name=%s\n", file->name); + 
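+ /*
+ * Dirty data was flushed by spdk_file_sync() above, so the close request
+ * below only has to drop the reference and close the blob; __wake_caller()
+ * posts the channel semaphore once __file_close_async() completes.
+ */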
args->file = file; + args->sem = &channel->sem; + args->fn.file_op = __wake_caller; + args->arg = req; + channel->send_request(__file_close, req); + sem_wait(&channel->sem); + + return args->rc; +} + +int +spdk_file_get_id(struct spdk_file *file, void *id, size_t size) +{ + if (size < sizeof(spdk_blob_id)) { + return -EINVAL; + } + + memcpy(id, &file->blobid, sizeof(spdk_blob_id)); + + return sizeof(spdk_blob_id); +} + +static void +cache_free_buffers(struct spdk_file *file) +{ + BLOBFS_TRACE(file, "free=%s\n", file->name); + pthread_spin_lock(&file->lock); + pthread_spin_lock(&g_caches_lock); + if (file->tree->present_mask == 0) { + pthread_spin_unlock(&g_caches_lock); + pthread_spin_unlock(&file->lock); + return; + } + spdk_tree_free_buffers(file->tree); + + TAILQ_REMOVE(&g_caches, file, cache_tailq); + /* If not freed, put it in the end of the queue */ + if (file->tree->present_mask != 0) { + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + } + file->last = NULL; + pthread_spin_unlock(&g_caches_lock); + pthread_spin_unlock(&file->lock); +} + +SPDK_LOG_REGISTER_COMPONENT("blobfs", SPDK_LOG_BLOBFS) +SPDK_LOG_REGISTER_COMPONENT("blobfs_rw", SPDK_LOG_BLOBFS_RW) diff --git a/src/spdk/lib/blobfs/blobfs_internal.h b/src/spdk/lib/blobfs/blobfs_internal.h new file mode 100644 index 00000000..4e2ae395 --- /dev/null +++ b/src/spdk/lib/blobfs/blobfs_internal.h @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BLOBFS_INTERNAL_H +#define SPDK_BLOBFS_INTERNAL_H + +#include "tree.h" + +void spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name, + spdk_file_stat_op_complete cb_fn, void *cb_arg); +void spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_args); +void spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags, + spdk_file_op_with_handle_complete cb_fn, void *cb_arg); +void spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_fs_rename_file_async(struct spdk_filesystem *fs, const char *old_name, + const char *new_name, spdk_fs_op_complete cb_fn, + void *cb_arg); +void spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_file_truncate_async(struct spdk_file *file, uint64_t length, + spdk_file_op_complete cb_fn, void *arg); +void spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/* Sync all dirty cache buffers to the backing block device. For async + * usage models, completion of the sync indicates only that data written + * when the sync command was issued have been flushed to disk - it does + * not guarantee any writes submitted after the sync have been flushed, + * even if those writes are completed before the sync. + */ +void spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *channel, + spdk_file_op_complete cb_fn, void *cb_arg); + +#endif /* SPDK_BLOBFS_INTERNAL_H_ */ diff --git a/src/spdk/lib/blobfs/tree.c b/src/spdk/lib/blobfs/tree.c new file mode 100644 index 00000000..ffb6bce6 --- /dev/null +++ b/src/spdk/lib/blobfs/tree.c @@ -0,0 +1,181 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/blobfs.h" +#include "blobfs_internal.h" + +#include "spdk/queue.h" +#include "spdk/assert.h" +#include "spdk/env.h" +#include "spdk_internal/log.h" + +uint32_t g_fs_cache_buffer_shift = CACHE_BUFFER_SHIFT_DEFAULT; + +struct cache_buffer * +spdk_tree_find_buffer(struct cache_tree *tree, uint64_t offset) +{ + uint64_t index; + + while (tree != NULL) { + index = offset / CACHE_TREE_LEVEL_SIZE(tree->level); + if (index >= CACHE_TREE_WIDTH) { + return NULL; + } + if (tree->level == 0) { + return tree->u.buffer[index]; + } else { + offset &= CACHE_TREE_LEVEL_MASK(tree->level); + tree = tree->u.tree[index]; + } + } + + return NULL; +} + +struct cache_buffer * +spdk_tree_find_filled_buffer(struct cache_tree *tree, uint64_t offset) +{ + struct cache_buffer *buf; + + buf = spdk_tree_find_buffer(tree, offset); + if (buf != NULL && buf->bytes_filled > 0) { + return buf; + } else { + return NULL; + } +} + +struct cache_tree * +spdk_tree_insert_buffer(struct cache_tree *root, struct cache_buffer *buffer) +{ + struct cache_tree *tree; + uint64_t index, offset; + + offset = buffer->offset; + while (offset >= CACHE_TREE_LEVEL_SIZE(root->level + 1)) { + if (root->present_mask != 0) { + tree = calloc(1, sizeof(*tree)); + tree->level = root->level + 1; + tree->u.tree[0] = root; + root = tree; + root->present_mask = 0x1ULL; + } else { + root->level++; + } + } + + tree = root; + while (tree->level > 0) { + index = offset / CACHE_TREE_LEVEL_SIZE(tree->level); + assert(index < CACHE_TREE_WIDTH); + offset &= CACHE_TREE_LEVEL_MASK(tree->level); + if (tree->u.tree[index] == NULL) { + tree->u.tree[index] = calloc(1, sizeof(*tree)); + tree->u.tree[index]->level = tree->level - 1; + tree->present_mask |= (1ULL << index); + } + tree = tree->u.tree[index]; + } + + index = offset / CACHE_BUFFER_SIZE; + assert(index < CACHE_TREE_WIDTH); + assert(tree->u.buffer[index] == NULL); + tree->u.buffer[index] = buffer; + tree->present_mask |= (1ULL << index); + return root; +} + +void +spdk_tree_remove_buffer(struct cache_tree *tree, struct cache_buffer *buffer) +{ + struct cache_tree *child; + uint64_t index; + + index = CACHE_TREE_INDEX(tree->level, buffer->offset); + + if (tree->level == 0) { + assert(tree->u.buffer[index] != NULL); + assert(buffer == tree->u.buffer[index]); + tree->present_mask &= ~(1ULL << index); + tree->u.buffer[index] = NULL; + spdk_cache_buffer_free(buffer); + return; + } + + child = tree->u.tree[index]; + assert(child != NULL); + spdk_tree_remove_buffer(child, buffer); + if (child->present_mask == 0) { + tree->present_mask &= ~(1ULL << index); + tree->u.tree[index] = NULL; + free(child); + } +} + +void +spdk_tree_free_buffers(struct cache_tree *tree) +{ + struct cache_buffer *buffer; + struct cache_tree *child; + uint32_t i; + + if (tree->present_mask == 0) { + return; + } + + if (tree->level == 0) { + for (i = 0; i < CACHE_TREE_WIDTH; i++) { + buffer = tree->u.buffer[i]; + if (buffer != NULL && buffer->in_progress == false 
&& + buffer->bytes_filled == buffer->bytes_flushed) { + spdk_cache_buffer_free(buffer); + tree->u.buffer[i] = NULL; + tree->present_mask &= ~(1ULL << i); + } + } + } else { + for (i = 0; i < CACHE_TREE_WIDTH; i++) { + child = tree->u.tree[i]; + if (child != NULL) { + spdk_tree_free_buffers(child); + if (child->present_mask == 0) { + free(child); + tree->u.tree[i] = NULL; + tree->present_mask &= ~(1ULL << i); + } + } + } + } +} diff --git a/src/spdk/lib/blobfs/tree.h b/src/spdk/lib/blobfs/tree.h new file mode 100644 index 00000000..9bde83c6 --- /dev/null +++ b/src/spdk/lib/blobfs/tree.h @@ -0,0 +1,77 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_TREE_H_ +#define SPDK_TREE_H_ + +struct cache_buffer { + uint8_t *buf; + uint64_t offset; + uint32_t buf_size; + uint32_t bytes_filled; + uint32_t bytes_flushed; + bool in_progress; +}; + +extern uint32_t g_fs_cache_buffer_shift; + +#define CACHE_BUFFER_SHIFT_DEFAULT 18 +#define CACHE_BUFFER_SIZE (1U << g_fs_cache_buffer_shift) +#define NEXT_CACHE_BUFFER_OFFSET(offset) \ + (((offset + CACHE_BUFFER_SIZE) >> g_fs_cache_buffer_shift) << g_fs_cache_buffer_shift) + +#define CACHE_TREE_SHIFT 6 +#define CACHE_TREE_WIDTH (1U << CACHE_TREE_SHIFT) +#define CACHE_TREE_LEVEL_SHIFT(level) (g_fs_cache_buffer_shift + (level) * CACHE_TREE_SHIFT) +#define CACHE_TREE_LEVEL_SIZE(level) (1ULL << CACHE_TREE_LEVEL_SHIFT(level)) +#define CACHE_TREE_LEVEL_MASK(level) (CACHE_TREE_LEVEL_SIZE(level) - 1) +#define CACHE_TREE_INDEX(level, offset) ((offset >> CACHE_TREE_LEVEL_SHIFT(level)) & (CACHE_TREE_WIDTH - 1)) + +struct cache_tree { + uint8_t level; + uint64_t present_mask; + union { + struct cache_buffer *buffer[CACHE_TREE_WIDTH]; + struct cache_tree *tree[CACHE_TREE_WIDTH]; + } u; +}; + +void spdk_cache_buffer_free(struct cache_buffer *cache_buffer); + +struct cache_tree *spdk_tree_insert_buffer(struct cache_tree *root, struct cache_buffer *buffer); +void spdk_tree_free_buffers(struct cache_tree *tree); +struct cache_buffer *spdk_tree_find_buffer(struct cache_tree *tree, uint64_t offset); +struct cache_buffer *spdk_tree_find_filled_buffer(struct cache_tree *tree, uint64_t offset); +void spdk_tree_remove_buffer(struct cache_tree *tree, struct cache_buffer *buffer); + +#endif /* SPDK_TREE_H_ */ diff --git a/src/spdk/lib/conf/Makefile b/src/spdk/lib/conf/Makefile new file mode 100644 index 00000000..0cdfda1d --- /dev/null +++ b/src/spdk/lib/conf/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
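+# Common compiler flags and the library build rules come from the shared SPDK
+# make fragments included below.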
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = conf.c +LIBNAME = conf + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/conf/conf.c b/src/spdk/lib/conf/conf.c new file mode 100644 index 00000000..384b088c --- /dev/null +++ b/src/spdk/lib/conf/conf.c @@ -0,0 +1,684 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/conf.h" +#include "spdk/string.h" +#include "spdk/log.h" + +struct spdk_conf_value { + struct spdk_conf_value *next; + char *value; +}; + +struct spdk_conf_item { + struct spdk_conf_item *next; + char *key; + struct spdk_conf_value *val; +}; + +struct spdk_conf_section { + struct spdk_conf_section *next; + char *name; + int num; + struct spdk_conf_item *item; +}; + +struct spdk_conf { + char *file; + struct spdk_conf_section *current_section; + struct spdk_conf_section *section; +}; + +#define CF_DELIM " \t" + +#define LIB_MAX_TMPBUF 1024 + +static struct spdk_conf *default_config = NULL; + +struct spdk_conf * +spdk_conf_allocate(void) +{ + return calloc(1, sizeof(struct spdk_conf)); +} + +static void +free_conf_value(struct spdk_conf_value *vp) +{ + if (vp == NULL) { + return; + } + + if (vp->value) { + free(vp->value); + } + + free(vp); +} + +static void +free_all_conf_value(struct spdk_conf_value *vp) +{ + struct spdk_conf_value *next; + + if (vp == NULL) { + return; + } + + while (vp != NULL) { + next = vp->next; + free_conf_value(vp); + vp = next; + } +} + +static void +free_conf_item(struct spdk_conf_item *ip) +{ + if (ip == NULL) { + return; + } + + if (ip->val != NULL) { + free_all_conf_value(ip->val); + } + + if (ip->key != NULL) { + free(ip->key); + } + + free(ip); +} + +static void +free_all_conf_item(struct spdk_conf_item *ip) +{ + struct spdk_conf_item *next; + + if (ip == NULL) { + return; + } + + while (ip != NULL) { + next = ip->next; + free_conf_item(ip); + ip = next; + } +} + +static void +free_conf_section(struct spdk_conf_section *sp) +{ + if (sp == NULL) { + return; + } + + if (sp->item) { + free_all_conf_item(sp->item); + } + + if (sp->name) { + free(sp->name); + } + + free(sp); +} + +static void +free_all_conf_section(struct spdk_conf_section *sp) +{ + struct spdk_conf_section *next; + + if (sp == NULL) { + return; + } + + while (sp != NULL) { + next = sp->next; + free_conf_section(sp); + sp = next; + } +} + +void +spdk_conf_free(struct spdk_conf *cp) +{ + if (cp == NULL) { + return; + } + + if (cp->section != NULL) { + free_all_conf_section(cp->section); + } + + if (cp->file != NULL) { + free(cp->file); + } + + free(cp); +} + +static struct spdk_conf_section * +allocate_cf_section(void) +{ + return calloc(1, sizeof(struct spdk_conf_section)); +} + +static struct spdk_conf_item * +allocate_cf_item(void) +{ + return calloc(1, sizeof(struct spdk_conf_item)); +} + +static struct spdk_conf_value * +allocate_cf_value(void) +{ + return calloc(1, sizeof(struct spdk_conf_value)); +} + + +#define CHECK_CP_OR_USE_DEFAULT(cp) (((cp) == NULL) && (default_config != NULL)) ? 
default_config : (cp) + +struct spdk_conf_section * +spdk_conf_find_section(struct spdk_conf *cp, const char *name) +{ + struct spdk_conf_section *sp; + + if (name == NULL || name[0] == '\0') { + return NULL; + } + + cp = CHECK_CP_OR_USE_DEFAULT(cp); + if (cp == NULL) { + return NULL; + } + + for (sp = cp->section; sp != NULL; sp = sp->next) { + if (sp->name != NULL && sp->name[0] == name[0] + && strcasecmp(sp->name, name) == 0) { + return sp; + } + } + + return NULL; +} + +struct spdk_conf_section * +spdk_conf_first_section(struct spdk_conf *cp) +{ + cp = CHECK_CP_OR_USE_DEFAULT(cp); + if (cp == NULL) { + return NULL; + } + + return cp->section; +} + +struct spdk_conf_section * +spdk_conf_next_section(struct spdk_conf_section *sp) +{ + if (sp == NULL) { + return NULL; + } + + return sp->next; +} + +static void +append_cf_section(struct spdk_conf *cp, struct spdk_conf_section *sp) +{ + struct spdk_conf_section *last; + + cp = CHECK_CP_OR_USE_DEFAULT(cp); + if (cp == NULL) { + SPDK_ERRLOG("cp == NULL\n"); + return; + } + + if (cp->section == NULL) { + cp->section = sp; + return; + } + + for (last = cp->section; last->next != NULL; last = last->next) + ; + last->next = sp; +} + +static struct spdk_conf_item * +find_cf_nitem(struct spdk_conf_section *sp, const char *key, int idx) +{ + struct spdk_conf_item *ip; + int i; + + if (key == NULL || key[0] == '\0') { + return NULL; + } + + i = 0; + for (ip = sp->item; ip != NULL; ip = ip->next) { + if (ip->key != NULL && ip->key[0] == key[0] + && strcasecmp(ip->key, key) == 0) { + if (i == idx) { + return ip; + } + i++; + } + } + + return NULL; +} + +static void +append_cf_item(struct spdk_conf_section *sp, struct spdk_conf_item *ip) +{ + struct spdk_conf_item *last; + + if (sp == NULL) { + return; + } + + if (sp->item == NULL) { + sp->item = ip; + return; + } + + for (last = sp->item; last->next != NULL; last = last->next) + ; + last->next = ip; +} + +static void +append_cf_value(struct spdk_conf_item *ip, struct spdk_conf_value *vp) +{ + struct spdk_conf_value *last; + + if (ip == NULL) { + return; + } + + if (ip->val == NULL) { + ip->val = vp; + return; + } + + for (last = ip->val; last->next != NULL; last = last->next) + ; + last->next = vp; +} + +bool +spdk_conf_section_match_prefix(const struct spdk_conf_section *sp, const char *name_prefix) +{ + return strncasecmp(sp->name, name_prefix, strlen(name_prefix)) == 0; +} + +const char * +spdk_conf_section_get_name(const struct spdk_conf_section *sp) +{ + return sp->name; +} + +int +spdk_conf_section_get_num(const struct spdk_conf_section *sp) +{ + return sp->num; +} + +char * +spdk_conf_section_get_nmval(struct spdk_conf_section *sp, const char *key, int idx1, int idx2) +{ + struct spdk_conf_item *ip; + struct spdk_conf_value *vp; + int i; + + ip = find_cf_nitem(sp, key, idx1); + if (ip == NULL) { + return NULL; + } + + vp = ip->val; + if (vp == NULL) { + return NULL; + } + + for (i = 0; vp != NULL; vp = vp->next, i++) { + if (i == idx2) { + return vp->value; + } + } + + return NULL; +} + +char * +spdk_conf_section_get_nval(struct spdk_conf_section *sp, const char *key, int idx) +{ + struct spdk_conf_item *ip; + struct spdk_conf_value *vp; + + ip = find_cf_nitem(sp, key, idx); + if (ip == NULL) { + return NULL; + } + + vp = ip->val; + if (vp == NULL) { + return NULL; + } + + return vp->value; +} + +char * +spdk_conf_section_get_val(struct spdk_conf_section *sp, const char *key) +{ + return spdk_conf_section_get_nval(sp, key, 0); +} + +int +spdk_conf_section_get_intval(struct spdk_conf_section *sp, 
const char *key) +{ + const char *v; + int value; + + v = spdk_conf_section_get_nval(sp, key, 0); + if (v == NULL) { + return -1; + } + + value = (int)strtol(v, NULL, 10); + return value; +} + +bool +spdk_conf_section_get_boolval(struct spdk_conf_section *sp, const char *key, bool default_val) +{ + const char *v; + + v = spdk_conf_section_get_nval(sp, key, 0); + if (v == NULL) { + return default_val; + } + + if (!strcasecmp(v, "Yes") || !strcasecmp(v, "Y") || !strcasecmp(v, "True")) { + return true; + } + + if (!strcasecmp(v, "No") || !strcasecmp(v, "N") || !strcasecmp(v, "False")) { + return false; + } + + return default_val; +} + +static int +parse_line(struct spdk_conf *cp, char *lp) +{ + struct spdk_conf_section *sp; + struct spdk_conf_item *ip; + struct spdk_conf_value *vp; + char *arg; + char *key; + char *val; + char *p; + int num; + + arg = spdk_str_trim(lp); + if (arg == NULL) { + SPDK_ERRLOG("no section\n"); + return -1; + } + + if (arg[0] == '[') { + /* section */ + arg++; + key = spdk_strsepq(&arg, "]"); + if (key == NULL || arg != NULL) { + SPDK_ERRLOG("broken section\n"); + return -1; + } + /* determine section number */ + for (p = key; *p != '\0' && !isdigit((int) *p); p++) + ; + if (*p != '\0') { + num = (int)strtol(p, NULL, 10); + } else { + num = 0; + } + + sp = spdk_conf_find_section(cp, key); + if (sp == NULL) { + sp = allocate_cf_section(); + append_cf_section(cp, sp); + + sp->name = strdup(key); + if (sp->name == NULL) { + SPDK_ERRLOG("cannot duplicate %s to sp->name\n", key); + return -1; + } + } + cp->current_section = sp; + + + sp->num = num; + } else { + /* parameters */ + sp = cp->current_section; + if (sp == NULL) { + SPDK_ERRLOG("unknown section\n"); + return -1; + } + key = spdk_strsepq(&arg, CF_DELIM); + if (key == NULL) { + SPDK_ERRLOG("broken key\n"); + return -1; + } + + ip = allocate_cf_item(); + if (ip == NULL) { + SPDK_ERRLOG("cannot allocate cf item\n"); + return -1; + } + append_cf_item(sp, ip); + ip->key = strdup(key); + if (ip->key == NULL) { + SPDK_ERRLOG("cannot make duplicate of %s\n", key); + return -1; + } + ip->val = NULL; + if (arg != NULL) { + /* key has value(s) */ + while (arg != NULL) { + val = spdk_strsepq(&arg, CF_DELIM); + vp = allocate_cf_value(); + if (vp == NULL) { + SPDK_ERRLOG("cannot allocate cf value\n"); + return -1; + } + append_cf_value(ip, vp); + vp->value = strdup(val); + if (vp->value == NULL) { + SPDK_ERRLOG("cannot duplicate %s to vp->value\n", val); + return -1; + } + } + } + } + + return 0; +} + +static char * +fgets_line(FILE *fp) +{ + char *dst, *dst2, *p; + size_t total, len; + + dst = p = malloc(LIB_MAX_TMPBUF); + if (!dst) { + return NULL; + } + + dst[0] = '\0'; + total = 0; + + while (fgets(p, LIB_MAX_TMPBUF, fp) != NULL) { + len = strlen(p); + total += len; + if (len + 1 < LIB_MAX_TMPBUF || dst[total - 1] == '\n') { + dst2 = realloc(dst, total + 1); + if (!dst2) { + free(dst); + return NULL; + } else { + return dst2; + } + } + + dst2 = realloc(dst, total + LIB_MAX_TMPBUF); + if (!dst2) { + free(dst); + return NULL; + } else { + dst = dst2; + } + + p = dst + total; + } + + if (feof(fp) && total != 0) { + dst2 = realloc(dst, total + 2); + if (!dst2) { + free(dst); + return NULL; + } else { + dst = dst2; + } + + dst[total] = '\n'; + dst[total + 1] = '\0'; + return dst; + } + + free(dst); + + return NULL; +} + +int +spdk_conf_read(struct spdk_conf *cp, const char *file) +{ + FILE *fp; + char *lp, *p; + char *lp2, *q; + int line; + int n, n2; + + if (file == NULL || file[0] == '\0') { + return -1; + } + + fp = 
fopen(file, "r"); + if (fp == NULL) { + SPDK_ERRLOG("open error: %s\n", file); + return -1; + } + + cp->file = strdup(file); + if (cp->file == NULL) { + SPDK_ERRLOG("cannot duplicate %s to cp->file\n", file); + fclose(fp); + return -1; + } + + line = 1; + while ((lp = fgets_line(fp)) != NULL) { + /* skip spaces */ + for (p = lp; *p != '\0' && isspace((int) *p); p++) + ; + /* skip comment, empty line */ + if (p[0] == '#' || p[0] == '\0') { + goto next_line; + } + + /* concatenate line end with '\' */ + n = strlen(p); + while (n > 2 && p[n - 1] == '\n' && p[n - 2] == '\\') { + n -= 2; + lp2 = fgets_line(fp); + if (lp2 == NULL) { + break; + } + + line++; + n2 = strlen(lp2); + + q = malloc(n + n2 + 1); + if (!q) { + free(lp2); + free(lp); + SPDK_ERRLOG("malloc failed at line %d of %s\n", line, cp->file); + fclose(fp); + return -1; + } + + memcpy(q, p, n); + memcpy(q + n, lp2, n2); + q[n + n2] = '\0'; + free(lp2); + free(lp); + p = lp = q; + n += n2; + } + + /* parse one line */ + if (parse_line(cp, p) < 0) { + SPDK_ERRLOG("parse error at line %d of %s\n", line, cp->file); + } +next_line: + line++; + free(lp); + } + + fclose(fp); + return 0; +} + +void +spdk_conf_set_as_default(struct spdk_conf *cp) +{ + default_config = cp; +} diff --git a/src/spdk/lib/copy/Makefile b/src/spdk/lib/copy/Makefile new file mode 100644 index 00000000..31f983b5 --- /dev/null +++ b/src/spdk/lib/copy/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = copy +C_SRCS = copy_engine.c + +DIRS-y = ioat + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/copy/copy_engine.c b/src/spdk/lib/copy/copy_engine.c new file mode 100644 index 00000000..921e17fa --- /dev/null +++ b/src/spdk/lib/copy/copy_engine.c @@ -0,0 +1,318 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk_internal/copy_engine.h" + +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/thread.h" + +static size_t g_max_copy_module_size = 0; + +static struct spdk_copy_engine *hw_copy_engine = NULL; +/* Memcpy engine always exist */ +static struct spdk_copy_engine *mem_copy_engine = NULL; + +TAILQ_HEAD(, spdk_copy_module_if) spdk_copy_module_list = + TAILQ_HEAD_INITIALIZER(spdk_copy_module_list); + +struct copy_io_channel { + struct spdk_copy_engine *engine; + struct spdk_io_channel *ch; +}; + +struct spdk_copy_module_if *g_copy_engine_module = NULL; +spdk_copy_fini_cb g_fini_cb_fn = NULL; +void *g_fini_cb_arg = NULL; + +void +spdk_copy_engine_register(struct spdk_copy_engine *copy_engine) +{ + assert(hw_copy_engine == NULL); + hw_copy_engine = copy_engine; +} + +static void +spdk_memcpy_register(struct spdk_copy_engine *copy_engine) +{ + assert(mem_copy_engine == NULL); + mem_copy_engine = copy_engine; +} + +static void +spdk_memcpy_unregister(void) +{ + mem_copy_engine = NULL; +} + +static void +copy_engine_done(void *ref, int status) +{ + struct spdk_copy_task *req = (struct spdk_copy_task *)ref; + + req->cb(req, status); +} + +int +spdk_copy_submit(struct spdk_copy_task *copy_req, struct spdk_io_channel *ch, + void *dst, void *src, uint64_t nbytes, spdk_copy_completion_cb cb) +{ + struct spdk_copy_task *req = copy_req; + struct copy_io_channel *copy_ch = spdk_io_channel_get_ctx(ch); + + req->cb = cb; + return copy_ch->engine->copy(req->offload_ctx, copy_ch->ch, dst, src, nbytes, + copy_engine_done); +} + +int +spdk_copy_submit_fill(struct spdk_copy_task *copy_req, struct spdk_io_channel *ch, + void *dst, uint8_t fill, uint64_t nbytes, spdk_copy_completion_cb cb) +{ + struct spdk_copy_task *req = copy_req; + struct copy_io_channel *copy_ch = spdk_io_channel_get_ctx(ch); + + req->cb = cb; + return copy_ch->engine->fill(req->offload_ctx, copy_ch->ch, dst, fill, nbytes, + copy_engine_done); +} + +/* memcpy default copy 
engine */ +static int +mem_copy_submit(void *cb_arg, struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, + spdk_copy_completion_cb cb) +{ + struct spdk_copy_task *copy_req; + + memcpy(dst, src, (size_t)nbytes); + + copy_req = (struct spdk_copy_task *)((uintptr_t)cb_arg - + offsetof(struct spdk_copy_task, offload_ctx)); + cb(copy_req, 0); + return 0; +} + +static int +mem_copy_fill(void *cb_arg, struct spdk_io_channel *ch, void *dst, uint8_t fill, uint64_t nbytes, + spdk_copy_completion_cb cb) +{ + struct spdk_copy_task *copy_req; + + memset(dst, fill, nbytes); + copy_req = (struct spdk_copy_task *)((uintptr_t)cb_arg - + offsetof(struct spdk_copy_task, offload_ctx)); + cb(copy_req, 0); + + return 0; +} + +static struct spdk_io_channel *mem_get_io_channel(void); + +static struct spdk_copy_engine memcpy_copy_engine = { + .copy = mem_copy_submit, + .fill = mem_copy_fill, + .get_io_channel = mem_get_io_channel, +}; + +static int +memcpy_create_cb(void *io_device, void *ctx_buf) +{ + return 0; +} + +static void +memcpy_destroy_cb(void *io_device, void *ctx_buf) +{ +} + +static struct spdk_io_channel *mem_get_io_channel(void) +{ + return spdk_get_io_channel(&memcpy_copy_engine); +} + +static size_t +copy_engine_mem_get_ctx_size(void) +{ + return sizeof(struct spdk_copy_task); +} + +size_t +spdk_copy_task_size(void) +{ + return g_max_copy_module_size; +} + +void spdk_copy_module_list_add(struct spdk_copy_module_if *copy_module) +{ + TAILQ_INSERT_TAIL(&spdk_copy_module_list, copy_module, tailq); + if (copy_module->get_ctx_size && copy_module->get_ctx_size() > g_max_copy_module_size) { + g_max_copy_module_size = copy_module->get_ctx_size(); + } +} + +static int +copy_create_cb(void *io_device, void *ctx_buf) +{ + struct copy_io_channel *copy_ch = ctx_buf; + + if (hw_copy_engine != NULL) { + copy_ch->ch = hw_copy_engine->get_io_channel(); + if (copy_ch->ch != NULL) { + copy_ch->engine = hw_copy_engine; + return 0; + } + } + + copy_ch->ch = mem_copy_engine->get_io_channel(); + assert(copy_ch->ch != NULL); + copy_ch->engine = mem_copy_engine; + return 0; +} + +static void +copy_destroy_cb(void *io_device, void *ctx_buf) +{ + struct copy_io_channel *copy_ch = ctx_buf; + + spdk_put_io_channel(copy_ch->ch); +} + +struct spdk_io_channel * +spdk_copy_engine_get_io_channel(void) +{ + return spdk_get_io_channel(&spdk_copy_module_list); +} + +static int +copy_engine_mem_init(void) +{ + spdk_memcpy_register(&memcpy_copy_engine); + spdk_io_device_register(&memcpy_copy_engine, memcpy_create_cb, memcpy_destroy_cb, 0, + "memcpy_engine"); + + return 0; +} + +static void +copy_engine_mem_fini(void *ctxt) +{ + spdk_io_device_unregister(&memcpy_copy_engine, NULL); + spdk_memcpy_unregister(); + + spdk_copy_engine_module_finish(); +} + +static void +spdk_copy_engine_module_initialize(void) +{ + struct spdk_copy_module_if *copy_engine_module; + + TAILQ_FOREACH(copy_engine_module, &spdk_copy_module_list, tailq) { + copy_engine_module->module_init(); + } +} + +int +spdk_copy_engine_initialize(void) +{ + spdk_copy_engine_module_initialize(); + /* + * We need a unique identifier for the copy engine framework, so use the + * spdk_copy_module_list address for this purpose. 
+ */ + spdk_io_device_register(&spdk_copy_module_list, copy_create_cb, copy_destroy_cb, + sizeof(struct copy_io_channel), "copy_module"); + + return 0; +} + +static void +spdk_copy_engine_module_finish_cb(void) +{ + spdk_copy_fini_cb cb_fn = g_fini_cb_fn; + + cb_fn(g_fini_cb_arg); + g_fini_cb_fn = NULL; + g_fini_cb_arg = NULL; +} + +void +spdk_copy_engine_module_finish(void) +{ + if (!g_copy_engine_module) { + g_copy_engine_module = TAILQ_FIRST(&spdk_copy_module_list); + } else { + g_copy_engine_module = TAILQ_NEXT(g_copy_engine_module, tailq); + } + + if (!g_copy_engine_module) { + spdk_copy_engine_module_finish_cb(); + return; + } + + if (g_copy_engine_module->module_fini) { + spdk_thread_send_msg(spdk_get_thread(), g_copy_engine_module->module_fini, NULL); + } else { + spdk_copy_engine_module_finish(); + } +} + +void +spdk_copy_engine_finish(spdk_copy_fini_cb cb_fn, void *cb_arg) +{ + assert(cb_fn != NULL); + + g_fini_cb_fn = cb_fn; + g_fini_cb_arg = cb_arg; + + spdk_io_device_unregister(&spdk_copy_module_list, NULL); + spdk_copy_engine_module_finish(); +} + +void +spdk_copy_engine_config_text(FILE *fp) +{ + struct spdk_copy_module_if *copy_engine_module; + + TAILQ_FOREACH(copy_engine_module, &spdk_copy_module_list, tailq) { + if (copy_engine_module->config_text) { + copy_engine_module->config_text(fp); + } + } +} + +SPDK_COPY_MODULE_REGISTER(copy_engine_mem_init, copy_engine_mem_fini, + NULL, copy_engine_mem_get_ctx_size) diff --git a/src/spdk/lib/copy/ioat/Makefile b/src/spdk/lib/copy/ioat/Makefile new file mode 100644 index 00000000..3d19e38f --- /dev/null +++ b/src/spdk/lib/copy/ioat/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = copy_ioat +C_SRCS = copy_engine_ioat.c copy_engine_ioat_rpc.c + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/copy/ioat/copy_engine_ioat.c b/src/spdk/lib/copy/ioat/copy_engine_ioat.c new file mode 100644 index 00000000..40bc6cf5 --- /dev/null +++ b/src/spdk/lib/copy/ioat/copy_engine_ioat.c @@ -0,0 +1,421 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "copy_engine_ioat.h" + +#include "spdk/stdinc.h" + +#include "spdk_internal/copy_engine.h" +#include "spdk_internal/log.h" + +#include "spdk/env.h" +#include "spdk/conf.h" +#include "spdk/event.h" +#include "spdk/thread.h" +#include "spdk/ioat.h" + +static bool g_ioat_enable = false; + +struct ioat_probe_ctx { + int num_whitelist_devices; + struct spdk_pci_addr whitelist[IOAT_MAX_CHANNELS]; +}; + +static struct ioat_probe_ctx g_probe_ctx; + +struct ioat_device { + struct spdk_ioat_chan *ioat; + bool is_allocated; + /** linked list pointer for device list */ + TAILQ_ENTRY(ioat_device) tailq; +}; + +static TAILQ_HEAD(, ioat_device) g_devices = TAILQ_HEAD_INITIALIZER(g_devices); +static pthread_mutex_t g_ioat_mutex = PTHREAD_MUTEX_INITIALIZER; + +struct ioat_io_channel { + struct spdk_ioat_chan *ioat_ch; + struct ioat_device *ioat_dev; + struct spdk_poller *poller; +}; + +static int +ioat_find_dev_by_whitelist_bdf(const struct spdk_pci_addr *pci_addr, + const struct spdk_pci_addr *whitelist, + int num_whitelist_devices) +{ + int i; + + for (i = 0; i < num_whitelist_devices; i++) { + if (spdk_pci_addr_compare(pci_addr, &whitelist[i]) == 0) { + return 1; + } + } + return 0; +} + +static struct ioat_device * +ioat_allocate_device(void) +{ + struct ioat_device *dev; + + pthread_mutex_lock(&g_ioat_mutex); + TAILQ_FOREACH(dev, &g_devices, tailq) { + if (!dev->is_allocated) { + dev->is_allocated = true; + pthread_mutex_unlock(&g_ioat_mutex); + return dev; + } + } + pthread_mutex_unlock(&g_ioat_mutex); + + return NULL; +} + +static void +ioat_free_device(struct ioat_device *dev) +{ + pthread_mutex_lock(&g_ioat_mutex); + dev->is_allocated = false; + pthread_mutex_unlock(&g_ioat_mutex); +} + +struct ioat_task { + spdk_copy_completion_cb cb; +}; + +static int copy_engine_ioat_init(void); +static void copy_engine_ioat_exit(void *ctx); +static void copy_engine_ioat_config_text(FILE *fp); + +static size_t +copy_engine_ioat_get_ctx_size(void) +{ + return sizeof(struct ioat_task) + sizeof(struct spdk_copy_task); +} + +SPDK_COPY_MODULE_REGISTER(copy_engine_ioat_init, copy_engine_ioat_exit, + copy_engine_ioat_config_text, + copy_engine_ioat_get_ctx_size) + +static void +copy_engine_ioat_exit(void *ctx) +{ + struct ioat_device *dev; + + while (!TAILQ_EMPTY(&g_devices)) { + dev = TAILQ_FIRST(&g_devices); + TAILQ_REMOVE(&g_devices, dev, tailq); + spdk_ioat_detach(dev->ioat); + ioat_free_device(dev); + spdk_dma_free(dev); + } + spdk_copy_engine_module_finish(); +} + +static void +ioat_done(void *cb_arg) +{ + struct spdk_copy_task *copy_req; + struct ioat_task *ioat_task = cb_arg; + + copy_req = (struct spdk_copy_task *) + ((uintptr_t)ioat_task - + offsetof(struct spdk_copy_task, offload_ctx)); + + ioat_task->cb(copy_req, 0); +} + +static int +ioat_copy_submit(void *cb_arg, struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, + spdk_copy_completion_cb cb) +{ + struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); + + assert(ioat_ch->ioat_ch != NULL); + + ioat_task->cb = cb; + + return spdk_ioat_submit_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes); +} + +static int +ioat_copy_submit_fill(void *cb_arg, struct spdk_io_channel *ch, void *dst, uint8_t fill, + uint64_t nbytes, spdk_copy_completion_cb cb) +{ + struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); + uint64_t fill64 = 0x0101010101010101ULL * fill; + + assert(ioat_ch->ioat_ch 
!= NULL); + + ioat_task->cb = cb; + + return spdk_ioat_submit_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill64, nbytes); +} + +static int +ioat_poll(void *arg) +{ + struct spdk_ioat_chan *chan = arg; + + spdk_ioat_process_events(chan); + + return -1; +} + +static struct spdk_io_channel *ioat_get_io_channel(void); + +static struct spdk_copy_engine ioat_copy_engine = { + .copy = ioat_copy_submit, + .fill = ioat_copy_submit_fill, + .get_io_channel = ioat_get_io_channel, +}; + +static int +ioat_create_cb(void *io_device, void *ctx_buf) +{ + struct ioat_io_channel *ch = ctx_buf; + struct ioat_device *ioat_dev; + + ioat_dev = ioat_allocate_device(); + if (ioat_dev == NULL) { + return -1; + } + + ch->ioat_dev = ioat_dev; + ch->ioat_ch = ioat_dev->ioat; + ch->poller = spdk_poller_register(ioat_poll, ch->ioat_ch, 0); + return 0; +} + +static void +ioat_destroy_cb(void *io_device, void *ctx_buf) +{ + struct ioat_io_channel *ch = ctx_buf; + + ioat_free_device(ch->ioat_dev); + spdk_poller_unregister(&ch->poller); +} + +static struct spdk_io_channel * +ioat_get_io_channel(void) +{ + return spdk_get_io_channel(&ioat_copy_engine); +} + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev) +{ + struct ioat_probe_ctx *ctx = cb_ctx; + struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr(pci_dev); + + SPDK_INFOLOG(SPDK_LOG_COPY_IOAT, + " Found matching device at %04x:%02x:%02x.%x vendor:0x%04x device:0x%04x\n", + pci_addr.domain, + pci_addr.bus, + pci_addr.dev, + pci_addr.func, + spdk_pci_device_get_vendor_id(pci_dev), + spdk_pci_device_get_device_id(pci_dev)); + + if (ctx->num_whitelist_devices > 0 && + !ioat_find_dev_by_whitelist_bdf(&pci_addr, ctx->whitelist, ctx->num_whitelist_devices)) { + return false; + } + + /* Claim the device in case conflict with other process */ + if (spdk_pci_device_claim(&pci_addr) < 0) { + return false; + } + + return true; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat) +{ + struct ioat_device *dev; + + dev = spdk_dma_zmalloc(sizeof(*dev), 0, NULL); + if (dev == NULL) { + SPDK_ERRLOG("Failed to allocate device struct\n"); + return; + } + + dev->ioat = ioat; + TAILQ_INSERT_TAIL(&g_devices, dev, tailq); +} + +void +copy_engine_ioat_enable_probe(void) +{ + g_ioat_enable = true; +} + +static int +copy_engine_ioat_add_whitelist_device(const char *pci_bdf) +{ + if (pci_bdf == NULL) { + return -1; + } + + if (g_probe_ctx.num_whitelist_devices >= IOAT_MAX_CHANNELS) { + SPDK_ERRLOG("Ioat whitelist is full (max size is %d)\n", + IOAT_MAX_CHANNELS); + return -1; + } + + if (spdk_pci_addr_parse(&g_probe_ctx.whitelist[g_probe_ctx.num_whitelist_devices], + pci_bdf) < 0) { + SPDK_ERRLOG("Invalid address %s\n", pci_bdf); + return -1; + } + + g_probe_ctx.num_whitelist_devices++; + + return 0; +} + +int +copy_engine_ioat_add_whitelist_devices(const char *pci_bdfs[], size_t num_pci_bdfs) +{ + size_t i; + + for (i = 0; i < num_pci_bdfs; i++) { + if (copy_engine_ioat_add_whitelist_device(pci_bdfs[i]) < 0) { + return -1; + } + } + + return 0; +} + +static int +copy_engine_ioat_read_config_file_params(struct spdk_conf_section *sp) +{ + int i; + char *val, *pci_bdf; + + if (spdk_conf_section_get_boolval(sp, "Enable", false)) { + g_ioat_enable = true; + /* Enable Ioat */ + } + + val = spdk_conf_section_get_val(sp, "Disable"); + if (val != NULL) { + SPDK_WARNLOG("\"Disable\" option is deprecated and will be removed in a future release.\n"); + SPDK_WARNLOG("IOAT is now disabled by default. 
It may be enabled by \"Enable Yes\"\n"); + + if (g_ioat_enable && (strcasecmp(val, "Yes") == 0)) { + SPDK_ERRLOG("\"Enable Yes\" and \"Disable Yes\" cannot be set at the same time\n"); + return -1; + } + } + + /* Init the whitelist */ + for (i = 0; ; i++) { + pci_bdf = spdk_conf_section_get_nmval(sp, "Whitelist", i, 0); + if (!pci_bdf) { + break; + } + + if (copy_engine_ioat_add_whitelist_device(pci_bdf) < 0) { + return -1; + } + } + + return 0; +} + +static int +copy_engine_ioat_init(void) +{ + struct spdk_conf_section *sp; + int rc; + + sp = spdk_conf_find_section(NULL, "Ioat"); + if (sp != NULL) { + rc = copy_engine_ioat_read_config_file_params(sp); + if (rc != 0) { + SPDK_ERRLOG("copy_engine_ioat_read_config_file_params() failed\n"); + return rc; + } + } + + if (!g_ioat_enable) { + return 0; + } + + if (spdk_ioat_probe(&g_probe_ctx, probe_cb, attach_cb) != 0) { + SPDK_ERRLOG("spdk_ioat_probe() failed\n"); + return -1; + } + + SPDK_INFOLOG(SPDK_LOG_COPY_IOAT, "Ioat Copy Engine Offload Enabled\n"); + spdk_copy_engine_register(&ioat_copy_engine); + spdk_io_device_register(&ioat_copy_engine, ioat_create_cb, ioat_destroy_cb, + sizeof(struct ioat_io_channel), "ioat_copy_engine"); + return 0; +} + +#define COPY_ENGINE_IOAT_HEADER_TMPL \ +"[Ioat]\n" \ +" # Users may not want to use offload even it is available.\n" \ +" # Users may use the whitelist to initialize specified devices, IDS\n" \ +" # uses BUS:DEVICE.FUNCTION to identify each Ioat channel.\n" + +#define COPY_ENGINE_IOAT_ENABLE_TMPL \ +" Enable %s\n" + +#define COPY_ENGINE_IOAT_WHITELIST_TMPL \ +" Whitelist %.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 "\n" + +static void +copy_engine_ioat_config_text(FILE *fp) +{ + int i; + struct spdk_pci_addr *dev; + + fprintf(fp, COPY_ENGINE_IOAT_HEADER_TMPL); + fprintf(fp, COPY_ENGINE_IOAT_ENABLE_TMPL, g_ioat_enable ? "Yes" : "No"); + + for (i = 0; i < g_probe_ctx.num_whitelist_devices; i++) { + dev = &g_probe_ctx.whitelist[i]; + fprintf(fp, COPY_ENGINE_IOAT_WHITELIST_TMPL, + dev->domain, dev->bus, dev->dev, dev->func); + } +} + +SPDK_LOG_REGISTER_COMPONENT("copy_ioat", SPDK_LOG_COPY_IOAT) diff --git a/src/spdk/lib/copy/ioat/copy_engine_ioat.h b/src/spdk/lib/copy/ioat/copy_engine_ioat.h new file mode 100644 index 00000000..ae69fb2d --- /dev/null +++ b/src/spdk/lib/copy/ioat/copy_engine_ioat.h @@ -0,0 +1,44 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
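
For reference, the [Ioat] section consumed by copy_engine_ioat_read_config_file_params() above looks like the sketch below; the BDF addresses are placeholders, and each line follows the same format that copy_engine_ioat_config_text() writes back out via the COPY_ENGINE_IOAT_*_TMPL strings:

  [Ioat]
    # Offload is disabled by default; enable probing explicitly.
    Enable Yes
    # Optionally restrict probing to specific channels, one BDF per Whitelist line.
    Whitelist 0000:00:04.0
    Whitelist 0000:00:04.1
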
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_COPY_ENGINE_IOAT_H +#define SPDK_COPY_ENGINE_IOAT_H + +#include "spdk/stdinc.h" + +#define IOAT_MAX_CHANNELS 64 + +int copy_engine_ioat_add_whitelist_devices(const char *pci_bdfs[], size_t num_pci_bdfs); +void copy_engine_ioat_enable_probe(void); + +#endif /* SPDK_COPY_ENGINE_IOAT_H */ diff --git a/src/spdk/lib/copy/ioat/copy_engine_ioat_rpc.c b/src/spdk/lib/copy/ioat/copy_engine_ioat_rpc.c new file mode 100644 index 00000000..ae03fdb1 --- /dev/null +++ b/src/spdk/lib/copy/ioat/copy_engine_ioat_rpc.c @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "copy_engine_ioat.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/event.h" + +struct rpc_pci_whitelist { + size_t num_bdfs; + char *bdfs[IOAT_MAX_CHANNELS]; +}; + +static int +decode_rpc_pci_whitelist(const struct spdk_json_val *val, void *out) +{ + struct rpc_pci_whitelist *pci_whitelist = out; + + return spdk_json_decode_array(val, spdk_json_decode_string, pci_whitelist->bdfs, + IOAT_MAX_CHANNELS, &pci_whitelist->num_bdfs, sizeof(char *)); +} + +static void +free_rpc_pci_whitelist(struct rpc_pci_whitelist *list) +{ + size_t i; + + for (i = 0; i < list->num_bdfs; i++) { + free(list->bdfs[i]); + } +} + +struct rpc_copy_engine_ioat { + struct rpc_pci_whitelist pci_whitelist; +}; + +static void +free_rpc_copy_engine_ioat(struct rpc_copy_engine_ioat *p) +{ + free_rpc_pci_whitelist(&p->pci_whitelist); +} + +static const struct spdk_json_object_decoder rpc_copy_engine_ioat_decoder[] = { + {"pci_whitelist", offsetof(struct rpc_copy_engine_ioat, pci_whitelist), decode_rpc_pci_whitelist}, +}; + +static void +spdk_rpc_scan_copy_engine_ioat(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_copy_engine_ioat req = {}; + struct spdk_json_write_ctx *w; + int rc; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_copy_engine_ioat_decoder, + SPDK_COUNTOF(rpc_copy_engine_ioat_decoder), + &req)) { + free_rpc_copy_engine_ioat(&req); + SPDK_ERRLOG("spdk_json_decode_object() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + + rc = copy_engine_ioat_add_whitelist_devices((const char **)req.pci_whitelist.bdfs, + req.pci_whitelist.num_bdfs); + free_rpc_copy_engine_ioat(&req); + if (rc < 0) { + SPDK_ERRLOG("copy_engine_ioat_add_whitelist_devices() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + } + + copy_engine_ioat_enable_probe(); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("scan_ioat_copy_engine", spdk_rpc_scan_copy_engine_ioat, SPDK_RPC_STARTUP) diff --git a/src/spdk/lib/env_dpdk/Makefile b/src/spdk/lib/env_dpdk/Makefile new file mode 100644 index 00000000..b7a6961f --- /dev/null +++ b/src/spdk/lib/env_dpdk/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
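
The scan_ioat_copy_engine RPC registered above accepts an optional pci_whitelist array (up to IOAT_MAX_CHANNELS entries) and simply writes back true on success. A sketch of the JSON-RPC 2.0 request it decodes, with placeholder BDF addresses:

  {
    "jsonrpc": "2.0",
    "method": "scan_ioat_copy_engine",
    "id": 1,
    "params": {
      "pci_whitelist": ["0000:00:04.0", "0000:00:04.1"]
    }
  }

Omitting "params" entirely is also valid; in that case probing is enabled for all channels.
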
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(ENV_CFLAGS) +C_SRCS = env.c memory.c pci.c vtophys.c init.c threads.c +C_SRCS += pci_nvme.c pci_ioat.c pci_virtio.c +LIBNAME = env_dpdk + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/env_dpdk/env.c b/src/spdk/lib/env_dpdk/env.c new file mode 100644 index 00000000..a5238e54 --- /dev/null +++ b/src/spdk/lib/env_dpdk/env.c @@ -0,0 +1,419 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" + +#include +#include +#include +#include +#include +#include + +static uint64_t +virt_to_phys(void *vaddr) +{ + uint64_t ret; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + ret = rte_malloc_virt2iova(vaddr); + if (ret != RTE_BAD_IOVA) { + return ret; + } +#else + ret = rte_malloc_virt2phy(vaddr); + if (ret != RTE_BAD_PHYS_ADDR) { + return ret; + } +#endif + + return spdk_vtophys(vaddr); +} + +void * +spdk_malloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags) +{ + if (flags == 0) { + return NULL; + } + + void *buf = rte_malloc_socket(NULL, size, align, socket_id); + if (buf && phys_addr) { + *phys_addr = virt_to_phys(buf); + } + return buf; +} + +void * +spdk_zmalloc(size_t size, size_t align, uint64_t *phys_addr, int socket_id, uint32_t flags) +{ + void *buf = spdk_malloc(size, align, phys_addr, socket_id, flags); + if (buf) { + memset(buf, 0, size); + } + return buf; +} + +void +spdk_free(void *buf) +{ + rte_free(buf); +} + +void * +spdk_dma_malloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id) +{ + return spdk_malloc(size, align, phys_addr, socket_id, (SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE)); +} + +void * +spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *phys_addr, int socket_id) +{ + return spdk_zmalloc(size, align, phys_addr, socket_id, (SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE)); +} + +void * +spdk_dma_malloc(size_t size, size_t align, uint64_t *phys_addr) +{ + return spdk_dma_malloc_socket(size, align, phys_addr, SPDK_ENV_SOCKET_ID_ANY); +} + +void * +spdk_dma_zmalloc(size_t size, size_t align, uint64_t *phys_addr) +{ + return spdk_dma_zmalloc_socket(size, align, phys_addr, SPDK_ENV_SOCKET_ID_ANY); +} + +void * +spdk_dma_realloc(void *buf, size_t size, size_t align, uint64_t *phys_addr) +{ + void *new_buf = rte_realloc(buf, size, align); + if (new_buf && phys_addr) { + *phys_addr = virt_to_phys(new_buf); + } + return new_buf; +} + +void +spdk_dma_free(void *buf) +{ + spdk_free(buf); +} + +void * +spdk_memzone_reserve_aligned(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align) +{ + const struct rte_memzone *mz; + unsigned dpdk_flags = 0; + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) + /* Older DPDKs do not offer such flag since their + * memzones are iova-contiguous by default. 
+ */ + if ((flags & SPDK_MEMZONE_NO_IOVA_CONTIG) == 0) { + dpdk_flags |= RTE_MEMZONE_IOVA_CONTIG; + } +#endif + + if (socket_id == SPDK_ENV_SOCKET_ID_ANY) { + socket_id = SOCKET_ID_ANY; + } + + mz = rte_memzone_reserve_aligned(name, len, socket_id, dpdk_flags, align); + + if (mz != NULL) { + memset(mz->addr, 0, len); + return mz->addr; + } else { + return NULL; + } +} + +void * +spdk_memzone_reserve(const char *name, size_t len, int socket_id, unsigned flags) +{ + return spdk_memzone_reserve_aligned(name, len, socket_id, flags, + RTE_CACHE_LINE_SIZE); +} + +void * +spdk_memzone_lookup(const char *name) +{ + const struct rte_memzone *mz = rte_memzone_lookup(name); + + if (mz != NULL) { + return mz->addr; + } else { + return NULL; + } +} + +int +spdk_memzone_free(const char *name) +{ + const struct rte_memzone *mz = rte_memzone_lookup(name); + + if (mz != NULL) { + return rte_memzone_free(mz); + } + + return -1; +} + +void +spdk_memzone_dump(FILE *f) +{ + rte_memzone_dump(f); +} + +struct spdk_mempool * +spdk_mempool_create_ctor(const char *name, size_t count, + size_t ele_size, size_t cache_size, int socket_id, + spdk_mempool_obj_cb_t *obj_init, void *obj_init_arg) +{ + struct rte_mempool *mp; + size_t tmp; + + if (socket_id == SPDK_ENV_SOCKET_ID_ANY) { + socket_id = SOCKET_ID_ANY; + } + + /* No more than half of all elements can be in cache */ + tmp = (count / 2) / rte_lcore_count(); + if (cache_size > tmp) { + cache_size = tmp; + } + + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) { + cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE; + } + + mp = rte_mempool_create(name, count, ele_size, cache_size, + 0, NULL, NULL, (rte_mempool_obj_cb_t *)obj_init, obj_init_arg, + socket_id, MEMPOOL_F_NO_PHYS_CONTIG); + + return (struct spdk_mempool *)mp; +} + + +struct spdk_mempool * +spdk_mempool_create(const char *name, size_t count, + size_t ele_size, size_t cache_size, int socket_id) +{ + return spdk_mempool_create_ctor(name, count, ele_size, cache_size, socket_id, + NULL, NULL); +} + +char * +spdk_mempool_get_name(struct spdk_mempool *mp) +{ + return ((struct rte_mempool *)mp)->name; +} + +void +spdk_mempool_free(struct spdk_mempool *mp) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 1) + rte_mempool_free((struct rte_mempool *)mp); +#endif +} + +void * +spdk_mempool_get(struct spdk_mempool *mp) +{ + void *ele = NULL; + int rc; + + rc = rte_mempool_get((struct rte_mempool *)mp, &ele); + if (rc != 0) { + return NULL; + } + return ele; +} + +int +spdk_mempool_get_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count) +{ + return rte_mempool_get_bulk((struct rte_mempool *)mp, ele_arr, count); +} + +void +spdk_mempool_put(struct spdk_mempool *mp, void *ele) +{ + rte_mempool_put((struct rte_mempool *)mp, ele); +} + +void +spdk_mempool_put_bulk(struct spdk_mempool *mp, void **ele_arr, size_t count) +{ + rte_mempool_put_bulk((struct rte_mempool *)mp, ele_arr, count); +} + +size_t +spdk_mempool_count(const struct spdk_mempool *pool) +{ +#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1) + return rte_mempool_count((struct rte_mempool *)pool); +#else + return rte_mempool_avail_count((struct rte_mempool *)pool); +#endif +} + +bool +spdk_process_is_primary(void) +{ + return (rte_eal_process_type() == RTE_PROC_PRIMARY); +} + +uint64_t spdk_get_ticks(void) +{ + return rte_get_timer_cycles(); +} + +uint64_t spdk_get_ticks_hz(void) +{ + return rte_get_timer_hz(); +} + +void spdk_delay_us(unsigned int us) +{ + rte_delay_us(us); +} + +void +spdk_unaffinitize_thread(void) +{ + rte_cpuset_t new_cpuset; + long num_cores, i; + 
+ CPU_ZERO(&new_cpuset); + + num_cores = sysconf(_SC_NPROCESSORS_CONF); + + /* Create a mask containing all CPUs */ + for (i = 0; i < num_cores; i++) { + CPU_SET(i, &new_cpuset); + } + + rte_thread_set_affinity(&new_cpuset); +} + +void * +spdk_call_unaffinitized(void *cb(void *arg), void *arg) +{ + rte_cpuset_t orig_cpuset; + void *ret; + + if (cb == NULL) { + return NULL; + } + + rte_thread_get_affinity(&orig_cpuset); + + spdk_unaffinitize_thread(); + + ret = cb(arg); + + rte_thread_set_affinity(&orig_cpuset); + + return ret; +} + +struct spdk_ring * +spdk_ring_create(enum spdk_ring_type type, size_t count, int socket_id) +{ + char ring_name[64]; + static uint32_t ring_num = 0; + unsigned flags = 0; + + switch (type) { + case SPDK_RING_TYPE_SP_SC: + flags = RING_F_SP_ENQ | RING_F_SC_DEQ; + break; + case SPDK_RING_TYPE_MP_SC: + flags = RING_F_SC_DEQ; + break; + case SPDK_RING_TYPE_MP_MC: + flags = 0; + break; + default: + return NULL; + } + + snprintf(ring_name, sizeof(ring_name), "ring_%u_%d", + __sync_fetch_and_add(&ring_num, 1), getpid()); + + return (struct spdk_ring *)rte_ring_create(ring_name, count, socket_id, flags); +} + +void +spdk_ring_free(struct spdk_ring *ring) +{ + rte_ring_free((struct rte_ring *)ring); +} + +size_t +spdk_ring_count(struct spdk_ring *ring) +{ + return rte_ring_count((struct rte_ring *)ring); +} + +size_t +spdk_ring_enqueue(struct spdk_ring *ring, void **objs, size_t count) +{ + int rc; +#if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 0) + rc = rte_ring_enqueue_bulk((struct rte_ring *)ring, objs, count); + if (rc == 0) { + return count; + } + + return 0; +#else + rc = rte_ring_enqueue_bulk((struct rte_ring *)ring, objs, count, NULL); + return rc; +#endif +} + +size_t +spdk_ring_dequeue(struct spdk_ring *ring, void **objs, size_t count) +{ +#if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 0) + return rte_ring_dequeue_burst((struct rte_ring *)ring, objs, count); +#else + return rte_ring_dequeue_burst((struct rte_ring *)ring, objs, count, NULL); +#endif +} diff --git a/src/spdk/lib/env_dpdk/env.mk b/src/spdk/lib/env_dpdk/env.mk new file mode 100644 index 00000000..989bdd11 --- /dev/null +++ b/src/spdk/lib/env_dpdk/env.mk @@ -0,0 +1,112 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
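
The spdk_ring wrappers defined at the end of env.c above map directly onto DPDK's rte_ring. A minimal usage sketch follows; the function name and enqueued pointers are illustrative, and it assumes the environment has already been brought up with spdk_env_init():

#include "spdk/env.h"

static void
ring_demo(void)
{
	void *objs[4] = { (void *)0x1, (void *)0x2, (void *)0x3, (void *)0x4 };
	void *out[4];
	struct spdk_ring *ring;
	size_t n;

	/* count is passed straight to rte_ring_create(), so DPDK's sizing rules apply */
	ring = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 4096, SPDK_ENV_SOCKET_ID_ANY);
	if (ring == NULL) {
		return;
	}

	n = spdk_ring_enqueue(ring, objs, 4);	/* returns the number of objects enqueued */
	n = spdk_ring_dequeue(ring, out, n);	/* returns the number of objects dequeued */
	(void)n;

	spdk_ring_free(ring);
}
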
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# This makefile snippet must define the following flags: +# ENV_CFLAGS +# ENV_CXXFLAGS +# ENV_LIBS +# ENV_LINKER_ARGS + +DPDK_DIR = $(CONFIG_DPDK_DIR) + +export DPDK_ABS_DIR = $(abspath $(DPDK_DIR)) + +ifneq (, $(wildcard $(DPDK_ABS_DIR)/include/rte_config.h)) +DPDK_INC_DIR := $(DPDK_ABS_DIR)/include +else +DPDK_INC_DIR := $(DPDK_ABS_DIR)/include/dpdk +endif +DPDK_INC := -I$(DPDK_INC_DIR) + +ifneq (, $(wildcard $(DPDK_ABS_DIR)/lib/librte_eal.a)) +DPDK_LIB_EXT = .a +else +DPDK_LIB_EXT = .so +endif + +DPDK_LIB_LIST = rte_eal rte_mempool rte_ring + +# librte_mempool_ring was new added from DPDK 17.05. Link this library used for +# ring based mempool management API. +ifneq (, $(wildcard $(DPDK_ABS_DIR)/lib/librte_mempool_ring.*)) +DPDK_LIB_LIST += rte_mempool_ring +endif + +# librte_malloc was removed after DPDK 2.1. Link this library conditionally based on its +# existence to maintain backward compatibility. +ifneq ($(wildcard $(DPDK_ABS_DIR)/lib/librte_malloc.*),) +DPDK_LIB_LIST += rte_malloc +endif + +# librte_pci and librte_bus_pci were added in DPDK 17.11. Link these libraries conditionally +# based on their existence to maintain backward compatibility. +ifneq (, $(wildcard $(DPDK_ABS_DIR)/lib/librte_pci.*)) +DPDK_LIB_LIST += rte_pci +endif + +ifneq (, $(wildcard $(DPDK_ABS_DIR)/lib/librte_bus_pci.*)) +DPDK_LIB_LIST += rte_bus_pci +endif + +ifeq ($(CONFIG_CRYPTO),y) +DPDK_LIB_LIST += rte_cryptodev rte_reorder rte_bus_vdev rte_pmd_aesni_mb rte_pmd_qat rte_mbuf +endif + +ifneq (, $(wildcard $(DPDK_ABS_DIR)/lib/librte_kvargs.*)) +DPDK_LIB_LIST += rte_kvargs +endif + +DPDK_LIB = $(DPDK_LIB_LIST:%=$(DPDK_ABS_DIR)/lib/lib%$(DPDK_LIB_EXT)) +ifeq ($(CONFIG_CRYPTO),y) +DPDK_LIB += $(SPDK_ROOT_DIR)/intel-ipsec-mb/libIPSec_MB.a +endif + +# SPDK memory registration requires experimental (deprecated) rte_memory API for DPDK 18.05 +ENV_CFLAGS = $(DPDK_INC) -Wno-deprecated-declarations +ENV_CXXFLAGS = $(ENV_CFLAGS) +ENV_DPDK_FILE = $(call spdk_lib_list_to_static_libs,env_dpdk) +ENV_LIBS = $(ENV_DPDK_FILE) $(DPDK_LIB) +ENV_LINKER_ARGS = $(ENV_DPDK_FILE) -Wl,--whole-archive $(DPDK_LIB) -Wl,--no-whole-archive + +ifneq (,$(wildcard $(DPDK_INC_DIR)/rte_config.h)) +ifneq (,$(shell grep -e "define RTE_LIBRTE_VHOST_NUMA 1" -e "define RTE_EAL_NUMA_AWARE_HUGEPAGES 1" $(DPDK_INC_DIR)/rte_config.h)) +ENV_LINKER_ARGS += -lnuma +endif +endif + +ifeq ($(OS),Linux) +ENV_LINKER_ARGS += -ldl +endif +ifeq ($(OS),FreeBSD) +ENV_LINKER_ARGS += -lexecinfo +endif diff --git a/src/spdk/lib/env_dpdk/env_internal.h b/src/spdk/lib/env_dpdk/env_internal.h new file mode 100644 index 00000000..d95084ea --- /dev/null +++ b/src/spdk/lib/env_dpdk/env_internal.h @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ENV_INTERNAL_H +#define SPDK_ENV_INTERNAL_H + +#include "spdk/stdinc.h" + +#define spdk_pci_device rte_pci_device + +#include "spdk/env.h" + +#include +#include +#include +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 0) +#include +extern struct rte_pci_bus rte_pci_bus; +#endif +#include +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 1) +#include +#endif +#include + +/* x86-64 and ARM userspace virtual addresses use only the low 48 bits [0..47], + * which is enough to cover 256 TB. + */ +#define SHIFT_256TB 48 /* (1 << 48) == 256 TB */ +#define MASK_256TB ((1ULL << SHIFT_256TB) - 1) + +#define SHIFT_1GB 30 /* (1 << 30) == 1 GB */ +#define MASK_1GB ((1ULL << SHIFT_1GB) - 1) + +#define SHIFT_2MB 21 /* (1 << 21) == 2MB */ +#define MASK_2MB ((1ULL << SHIFT_2MB) - 1) +#define VALUE_2MB (1 << SHIFT_2MB) + +#define SHIFT_4KB 12 /* (1 << 12) == 4KB */ +#define MASK_4KB ((1ULL << SHIFT_4KB) - 1) + +struct spdk_pci_enum_ctx { + struct rte_pci_driver driver; + spdk_pci_enum_cb cb_fn; + void *cb_arg; + pthread_mutex_t mtx; + bool is_registered; +}; + +int spdk_pci_device_init(struct rte_pci_driver *driver, struct rte_pci_device *device); +int spdk_pci_device_fini(struct rte_pci_device *device); + +int spdk_pci_enumerate(struct spdk_pci_enum_ctx *ctx, spdk_pci_enum_cb enum_cb, void *enum_ctx); +int spdk_pci_device_attach(struct spdk_pci_enum_ctx *ctx, spdk_pci_enum_cb enum_cb, void *enum_ctx, + struct spdk_pci_addr *pci_address); + +int spdk_mem_map_init(void); +int spdk_vtophys_init(void); + +/** + * Report a DMA-capable PCI device to the vtophys translation code. + * Increases the refcount of active DMA-capable devices managed by SPDK. + * This must be called after a `rte_pci_device` is created. + */ +void spdk_vtophys_pci_device_added(struct rte_pci_device *pci_device); + +/** + * Report the removal of a DMA-capable PCI device to the vtophys translation code. + * Decreases the refcount of active DMA-capable devices managed by SPDK. 
+ * This must be called before a `rte_pci_device` is destroyed. + */ +void spdk_vtophys_pci_device_removed(struct rte_pci_device *pci_device); + +#endif diff --git a/src/spdk/lib/env_dpdk/init.c b/src/spdk/lib/env_dpdk/init.c new file mode 100644 index 00000000..1a2fafe1 --- /dev/null +++ b/src/spdk/lib/env_dpdk/init.c @@ -0,0 +1,401 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "env_internal.h" + +#include "spdk/version.h" + +#include +#include + +#define SPDK_ENV_DPDK_DEFAULT_NAME "spdk" +#define SPDK_ENV_DPDK_DEFAULT_SHM_ID -1 +#define SPDK_ENV_DPDK_DEFAULT_MEM_SIZE -1 +#define SPDK_ENV_DPDK_DEFAULT_MASTER_CORE -1 +#define SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL -1 +#define SPDK_ENV_DPDK_DEFAULT_CORE_MASK "0x1" + +static char **eal_cmdline; +static int eal_cmdline_argcount; + +static char * +_sprintf_alloc(const char *format, ...) +{ + va_list args; + va_list args_copy; + char *buf; + size_t bufsize; + int rc; + + va_start(args, format); + + /* Try with a small buffer first. */ + bufsize = 32; + + /* Limit maximum buffer size to something reasonable so we don't loop forever. */ + while (bufsize <= 1024 * 1024) { + buf = malloc(bufsize); + if (buf == NULL) { + va_end(args); + return NULL; + } + + va_copy(args_copy, args); + rc = vsnprintf(buf, bufsize, format, args_copy); + va_end(args_copy); + + /* + * If vsnprintf() returned a count within our current buffer size, we are done. + * The count does not include the \0 terminator, so rc == bufsize is not OK. + */ + if (rc >= 0 && (size_t)rc < bufsize) { + va_end(args); + return buf; + } + + /* + * vsnprintf() should return the required space, but some libc versions do not + * implement this correctly, so just double the buffer size and try again. + * + * We don't need the data in buf, so rather than realloc(), use free() and malloc() + * again to avoid a copy. 
+ */ + free(buf); + bufsize *= 2; + } + + va_end(args); + return NULL; +} + +static void +spdk_env_unlink_shared_files(void) +{ + /* Starting with DPDK 18.05, there are more files with unpredictable paths + * and filenames. The --no-shconf option prevents from creating them, but + * only for DPDK 18.08+. For DPDK 18.05 we just leave them be. + */ +#if RTE_VERSION < RTE_VERSION_NUM(18, 05, 0, 0) + char buffer[PATH_MAX]; + + snprintf(buffer, PATH_MAX, "/var/run/.spdk_pid%d_hugepage_info", getpid()); + if (unlink(buffer)) { + fprintf(stderr, "Unable to unlink shared memory file: %s. Error code: %d\n", buffer, errno); + } +#endif +} + +void +spdk_env_opts_init(struct spdk_env_opts *opts) +{ + if (!opts) { + return; + } + + memset(opts, 0, sizeof(*opts)); + + opts->name = SPDK_ENV_DPDK_DEFAULT_NAME; + opts->core_mask = SPDK_ENV_DPDK_DEFAULT_CORE_MASK; + opts->shm_id = SPDK_ENV_DPDK_DEFAULT_SHM_ID; + opts->mem_size = SPDK_ENV_DPDK_DEFAULT_MEM_SIZE; + opts->master_core = SPDK_ENV_DPDK_DEFAULT_MASTER_CORE; + opts->mem_channel = SPDK_ENV_DPDK_DEFAULT_MEM_CHANNEL; +} + +static void +spdk_free_args(char **args, int argcount) +{ + int i; + + for (i = 0; i < argcount; i++) { + free(args[i]); + } + + if (argcount) { + free(args); + } +} + +static char ** +spdk_push_arg(char *args[], int *argcount, char *arg) +{ + char **tmp; + + if (arg == NULL) { + fprintf(stderr, "%s: NULL arg supplied\n", __func__); + spdk_free_args(args, *argcount); + return NULL; + } + + tmp = realloc(args, sizeof(char *) * (*argcount + 1)); + if (tmp == NULL) { + spdk_free_args(args, *argcount); + return NULL; + } + + tmp[*argcount] = arg; + (*argcount)++; + + return tmp; +} + +static void +spdk_destruct_eal_cmdline(void) +{ + spdk_free_args(eal_cmdline, eal_cmdline_argcount); +} + + +static int +spdk_build_eal_cmdline(const struct spdk_env_opts *opts) +{ + int argcount = 0; + char **args; + + args = NULL; + + /* set the program name */ + args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", opts->name)); + if (args == NULL) { + return -1; + } + + /* disable shared configuration files when in single process mode. 
This allows for cleaner shutdown */ + if (opts->shm_id < 0) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s", "--no-shconf")); + if (args == NULL) { + return -1; + } + } + + /* set the coremask */ + /* NOTE: If coremask starts with '[' and ends with ']' it is a core list + */ + if (opts->core_mask[0] == '[') { + char *l_arg = _sprintf_alloc("-l %s", opts->core_mask + 1); + int len = strlen(l_arg); + if (l_arg[len - 1] == ']') { + l_arg[len - 1] = '\0'; + } + args = spdk_push_arg(args, &argcount, l_arg); + } else { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("-c %s", opts->core_mask)); + } + + if (args == NULL) { + return -1; + } + + /* set the memory channel number */ + if (opts->mem_channel > 0) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("-n %d", opts->mem_channel)); + if (args == NULL) { + return -1; + } + } + + /* set the memory size */ + if (opts->mem_size >= 0) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("-m %d", opts->mem_size)); + if (args == NULL) { + return -1; + } + } + + /* set the master core */ + if (opts->master_core > 0) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--master-lcore=%d", + opts->master_core)); + if (args == NULL) { + return -1; + } + } + + /* set no pci if enabled */ + if (opts->no_pci) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--no-pci")); + if (args == NULL) { + return -1; + } + } + + /* create just one hugetlbfs file */ + if (opts->hugepage_single_segments) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--single-file-segments")); + if (args == NULL) { + return -1; + } + } + + /* unlink hugepages after initialization */ + if (opts->unlink_hugepage) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--huge-unlink")); + if (args == NULL) { + return -1; + } + } + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) && RTE_VERSION < RTE_VERSION_NUM(18, 5, 1, 0) + /* Dynamic memory management is buggy in DPDK 18.05.0. Don't use it. */ + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--legacy-mem")); + if (args == NULL) { + return -1; + } +#endif + + if (opts->num_pci_addr) { + size_t i; + char bdf[32]; + struct spdk_pci_addr *pci_addr = + opts->pci_blacklist ? opts->pci_blacklist : opts->pci_whitelist; + + for (i = 0; i < opts->num_pci_addr; i++) { + spdk_pci_addr_fmt(bdf, 32, &pci_addr[i]); + args = spdk_push_arg(args, &argcount, _sprintf_alloc("%s=%s", + (opts->pci_blacklist ? "--pci-blacklist" : "--pci-whitelist"), + bdf)); + if (args == NULL) { + return -1; + } + } + } + +#ifdef __linux__ + if (opts->shm_id < 0) { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk_pid%d", + getpid())); + if (args == NULL) { + return -1; + } + } else { + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--file-prefix=spdk%d", + opts->shm_id)); + if (args == NULL) { + return -1; + } + + /* Set the base virtual address - it must be an address that is not in the + * ASAN shadow region, otherwise ASAN-enabled builds will ignore the + * mmap hint. 
+ * + * Ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm + */ + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--base-virtaddr=0x200000000000")); + if (args == NULL) { + return -1; + } + + /* set the process type */ + args = spdk_push_arg(args, &argcount, _sprintf_alloc("--proc-type=auto")); + if (args == NULL) { + return -1; + } + } +#endif + + eal_cmdline = args; + eal_cmdline_argcount = argcount; + if (atexit(spdk_destruct_eal_cmdline) != 0) { + fprintf(stderr, "Failed to register cleanup handler\n"); + } + + return argcount; +} + +int spdk_env_init(const struct spdk_env_opts *opts) +{ + char **dpdk_args = NULL; + int i, rc; + int orig_optind; + + rc = spdk_build_eal_cmdline(opts); + if (rc < 0) { + fprintf(stderr, "Invalid arguments to initialize DPDK\n"); + return -1; + } + + printf("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); + printf("[ DPDK EAL parameters: "); + for (i = 0; i < eal_cmdline_argcount; i++) { + printf("%s ", eal_cmdline[i]); + } + printf("]\n"); + + /* DPDK rearranges the array we pass to it, so make a copy + * before passing so we can still free the individual strings + * correctly. + */ + dpdk_args = calloc(eal_cmdline_argcount, sizeof(char *)); + if (dpdk_args == NULL) { + fprintf(stderr, "Failed to allocate dpdk_args\n"); + return -1; + } + memcpy(dpdk_args, eal_cmdline, sizeof(char *) * eal_cmdline_argcount); + + fflush(stdout); + orig_optind = optind; + optind = 1; + rc = rte_eal_init(eal_cmdline_argcount, dpdk_args); + optind = orig_optind; + + free(dpdk_args); + + if (rc < 0) { + fprintf(stderr, "Failed to initialize DPDK\n"); + return -1; + } + + if (opts->shm_id < 0 && !opts->hugepage_single_segments) { + /* + * Unlink hugepage and config info files after init. This will ensure they get + * deleted on app exit, even if the app crashes and does not exit normally. + * Only do this when not in multi-process mode, since for multi-process other + * apps will need to open these files. These files are not created for + * "single file segments". + */ + spdk_env_unlink_shared_files(); + } + + if (spdk_mem_map_init() < 0) { + fprintf(stderr, "Failed to allocate mem_map\n"); + return -1; + } + if (spdk_vtophys_init() < 0) { + fprintf(stderr, "Failed to initialize vtophys\n"); + return -1; + } + + return 0; +} diff --git a/src/spdk/lib/env_dpdk/memory.c b/src/spdk/lib/env_dpdk/memory.c new file mode 100644 index 00000000..eaeccb90 --- /dev/null +++ b/src/spdk/lib/env_dpdk/memory.c @@ -0,0 +1,712 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
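
Putting the pieces of init.c above together, a minimal sketch of bringing up the environment; the application name and core mask are placeholders, and each field written here corresponds to an option consumed by spdk_build_eal_cmdline():

#include "spdk/env.h"

#include <stdio.h>

int
main(void)
{
	struct spdk_env_opts opts;

	spdk_env_opts_init(&opts);	/* fills in the SPDK_ENV_DPDK_DEFAULT_* values */
	opts.name = "env_demo";		/* becomes argv[0] of the generated EAL command line */
	opts.core_mask = "0x3";		/* emitted as "-c 0x3"; a "[...]" value becomes an "-l" core list */

	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize the SPDK environment\n");
		return 1;
	}

	return 0;
}
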
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "env_internal.h" + +#include +#include + +#include "spdk_internal/assert.h" + +#include "spdk/assert.h" +#include "spdk/likely.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +#if DEBUG +#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PRINT(...) +#endif + +#define FN_2MB_TO_4KB(fn) (fn << (SHIFT_2MB - SHIFT_4KB)) +#define FN_4KB_TO_2MB(fn) (fn >> (SHIFT_2MB - SHIFT_4KB)) + +#define MAP_256TB_IDX(vfn_2mb) ((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB)) +#define MAP_1GB_IDX(vfn_2mb) ((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1)) + +/* Page is registered */ +#define REG_MAP_REGISTERED (1ULL << 62) + +/* A notification region barrier. The 2MB translation entry that's marked + * with this flag must be unregistered separately. This allows contiguous + * regions to be unregistered in the same chunks they were registered. + */ +#define REG_MAP_NOTIFY_START (1ULL << 63) + +/* Translation of a single 2MB page. */ +struct map_2mb { + uint64_t translation_2mb; +}; + +/* Second-level map table indexed by bits [21..29] of the virtual address. + * Each entry contains the address translation or error for entries that haven't + * been retrieved yet. + */ +struct map_1gb { + struct map_2mb map[1ULL << (SHIFT_1GB - SHIFT_2MB)]; +}; + +/* Top-level map table indexed by bits [30..47] of the virtual address. + * Each entry points to a second-level map table or NULL. + */ +struct map_256tb { + struct map_1gb *map[1ULL << (SHIFT_256TB - SHIFT_1GB)]; +}; + +/* Page-granularity memory address translation */ +struct spdk_mem_map { + struct map_256tb map_256tb; + pthread_mutex_t mutex; + uint64_t default_translation; + struct spdk_mem_map_ops ops; + void *cb_ctx; + TAILQ_ENTRY(spdk_mem_map) tailq; +}; + +/* Registrations map. The 64 bit translations are bit fields with the + * following layout (starting with the low bits): + * 0 - 61 : reserved + * 62 - 63 : flags + */ +static struct spdk_mem_map *g_mem_reg_map; +static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps); +static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* + * Walk the currently registered memory via the main memory registration map + * and call the new map's notify callback for each virtually contiguous region. + */ +static int +spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action action) +{ + size_t idx_256tb; + uint64_t idx_1gb; + uint64_t contig_start = UINT64_MAX; + uint64_t contig_end = UINT64_MAX; + struct map_1gb *map_1gb; + int rc; + + if (!g_mem_reg_map) { + return -EINVAL; + } + + /* Hold the memory registration map mutex so no new registrations can be added while we are looping. 
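
The map_2mb/map_1gb/map_256tb structures above implement a two-level, page-table-style lookup keyed by the 2 MB virtual frame number. A standalone sketch of the index arithmetic; the shift constants are copied from memory.c and the sample address is arbitrary:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SHIFT_2MB	21
#define SHIFT_1GB	30

int
main(void)
{
	uint64_t vaddr = 0x00007f5a1c200000ULL;		/* example 2 MB-aligned user address */
	uint64_t vfn_2mb = vaddr >> SHIFT_2MB;		/* 2 MB virtual frame number */
	uint64_t idx_256tb = vfn_2mb >> (SHIFT_1GB - SHIFT_2MB);		/* MAP_256TB_IDX() */
	uint64_t idx_1gb = vfn_2mb & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1);	/* MAP_1GB_IDX() */

	printf("map_256tb.map[%" PRIu64 "]->map[%" PRIu64 "].translation_2mb\n",
	       idx_256tb, idx_1gb);
	return 0;
}
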
*/ + pthread_mutex_lock(&g_mem_reg_map->mutex); + + for (idx_256tb = 0; + idx_256tb < sizeof(g_mem_reg_map->map_256tb.map) / sizeof(g_mem_reg_map->map_256tb.map[0]); + idx_256tb++) { + map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb]; + + if (!map_1gb) { + if (contig_start != UINT64_MAX) { + /* End of of a virtually contiguous range */ + rc = map->ops.notify_cb(map->cb_ctx, map, action, + (void *)contig_start, + contig_end - contig_start + VALUE_2MB); + /* Don't bother handling unregister failures. It can't be any worse */ + if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) { + goto err_unregister; + } + } + contig_start = UINT64_MAX; + continue; + } + + for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) { + if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) && + (contig_start == UINT64_MAX || + (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) { + /* Rebuild the virtual address from the indexes */ + uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB); + + if (contig_start == UINT64_MAX) { + contig_start = vaddr; + } + + contig_end = vaddr; + } else { + if (contig_start != UINT64_MAX) { + /* End of of a virtually contiguous range */ + rc = map->ops.notify_cb(map->cb_ctx, map, action, + (void *)contig_start, + contig_end - contig_start + VALUE_2MB); + /* Don't bother handling unregister failures. It can't be any worse */ + if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) { + goto err_unregister; + } + + /* This page might be a part of a neighbour region, so process + * it again. The idx_1gb will be incremented immediately. + */ + idx_1gb--; + } + contig_start = UINT64_MAX; + } + } + } + + pthread_mutex_unlock(&g_mem_reg_map->mutex); + return 0; + +err_unregister: + /* Unwind to the first empty translation so we don't unregister + * a region that just failed to register. 
+ */ + idx_256tb = MAP_256TB_IDX((contig_start >> SHIFT_2MB) - 1); + idx_1gb = MAP_1GB_IDX((contig_start >> SHIFT_2MB) - 1); + contig_start = UINT64_MAX; + contig_end = UINT64_MAX; + + /* Unregister any memory we managed to register before the failure */ + for (; idx_256tb < SIZE_MAX; idx_256tb--) { + map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb]; + + if (!map_1gb) { + if (contig_end != UINT64_MAX) { + /* End of of a virtually contiguous range */ + map->ops.notify_cb(map->cb_ctx, map, + SPDK_MEM_MAP_NOTIFY_UNREGISTER, + (void *)contig_start, + contig_end - contig_start + VALUE_2MB); + } + contig_end = UINT64_MAX; + continue; + } + + for (; idx_1gb < UINT64_MAX; idx_1gb--) { + if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) && + (contig_end == UINT64_MAX || (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) { + /* Rebuild the virtual address from the indexes */ + uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB); + + if (contig_end == UINT64_MAX) { + contig_end = vaddr; + } + contig_start = vaddr; + } else { + if (contig_end != UINT64_MAX) { + /* End of of a virtually contiguous range */ + map->ops.notify_cb(map->cb_ctx, map, + SPDK_MEM_MAP_NOTIFY_UNREGISTER, + (void *)contig_start, + contig_end - contig_start + VALUE_2MB); + idx_1gb++; + } + contig_end = UINT64_MAX; + } + } + idx_1gb = sizeof(map_1gb->map) / sizeof(map_1gb->map[0]) - 1; + } + + pthread_mutex_unlock(&g_mem_reg_map->mutex); + return rc; +} + +struct spdk_mem_map * +spdk_mem_map_alloc(uint64_t default_translation, const struct spdk_mem_map_ops *ops, void *cb_ctx) +{ + struct spdk_mem_map *map; + int rc; + + map = calloc(1, sizeof(*map)); + if (map == NULL) { + return NULL; + } + + if (pthread_mutex_init(&map->mutex, NULL)) { + free(map); + return NULL; + } + + map->default_translation = default_translation; + map->cb_ctx = cb_ctx; + if (ops) { + map->ops = *ops; + } + + if (ops && ops->notify_cb) { + pthread_mutex_lock(&g_spdk_mem_map_mutex); + rc = spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_REGISTER); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + DEBUG_PRINT("Initial mem_map notify failed\n"); + pthread_mutex_destroy(&map->mutex); + free(map); + return NULL; + } + TAILQ_INSERT_TAIL(&g_spdk_mem_maps, map, tailq); + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + } + + return map; +} + +void +spdk_mem_map_free(struct spdk_mem_map **pmap) +{ + struct spdk_mem_map *map; + size_t i; + + if (!pmap) { + return; + } + + map = *pmap; + + if (!map) { + return; + } + + if (map->ops.notify_cb) { + pthread_mutex_lock(&g_spdk_mem_map_mutex); + spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_UNREGISTER); + TAILQ_REMOVE(&g_spdk_mem_maps, map, tailq); + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + } + + for (i = 0; i < sizeof(map->map_256tb.map) / sizeof(map->map_256tb.map[0]); i++) { + free(map->map_256tb.map[i]); + } + + pthread_mutex_destroy(&map->mutex); + + free(map); + *pmap = NULL; +} + +int +spdk_mem_register(void *vaddr, size_t len) +{ + struct spdk_mem_map *map; + int rc; + void *seg_vaddr; + size_t seg_len; + uint64_t reg; + + if ((uintptr_t)vaddr & ~MASK_256TB) { + DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr); + return -EINVAL; + } + + if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) { + DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n", + __func__, vaddr, len); + return -EINVAL; + } + + if (len == 0) { + return 0; + } + + pthread_mutex_lock(&g_spdk_mem_map_mutex); + + seg_vaddr = vaddr; + seg_len = len; + while 
(seg_len > 0) { + reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL); + if (reg & REG_MAP_REGISTERED) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return -EBUSY; + } + seg_vaddr += VALUE_2MB; + seg_len -= VALUE_2MB; + } + + seg_vaddr = vaddr; + seg_len = 0; + while (len > 0) { + spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, + seg_len == 0 ? REG_MAP_REGISTERED | REG_MAP_NOTIFY_START : REG_MAP_REGISTERED); + seg_len += VALUE_2MB; + vaddr += VALUE_2MB; + len -= VALUE_2MB; + } + + TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) { + rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return rc; + } + } + + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return 0; +} + +int +spdk_mem_unregister(void *vaddr, size_t len) +{ + struct spdk_mem_map *map; + int rc; + void *seg_vaddr; + size_t seg_len; + uint64_t reg, newreg; + + if ((uintptr_t)vaddr & ~MASK_256TB) { + DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr); + return -EINVAL; + } + + if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) { + DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n", + __func__, vaddr, len); + return -EINVAL; + } + + pthread_mutex_lock(&g_spdk_mem_map_mutex); + + /* The first page must be a start of a region. Also check if it's + * registered to make sure we don't return -ERANGE for non-registered + * regions. + */ + reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL); + if ((reg & REG_MAP_REGISTERED) && (reg & REG_MAP_NOTIFY_START) == 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return -ERANGE; + } + + seg_vaddr = vaddr; + seg_len = len; + while (seg_len > 0) { + reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL); + if ((reg & REG_MAP_REGISTERED) == 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return -EINVAL; + } + seg_vaddr += VALUE_2MB; + seg_len -= VALUE_2MB; + } + + newreg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL); + /* If the next page is registered, it must be a start of a region as well, + * otherwise we'd be unregistering only a part of a region. 
+ */ + if ((newreg & REG_MAP_NOTIFY_START) == 0 && (newreg & REG_MAP_REGISTERED)) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return -ERANGE; + } + seg_vaddr = vaddr; + seg_len = 0; + + while (len > 0) { + reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL); + spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, 0); + + if (seg_len > 0 && (reg & REG_MAP_NOTIFY_START)) { + TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) { + rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return rc; + } + } + + seg_vaddr = vaddr; + seg_len = VALUE_2MB; + } else { + seg_len += VALUE_2MB; + } + + vaddr += VALUE_2MB; + len -= VALUE_2MB; + } + + if (seg_len > 0) { + TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) { + rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return rc; + } + } + } + + pthread_mutex_unlock(&g_spdk_mem_map_mutex); + return 0; +} + +static struct map_1gb * +spdk_mem_map_get_map_1gb(struct spdk_mem_map *map, uint64_t vfn_2mb) +{ + struct map_1gb *map_1gb; + uint64_t idx_256tb = MAP_256TB_IDX(vfn_2mb); + size_t i; + + if (spdk_unlikely(idx_256tb >= SPDK_COUNTOF(map->map_256tb.map))) { + return NULL; + } + + map_1gb = map->map_256tb.map[idx_256tb]; + + if (!map_1gb) { + pthread_mutex_lock(&map->mutex); + + /* Recheck to make sure nobody else got the mutex first. */ + map_1gb = map->map_256tb.map[idx_256tb]; + if (!map_1gb) { + map_1gb = malloc(sizeof(struct map_1gb)); + if (map_1gb) { + /* initialize all entries to default translation */ + for (i = 0; i < SPDK_COUNTOF(map_1gb->map); i++) { + map_1gb->map[i].translation_2mb = map->default_translation; + } + map->map_256tb.map[idx_256tb] = map_1gb; + } + } + + pthread_mutex_unlock(&map->mutex); + + if (!map_1gb) { + DEBUG_PRINT("allocation failed\n"); + return NULL; + } + } + + return map_1gb; +} + +int +spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size, + uint64_t translation) +{ + uint64_t vfn_2mb; + struct map_1gb *map_1gb; + uint64_t idx_1gb; + struct map_2mb *map_2mb; + + if ((uintptr_t)vaddr & ~MASK_256TB) { + DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr); + return -EINVAL; + } + + /* For now, only 2 MB-aligned registrations are supported */ + if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) { + DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n", + __func__, vaddr, size); + return -EINVAL; + } + + vfn_2mb = vaddr >> SHIFT_2MB; + + while (size) { + map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb); + if (!map_1gb) { + DEBUG_PRINT("could not get %p map\n", (void *)vaddr); + return -ENOMEM; + } + + idx_1gb = MAP_1GB_IDX(vfn_2mb); + map_2mb = &map_1gb->map[idx_1gb]; + map_2mb->translation_2mb = translation; + + size -= VALUE_2MB; + vfn_2mb++; + } + + return 0; +} + +int +spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size) +{ + uint64_t vfn_2mb; + struct map_1gb *map_1gb; + uint64_t idx_1gb; + struct map_2mb *map_2mb; + + if ((uintptr_t)vaddr & ~MASK_256TB) { + DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr); + return -EINVAL; + } + + /* For now, only 2 MB-aligned registrations are supported */ + if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) { + DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n", + __func__, vaddr, size); + return -EINVAL; + } + + vfn_2mb = vaddr >> 
SHIFT_2MB; + + while (size) { + map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb); + if (!map_1gb) { + DEBUG_PRINT("could not get %p map\n", (void *)vaddr); + return -ENOMEM; + } + + idx_1gb = MAP_1GB_IDX(vfn_2mb); + map_2mb = &map_1gb->map[idx_1gb]; + map_2mb->translation_2mb = map->default_translation; + + size -= VALUE_2MB; + vfn_2mb++; + } + + return 0; +} + +uint64_t +spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size) +{ + const struct map_1gb *map_1gb; + const struct map_2mb *map_2mb; + uint64_t idx_256tb; + uint64_t idx_1gb; + uint64_t vfn_2mb; + uint64_t total_size = 0; + uint64_t cur_size; + uint64_t prev_translation; + + if (size != NULL) { + total_size = *size; + *size = 0; + } + + if (spdk_unlikely(vaddr & ~MASK_256TB)) { + DEBUG_PRINT("invalid usermode virtual address %p\n", (void *)vaddr); + return map->default_translation; + } + + vfn_2mb = vaddr >> SHIFT_2MB; + idx_256tb = MAP_256TB_IDX(vfn_2mb); + idx_1gb = MAP_1GB_IDX(vfn_2mb); + + map_1gb = map->map_256tb.map[idx_256tb]; + if (spdk_unlikely(!map_1gb)) { + return map->default_translation; + } + + cur_size = VALUE_2MB; + if (size != NULL) { + *size = VALUE_2MB; + } + + map_2mb = &map_1gb->map[idx_1gb]; + if (size == NULL || map->ops.are_contiguous == NULL || + map_2mb->translation_2mb == map->default_translation) { + return map_2mb->translation_2mb; + } + + prev_translation = map_2mb->translation_2mb;; + while (cur_size < total_size) { + vfn_2mb++; + idx_256tb = MAP_256TB_IDX(vfn_2mb); + idx_1gb = MAP_1GB_IDX(vfn_2mb); + + map_1gb = map->map_256tb.map[idx_256tb]; + if (spdk_unlikely(!map_1gb)) { + break; + } + + map_2mb = &map_1gb->map[idx_1gb]; + if (!map->ops.are_contiguous(prev_translation, map_2mb->translation_2mb)) { + break; + } + + cur_size += VALUE_2MB; + prev_translation = map_2mb->translation_2mb; + } + + *size = cur_size; + return prev_translation; +} + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) +static void +memory_hotplug_cb(enum rte_mem_event event_type, + const void *addr, size_t len, void *arg) +{ + if (event_type == RTE_MEM_EVENT_ALLOC) { + while (len > 0) { + struct rte_memseg *seg; + + seg = rte_mem_virt2memseg(addr, NULL); + assert(seg != NULL); + assert(len >= seg->hugepage_sz); + + spdk_mem_register((void *)seg->addr, seg->hugepage_sz); + addr = (void *)((uintptr_t)addr + seg->hugepage_sz); + len -= seg->hugepage_sz; + } + } else if (event_type == RTE_MEM_EVENT_FREE) { + spdk_mem_unregister((void *)addr, len); + } +} + +static int +memory_iter_cb(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, size_t len, void *arg) +{ + return spdk_mem_register(ms->addr, len); +} +#endif + +int +spdk_mem_map_init(void) +{ + g_mem_reg_map = spdk_mem_map_alloc(0, NULL, NULL); + if (g_mem_reg_map == NULL) { + DEBUG_PRINT("memory registration map allocation failed\n"); + return -1; + } + + /* + * Walk all DPDK memory segments and register them + * with the master memory map + */ +#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) + rte_mem_event_callback_register("spdk", memory_hotplug_cb, NULL); + rte_memseg_contig_walk(memory_iter_cb, NULL); +#else + struct rte_mem_config *mcfg; + size_t seg_idx; + + mcfg = rte_eal_get_configuration()->mem_config; + for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) { + struct rte_memseg *seg = &mcfg->memseg[seg_idx]; + + if (seg->addr == NULL) { + break; + } + + spdk_mem_register(seg->addr, seg->len); + } +#endif + return 0; +} diff --git a/src/spdk/lib/env_dpdk/pci.c b/src/spdk/lib/env_dpdk/pci.c new file mode 100644 index 
00000000..4153ac93 --- /dev/null +++ b/src/spdk/lib/env_dpdk/pci.c @@ -0,0 +1,551 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "env_internal.h" + +#include "spdk/env.h" + +#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" + +#define PCI_CFG_SIZE 256 +#define PCI_EXT_CAP_ID_SN 0x03 + +int +spdk_pci_device_init(struct rte_pci_driver *driver, + struct rte_pci_device *device) +{ + struct spdk_pci_enum_ctx *ctx = (struct spdk_pci_enum_ctx *)driver; + int rc; + + if (!ctx->cb_fn) { +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rte_pci_unmap_device(device); +#elif RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + rte_eal_pci_unmap_device(device); +#endif + + /* Return a positive value to indicate that this device does not belong to this driver, but + * this isn't an error. 
*/ + return 1; + } + + rc = ctx->cb_fn(ctx->cb_arg, (struct spdk_pci_device *)device); + if (rc != 0) { + return rc; + } + + spdk_vtophys_pci_device_added(device); + return 0; +} + +int +spdk_pci_device_fini(struct rte_pci_device *device) +{ + spdk_vtophys_pci_device_removed(device); + return 0; +} + +void +spdk_pci_device_detach(struct spdk_pci_device *device) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) +#if RTE_VERSION < RTE_VERSION_NUM(17, 05, 0, 0) + rte_eal_device_remove(&device->device); +#endif +#endif + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + struct spdk_pci_addr addr; + char bdf[32]; + + addr.domain = device->addr.domain; + addr.bus = device->addr.bus; + addr.dev = device->addr.devid; + addr.func = device->addr.function; + + spdk_pci_addr_fmt(bdf, sizeof(bdf), &addr); + if (rte_eal_dev_detach(&device->device) < 0) { + fprintf(stderr, "Failed to detach PCI device %s (device already removed?).\n", bdf); + } +#elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rte_pci_detach(&device->addr); +#else + rte_eal_pci_detach(&device->addr); +#endif +} + +int +spdk_pci_device_attach(struct spdk_pci_enum_ctx *ctx, + spdk_pci_enum_cb enum_cb, + void *enum_ctx, struct spdk_pci_addr *pci_address) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + char bdf[32]; + + spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); +#else + struct rte_pci_addr addr; + + addr.domain = pci_address->domain; + addr.bus = pci_address->bus; + addr.devid = pci_address->dev; + addr.function = pci_address->func; +#endif + + pthread_mutex_lock(&ctx->mtx); + + if (!ctx->is_registered) { + ctx->is_registered = true; +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rte_pci_register(&ctx->driver); +#else + rte_eal_pci_register(&ctx->driver); +#endif + } + + ctx->cb_fn = enum_cb; + ctx->cb_arg = enum_ctx; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + if (rte_eal_dev_attach(bdf, "") != 0) { +#elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + if (rte_pci_probe_one(&addr) != 0) { +#else + if (rte_eal_pci_probe_one(&addr) != 0) { +#endif + ctx->cb_arg = NULL; + ctx->cb_fn = NULL; + pthread_mutex_unlock(&ctx->mtx); + return -1; + } + + ctx->cb_arg = NULL; + ctx->cb_fn = NULL; + pthread_mutex_unlock(&ctx->mtx); + + return 0; +} + +/* Note: You can call spdk_pci_enumerate from more than one thread + * simultaneously safely, but you cannot call spdk_pci_enumerate + * and rte_eal_pci_probe simultaneously. 
+ */ +int +spdk_pci_enumerate(struct spdk_pci_enum_ctx *ctx, + spdk_pci_enum_cb enum_cb, + void *enum_ctx) +{ + pthread_mutex_lock(&ctx->mtx); + + if (!ctx->is_registered) { + ctx->is_registered = true; +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rte_pci_register(&ctx->driver); +#else + rte_eal_pci_register(&ctx->driver); +#endif + } + + ctx->cb_fn = enum_cb; + ctx->cb_arg = enum_ctx; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + if (rte_bus_probe() != 0) { +#elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + if (rte_pci_probe() != 0) { +#else + if (rte_eal_pci_probe() != 0) { +#endif + ctx->cb_arg = NULL; + ctx->cb_fn = NULL; + pthread_mutex_unlock(&ctx->mtx); + return -1; + } + + ctx->cb_arg = NULL; + ctx->cb_fn = NULL; + pthread_mutex_unlock(&ctx->mtx); + + return 0; +} + +int +spdk_pci_device_map_bar(struct spdk_pci_device *device, uint32_t bar, + void **mapped_addr, uint64_t *phys_addr, uint64_t *size) +{ + struct rte_pci_device *dev = device; + + *mapped_addr = dev->mem_resource[bar].addr; + *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; + *size = (uint64_t)dev->mem_resource[bar].len; + + return 0; +} + +int +spdk_pci_device_unmap_bar(struct spdk_pci_device *device, uint32_t bar, void *addr) +{ + return 0; +} + +uint32_t +spdk_pci_device_get_domain(struct spdk_pci_device *dev) +{ + return dev->addr.domain; +} + +uint8_t +spdk_pci_device_get_bus(struct spdk_pci_device *dev) +{ + return dev->addr.bus; +} + +uint8_t +spdk_pci_device_get_dev(struct spdk_pci_device *dev) +{ + return dev->addr.devid; +} + +uint8_t +spdk_pci_device_get_func(struct spdk_pci_device *dev) +{ + return dev->addr.function; +} + +uint16_t +spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) +{ + return dev->id.vendor_id; +} + +uint16_t +spdk_pci_device_get_device_id(struct spdk_pci_device *dev) +{ + return dev->id.device_id; +} + +uint16_t +spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) +{ + return dev->id.subsystem_vendor_id; +} + +uint16_t +spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) +{ + return dev->id.subsystem_device_id; +} + +struct spdk_pci_id +spdk_pci_device_get_id(struct spdk_pci_device *pci_dev) +{ + struct spdk_pci_id pci_id; + + pci_id.vendor_id = spdk_pci_device_get_vendor_id(pci_dev); + pci_id.device_id = spdk_pci_device_get_device_id(pci_dev); + pci_id.subvendor_id = spdk_pci_device_get_subvendor_id(pci_dev); + pci_id.subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev); + + return pci_id; +} + +int +spdk_pci_device_get_socket_id(struct spdk_pci_device *pci_dev) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + return pci_dev->device.numa_node; +#else + return pci_dev->numa_node; +#endif +} + +int +spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + int rc; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rc = rte_pci_read_config(dev, value, len, offset); +#else + rc = rte_eal_pci_read_config(dev, value, len, offset); +#endif + return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; +} + +int +spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + int rc; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4) + rc = rte_pci_write_config(dev, value, len, offset); +#else + rc = rte_eal_pci_write_config(dev, value, len, offset); +#endif + return (rc > 0 && (uint32_t) rc == len) ? 
0 : -1; +} + +int +spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 1, offset); +} + +int +spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 1, offset); +} + +int +spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 2, offset); +} + +int +spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 2, offset); +} + +int +spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 4, offset); +} + +int +spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 4, offset); +} + +int +spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) +{ + int err; + uint32_t pos, header = 0; + uint32_t i, buf[2]; + + if (len < 17) { + return -1; + } + + err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); + if (err || !header) { + return -1; + } + + pos = PCI_CFG_SIZE; + while (1) { + if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { + if (pos) { + /* skip the header */ + pos += 4; + for (i = 0; i < 2; i++) { + err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); + if (err) { + return -1; + } + } + snprintf(sn, len, "%08x%08x", buf[1], buf[0]); + return 0; + } + } + pos = (header >> 20) & 0xffc; + /* 0 if no other items exist */ + if (pos < PCI_CFG_SIZE) { + return -1; + } + err = spdk_pci_device_cfg_read32(dev, &header, pos); + if (err) { + return -1; + } + } + return -1; +} + +struct spdk_pci_addr +spdk_pci_device_get_addr(struct spdk_pci_device *pci_dev) +{ + struct spdk_pci_addr pci_addr; + + pci_addr.domain = spdk_pci_device_get_domain(pci_dev); + pci_addr.bus = spdk_pci_device_get_bus(pci_dev); + pci_addr.dev = spdk_pci_device_get_dev(pci_dev); + pci_addr.func = spdk_pci_device_get_func(pci_dev); + + return pci_addr; +} + +int +spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) +{ + if (a1->domain > a2->domain) { + return 1; + } else if (a1->domain < a2->domain) { + return -1; + } else if (a1->bus > a2->bus) { + return 1; + } else if (a1->bus < a2->bus) { + return -1; + } else if (a1->dev > a2->dev) { + return 1; + } else if (a1->dev < a2->dev) { + return -1; + } else if (a1->func > a2->func) { + return 1; + } else if (a1->func < a2->func) { + return -1; + } + + return 0; +} + +#ifdef __linux__ +int +spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr) +{ + int dev_fd; + char dev_name[64]; + int pid; + void *dev_map; + struct flock pcidev_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + + snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", pci_addr->domain, + pci_addr->bus, + pci_addr->dev, pci_addr->func); + + dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (dev_fd == -1) { + fprintf(stderr, "could not open %s\n", dev_name); + return -1; + } + + if (ftruncate(dev_fd, sizeof(int)) != 0) { + fprintf(stderr, "could not truncate %s\n", dev_name); + close(dev_fd); + return -1; + } + + dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED, dev_fd, 0); + if (dev_map == MAP_FAILED) { + fprintf(stderr, "could not mmap dev 
%s (%d)\n", dev_name, errno); + close(dev_fd); + return -1; + } + + if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { + pid = *(int *)dev_map; + fprintf(stderr, "Cannot create lock on device %s, probably" + " process %d has claimed it\n", dev_name, pid); + munmap(dev_map, sizeof(int)); + close(dev_fd); + return -1; + } + + *(int *)dev_map = (int)getpid(); + munmap(dev_map, sizeof(int)); + /* Keep dev_fd open to maintain the lock. */ + return dev_fd; +} +#endif /* __linux__ */ + +#ifdef __FreeBSD__ +int +spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr) +{ + /* TODO */ + return 0; +} +#endif /* __FreeBSD__ */ + +int +spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) +{ + unsigned domain, bus, dev, func; + + if (addr == NULL || bdf == NULL) { + return -EINVAL; + } + + if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || + (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { + /* Matched a full address - all variables are initialized */ + } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { + func = 0; + } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || + (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { + domain = 0; + } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || + (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { + domain = 0; + func = 0; + } else { + return -EINVAL; + } + + if (bus > 0xFF || dev > 0x1F || func > 7) { + return -EINVAL; + } + + addr->domain = domain; + addr->bus = bus; + addr->dev = dev; + addr->func = func; + + return 0; +} + +int +spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) +{ + int rc; + + rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", + addr->domain, addr->bus, + addr->dev, addr->func); + + if (rc > 0 && (size_t)rc < sz) { + return 0; + } + + return -1; +} diff --git a/src/spdk/lib/env_dpdk/pci_ioat.c b/src/spdk/lib/env_dpdk/pci_ioat.c new file mode 100644 index 00000000..b9640283 --- /dev/null +++ b/src/spdk/lib/env_dpdk/pci_ioat.c @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "env_internal.h" + +#include "spdk/pci_ids.h" + +#define SPDK_IOAT_PCI_DEVICE(DEVICE_ID) RTE_PCI_DEVICE(SPDK_PCI_VID_INTEL, DEVICE_ID) +static struct rte_pci_id ioat_driver_id[] = { + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB4)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB5)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB6)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB7)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SNB8)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB4)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB5)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB6)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB7)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB8)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_IVB9)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW4)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW5)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW6)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW7)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW8)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_HSW9)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BWD0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BWD1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BWD2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BWD3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDXDE0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDXDE1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDXDE2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDXDE3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX0)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX1)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX2)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX3)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX4)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX5)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX6)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX7)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX8)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_BDX9)}, + {SPDK_IOAT_PCI_DEVICE(PCI_DEVICE_ID_INTEL_IOAT_SKX)}, + { .vendor_id = 0, /* sentinel */ }, +}; + +static struct 
spdk_pci_enum_ctx g_ioat_pci_drv = { + .driver = { + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .id_table = ioat_driver_id, +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + .probe = spdk_pci_device_init, + .remove = spdk_pci_device_fini, + .driver.name = "spdk_ioat", +#else + .devinit = spdk_pci_device_init, + .devuninit = spdk_pci_device_fini, + .name = "spdk_ioat", +#endif + }, + + .cb_fn = NULL, + .cb_arg = NULL, + .mtx = PTHREAD_MUTEX_INITIALIZER, + .is_registered = false, +}; + +int +spdk_pci_ioat_device_attach(spdk_pci_enum_cb enum_cb, void *enum_ctx, + struct spdk_pci_addr *pci_address) +{ + return spdk_pci_device_attach(&g_ioat_pci_drv, enum_cb, enum_ctx, pci_address); +} + +int +spdk_pci_ioat_enumerate(spdk_pci_enum_cb enum_cb, void *enum_ctx) +{ + return spdk_pci_enumerate(&g_ioat_pci_drv, enum_cb, enum_ctx); +} diff --git a/src/spdk/lib/env_dpdk/pci_nvme.c b/src/spdk/lib/env_dpdk/pci_nvme.c new file mode 100644 index 00000000..4f3b84d1 --- /dev/null +++ b/src/spdk/lib/env_dpdk/pci_nvme.c @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "env_internal.h" + +#include "spdk/pci_ids.h" + +static struct rte_pci_id nvme_pci_driver_id[] = { +#if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 1) + { + .class_id = SPDK_PCI_CLASS_NVME, + .vendor_id = PCI_ANY_ID, + .device_id = PCI_ANY_ID, + .subsystem_vendor_id = PCI_ANY_ID, + .subsystem_device_id = PCI_ANY_ID, + }, +#else + {RTE_PCI_DEVICE(0x8086, 0x0953)}, +#endif + { .vendor_id = 0, /* sentinel */ }, +}; + +static struct spdk_pci_enum_ctx g_nvme_pci_drv = { + .driver = { + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +#if RTE_VERSION >= RTE_VERSION_NUM(18, 8, 0, 0) + | RTE_PCI_DRV_WC_ACTIVATE +#endif + , + .id_table = nvme_pci_driver_id, +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + .probe = spdk_pci_device_init, + .remove = spdk_pci_device_fini, + .driver.name = "spdk_nvme", +#else + .devinit = spdk_pci_device_init, + .devuninit = spdk_pci_device_fini, + .name = "spdk_nvme", +#endif + }, + + .cb_fn = NULL, + .cb_arg = NULL, + .mtx = PTHREAD_MUTEX_INITIALIZER, + .is_registered = false, +}; + +int +spdk_pci_nvme_device_attach(spdk_pci_enum_cb enum_cb, + void *enum_ctx, struct spdk_pci_addr *pci_address) +{ + return spdk_pci_device_attach(&g_nvme_pci_drv, enum_cb, enum_ctx, pci_address); +} + +int +spdk_pci_nvme_enumerate(spdk_pci_enum_cb enum_cb, void *enum_ctx) +{ + return spdk_pci_enumerate(&g_nvme_pci_drv, enum_cb, enum_ctx); +} diff --git a/src/spdk/lib/env_dpdk/pci_virtio.c b/src/spdk/lib/env_dpdk/pci_virtio.c new file mode 100644 index 00000000..1fcb80d7 --- /dev/null +++ b/src/spdk/lib/env_dpdk/pci_virtio.c @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "env_internal.h" + +#include "spdk/pci_ids.h" + +static struct rte_pci_id virtio_pci_driver_id[] = { + { RTE_PCI_DEVICE(SPDK_PCI_VID_VIRTIO, PCI_DEVICE_ID_VIRTIO_SCSI_MODERN) }, + { RTE_PCI_DEVICE(SPDK_PCI_VID_VIRTIO, PCI_DEVICE_ID_VIRTIO_BLK_MODERN) }, + { .vendor_id = 0, /* sentinel */ }, +}; + +static struct spdk_pci_enum_ctx g_virtio_pci_drv = { + .driver = { + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +#if RTE_VERSION >= RTE_VERSION_NUM(18, 8, 0, 0) + | RTE_PCI_DRV_WC_ACTIVATE +#endif + , + .id_table = virtio_pci_driver_id, +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + .probe = spdk_pci_device_init, + .remove = spdk_pci_device_fini, + .driver.name = "spdk_virtio", +#else + .devinit = spdk_pci_device_init, + .devuninit = spdk_pci_device_fini, + .name = "spdk_virtio", +#endif + }, + + .cb_fn = NULL, + .cb_arg = NULL, + .mtx = PTHREAD_MUTEX_INITIALIZER, + .is_registered = false, +}; + +int +spdk_pci_virtio_device_attach(spdk_pci_enum_cb enum_cb, + void *enum_ctx, struct spdk_pci_addr *pci_address) +{ + return spdk_pci_device_attach(&g_virtio_pci_drv, enum_cb, enum_ctx, pci_address); +} + +int +spdk_pci_virtio_enumerate(spdk_pci_enum_cb enum_cb, void *enum_ctx) +{ + return spdk_pci_enumerate(&g_virtio_pci_drv, enum_cb, enum_ctx); +} diff --git a/src/spdk/lib/env_dpdk/threads.c b/src/spdk/lib/env_dpdk/threads.c new file mode 100644 index 00000000..55b0bbb6 --- /dev/null +++ b/src/spdk/lib/env_dpdk/threads.c @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/env.h" + +#include +#include + +uint32_t +spdk_env_get_core_count(void) +{ + return rte_lcore_count(); +} + +uint32_t +spdk_env_get_current_core(void) +{ + return rte_lcore_id(); +} + +uint32_t +spdk_env_get_first_core(void) +{ + return rte_get_next_lcore(-1, 0, 0); +} + +uint32_t +spdk_env_get_last_core(void) +{ + uint32_t i; + uint32_t last_core = UINT32_MAX; + + SPDK_ENV_FOREACH_CORE(i) { + last_core = i; + } + + assert(last_core != UINT32_MAX); + + return last_core; +} + +uint32_t +spdk_env_get_next_core(uint32_t prev_core) +{ + unsigned lcore; + + lcore = rte_get_next_lcore(prev_core, 0, 0); + if (lcore == RTE_MAX_LCORE) { + return UINT32_MAX; + } + return lcore; +} + +uint32_t +spdk_env_get_socket_id(uint32_t core) +{ + if (core >= RTE_MAX_LCORE) { + return SPDK_ENV_SOCKET_ID_ANY; + } + + return rte_lcore_to_socket_id(core); +} + +int +spdk_env_thread_launch_pinned(uint32_t core, thread_start_fn fn, void *arg) +{ + int rc; + + rc = rte_eal_remote_launch(fn, arg, core); + + return rc; +} + +void +spdk_env_thread_wait_all(void) +{ + rte_eal_mp_wait_lcore(); +} diff --git a/src/spdk/lib/env_dpdk/vtophys.c b/src/spdk/lib/env_dpdk/vtophys.c new file mode 100644 index 00000000..00e8bb6d --- /dev/null +++ b/src/spdk/lib/env_dpdk/vtophys.c @@ -0,0 +1,691 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "env_internal.h" + +#include +#include + +#include "spdk_internal/assert.h" + +#include "spdk/assert.h" +#include "spdk/likely.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +#ifdef __FreeBSD__ +#define SPDK_VFIO_ENABLED 0 +#else +#include +/* + * DPDK versions before 17.11 don't provide a way to get VFIO information in the public API, + * and we can't link to internal symbols when built against shared library DPDK, + * so disable VFIO entirely in that case. 
+ */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) && \ + (RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) || !defined(RTE_BUILD_SHARED_LIB)) + +#define SPDK_VFIO_ENABLED 1 +#include + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) +#include +#else +/* Internal DPDK function forward declaration */ +int pci_vfio_is_enabled(void); +#endif + +struct spdk_vfio_dma_map { + struct vfio_iommu_type1_dma_map map; + struct vfio_iommu_type1_dma_unmap unmap; + TAILQ_ENTRY(spdk_vfio_dma_map) tailq; +}; + +struct vfio_cfg { + int fd; + bool enabled; + unsigned device_ref; + TAILQ_HEAD(, spdk_vfio_dma_map) maps; + pthread_mutex_t mutex; +}; + +static struct vfio_cfg g_vfio = { + .fd = -1, + .enabled = false, + .device_ref = 0, + .maps = TAILQ_HEAD_INITIALIZER(g_vfio.maps), + .mutex = PTHREAD_MUTEX_INITIALIZER +}; + +#else +#define SPDK_VFIO_ENABLED 0 +#endif +#endif + +#if DEBUG +#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) +#else +#define DEBUG_PRINT(...) +#endif + +struct spdk_vtophys_pci_device { + struct rte_pci_device *pci_device; + TAILQ_ENTRY(spdk_vtophys_pci_device) tailq; + uint64_t ref; +}; + +static pthread_mutex_t g_vtophys_pci_devices_mutex = PTHREAD_MUTEX_INITIALIZER; +static TAILQ_HEAD(, spdk_vtophys_pci_device) g_vtophys_pci_devices = + TAILQ_HEAD_INITIALIZER(g_vtophys_pci_devices); + +static struct spdk_mem_map *g_vtophys_map; + +#if SPDK_VFIO_ENABLED +static int +vtophys_iommu_map_dma(uint64_t vaddr, uint64_t iova, uint64_t size) +{ + struct spdk_vfio_dma_map *dma_map; + int ret; + + dma_map = calloc(1, sizeof(*dma_map)); + if (dma_map == NULL) { + return -ENOMEM; + } + + dma_map->map.argsz = sizeof(dma_map->map); + dma_map->map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + dma_map->map.vaddr = vaddr; + dma_map->map.iova = iova; + dma_map->map.size = size; + + dma_map->unmap.argsz = sizeof(dma_map->unmap); + dma_map->unmap.flags = 0; + dma_map->unmap.iova = iova; + dma_map->unmap.size = size; + + pthread_mutex_lock(&g_vfio.mutex); + if (g_vfio.device_ref == 0) { + /* VFIO requires at least one device (IOMMU group) to be added to + * a VFIO container before it is possible to perform any IOMMU + * operations on that container. This memory will be mapped once + * the first device (IOMMU group) is hotplugged. + * + * Since the vfio container is managed internally by DPDK, it is + * also possible that some device is already in that container, but + * it's not managed by SPDK - e.g. an NIC attached internally + * inside DPDK. We could map the memory straight away in such + * scenario, but there's no need to do it. DPDK devices clearly + * don't need our mappings and hence we defer the mapping + * unconditionally until the first SPDK-managed device is + * hotplugged. 
+ */ + goto out_insert; + } + + ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map); + if (ret) { + DEBUG_PRINT("Cannot set up DMA mapping, error %d\n", errno); + pthread_mutex_unlock(&g_vfio.mutex); + free(dma_map); + return ret; + } + +out_insert: + TAILQ_INSERT_TAIL(&g_vfio.maps, dma_map, tailq); + pthread_mutex_unlock(&g_vfio.mutex); + return 0; +} + +static int +vtophys_iommu_unmap_dma(uint64_t iova, uint64_t size) +{ + struct spdk_vfio_dma_map *dma_map; + int ret; + + pthread_mutex_lock(&g_vfio.mutex); + TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { + if (dma_map->map.iova == iova) { + break; + } + } + + if (dma_map == NULL) { + DEBUG_PRINT("Cannot clear DMA mapping for IOVA %"PRIx64" - it's not mapped\n", iova); + pthread_mutex_unlock(&g_vfio.mutex); + return -ENXIO; + } + + /** don't support partial or multiple-page unmap for now */ + assert(dma_map->map.size == size); + + if (g_vfio.device_ref == 0) { + /* Memory is not mapped anymore, just remove it's references */ + goto out_remove; + } + + + ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &dma_map->unmap); + if (ret) { + DEBUG_PRINT("Cannot clear DMA mapping, error %d\n", errno); + pthread_mutex_unlock(&g_vfio.mutex); + return ret; + } + +out_remove: + TAILQ_REMOVE(&g_vfio.maps, dma_map, tailq); + pthread_mutex_unlock(&g_vfio.mutex); + free(dma_map); + return 0; +} +#endif + +static uint64_t +vtophys_get_paddr_memseg(uint64_t vaddr) +{ + uintptr_t paddr; + struct rte_memseg *seg; + +#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0) + seg = rte_mem_virt2memseg((void *)(uintptr_t)vaddr, NULL); + if (seg != NULL) { + paddr = seg->phys_addr; + if (paddr == RTE_BAD_IOVA) { + return SPDK_VTOPHYS_ERROR; + } + paddr += (vaddr - (uintptr_t)seg->addr); + return paddr; + } +#else + struct rte_mem_config *mcfg; + uint32_t seg_idx; + + mcfg = rte_eal_get_configuration()->mem_config; + for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) { + seg = &mcfg->memseg[seg_idx]; + if (seg->addr == NULL) { + break; + } + + if (vaddr >= (uintptr_t)seg->addr && + vaddr < ((uintptr_t)seg->addr + seg->len)) { + paddr = seg->phys_addr; +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + if (paddr == RTE_BAD_IOVA) { +#else + if (paddr == RTE_BAD_PHYS_ADDR) { +#endif + return SPDK_VTOPHYS_ERROR; + } + paddr += (vaddr - (uintptr_t)seg->addr); + return paddr; + } + } +#endif + + return SPDK_VTOPHYS_ERROR; +} + +/* Try to get the paddr from /proc/self/pagemap */ +static uint64_t +vtophys_get_paddr_pagemap(uint64_t vaddr) +{ + uintptr_t paddr; + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) +#define BAD_ADDR RTE_BAD_IOVA +#define VTOPHYS rte_mem_virt2iova +#else +#define BAD_ADDR RTE_BAD_PHYS_ADDR +#define VTOPHYS rte_mem_virt2phy +#endif + + /* + * Note: the virt2phy/virt2iova functions have changed over time, such + * that older versions may return 0 while recent versions will never + * return 0 but RTE_BAD_PHYS_ADDR/IOVA instead. To support older and + * newer versions, check for both return values. + */ + paddr = VTOPHYS((void *)vaddr); + if (paddr == 0 || paddr == BAD_ADDR) { + /* + * The vaddr may be valid but doesn't have a backing page + * assigned yet. Touch the page to ensure a backing page + * gets assigned, then try to translate again. + */ + rte_atomic64_read((rte_atomic64_t *)vaddr); + paddr = VTOPHYS((void *)vaddr); + } + if (paddr == 0 || paddr == BAD_ADDR) { + /* Unable to get to the physical address. 
*/ + return SPDK_VTOPHYS_ERROR; + } + +#undef BAD_ADDR +#undef VTOPHYS + + return paddr; +} + +/* Try to get the paddr from pci devices */ +static uint64_t +vtophys_get_paddr_pci(uint64_t vaddr) +{ + struct spdk_vtophys_pci_device *vtophys_dev; + uintptr_t paddr; + struct rte_pci_device *dev; +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 1) + struct rte_mem_resource *res; +#else + struct rte_pci_resource *res; +#endif + unsigned r; + + pthread_mutex_lock(&g_vtophys_pci_devices_mutex); + TAILQ_FOREACH(vtophys_dev, &g_vtophys_pci_devices, tailq) { + dev = vtophys_dev->pci_device; + + for (r = 0; r < PCI_MAX_RESOURCE; r++) { + res = &dev->mem_resource[r]; + if (res->phys_addr && vaddr >= (uint64_t)res->addr && + vaddr < (uint64_t)res->addr + res->len) { + paddr = res->phys_addr + (vaddr - (uint64_t)res->addr); + DEBUG_PRINT("%s: %p -> %p\n", __func__, (void *)vaddr, + (void *)paddr); + pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + return paddr; + } + } + } + pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + + return SPDK_VTOPHYS_ERROR; +} + +static int +spdk_vtophys_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t len) +{ + int rc = 0, pci_phys = 0; + uint64_t paddr; + + if ((uintptr_t)vaddr & ~MASK_256TB) { + DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr); + return -EINVAL; + } + + if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) { + DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n", + __func__, vaddr, len); + return -EINVAL; + } + + while (len > 0) { + /* Get the physical address from the DPDK memsegs */ + paddr = vtophys_get_paddr_memseg((uint64_t)vaddr); + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + if (paddr == SPDK_VTOPHYS_ERROR) { + /* This is not an address that DPDK is managing. */ +#if SPDK_VFIO_ENABLED + if (g_vfio.enabled) { + /* We'll use the virtual address as the iova. DPDK + * currently uses physical addresses as the iovas (or counts + * up from 0 if it can't get physical addresses), so + * the range of user space virtual addresses and physical + * addresses will never overlap. + */ + paddr = (uint64_t)vaddr; + rc = vtophys_iommu_map_dma((uint64_t)vaddr, paddr, VALUE_2MB); + if (rc) { + return -EFAULT; + } + } else +#endif + { + /* Get the physical address from /proc/self/pagemap. */ + paddr = vtophys_get_paddr_pagemap((uint64_t)vaddr); + if (paddr == SPDK_VTOPHYS_ERROR) { + /* Get the physical address from PCI devices */ + paddr = vtophys_get_paddr_pci((uint64_t)vaddr); + if (paddr == SPDK_VTOPHYS_ERROR) { + DEBUG_PRINT("could not get phys addr for %p\n", vaddr); + return -EFAULT; + } + pci_phys = 1; + } + } + } + /* Since PCI paddr can break the 2MiB physical alignment skip this check for that. */ + if (!pci_phys && (paddr & MASK_2MB)) { + DEBUG_PRINT("invalid paddr 0x%" PRIx64 " - must be 2MB aligned\n", paddr); + return -EINVAL; + } + + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, VALUE_2MB, paddr); + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: +#if SPDK_VFIO_ENABLED + if (paddr == SPDK_VTOPHYS_ERROR) { + /* + * This is not an address that DPDK is managing. 
If vfio is enabled, + * we need to unmap the range from the IOMMU + */ + if (g_vfio.enabled) { + uint64_t buffer_len; + paddr = spdk_mem_map_translate(map, (uint64_t)vaddr, &buffer_len); + if (buffer_len != VALUE_2MB) { + return -EINVAL; + } + rc = vtophys_iommu_unmap_dma(paddr, VALUE_2MB); + if (rc) { + return -EFAULT; + } + } + } +#endif + rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, VALUE_2MB); + break; + default: + SPDK_UNREACHABLE(); + } + + if (rc != 0) { + return rc; + } + vaddr += VALUE_2MB; + len -= VALUE_2MB; + } + + return rc; +} + +#if SPDK_VFIO_ENABLED + +static bool +spdk_vfio_enabled(void) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + return rte_vfio_is_enabled("vfio_pci"); +#else + return pci_vfio_is_enabled(); +#endif +} + +static void +spdk_vtophys_iommu_init(void) +{ + char proc_fd_path[PATH_MAX + 1]; + char link_path[PATH_MAX + 1]; + const char vfio_path[] = "/dev/vfio/vfio"; + DIR *dir; + struct dirent *d; + + if (!spdk_vfio_enabled()) { + return; + } + + dir = opendir("/proc/self/fd"); + if (!dir) { + DEBUG_PRINT("Failed to open /proc/self/fd (%d)\n", errno); + return; + } + + while ((d = readdir(dir)) != NULL) { + if (d->d_type != DT_LNK) { + continue; + } + + snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%s", d->d_name); + if (readlink(proc_fd_path, link_path, sizeof(link_path)) != (sizeof(vfio_path) - 1)) { + continue; + } + + if (memcmp(link_path, vfio_path, sizeof(vfio_path) - 1) == 0) { + sscanf(d->d_name, "%d", &g_vfio.fd); + break; + } + } + + closedir(dir); + + if (g_vfio.fd < 0) { + DEBUG_PRINT("Failed to discover DPDK VFIO container fd.\n"); + return; + } + + g_vfio.enabled = true; + + return; +} +#endif + +void +spdk_vtophys_pci_device_added(struct rte_pci_device *pci_device) +{ + struct spdk_vtophys_pci_device *vtophys_dev; + bool found = false; + + pthread_mutex_lock(&g_vtophys_pci_devices_mutex); + TAILQ_FOREACH(vtophys_dev, &g_vtophys_pci_devices, tailq) { + if (vtophys_dev->pci_device == pci_device) { + vtophys_dev->ref++; + found = true; + break; + } + } + + if (!found) { + vtophys_dev = calloc(1, sizeof(*vtophys_dev)); + if (vtophys_dev) { + vtophys_dev->pci_device = pci_device; + vtophys_dev->ref = 1; + TAILQ_INSERT_TAIL(&g_vtophys_pci_devices, vtophys_dev, tailq); + } else { + DEBUG_PRINT("Memory allocation error\n"); + } + } + pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + +#if SPDK_VFIO_ENABLED + struct spdk_vfio_dma_map *dma_map; + int ret; + + if (!g_vfio.enabled) { + return; + } + + pthread_mutex_lock(&g_vfio.mutex); + g_vfio.device_ref++; + if (g_vfio.device_ref > 1) { + pthread_mutex_unlock(&g_vfio.mutex); + return; + } + + /* This is the first SPDK device using DPDK vfio. This means that the first + * IOMMU group might have been just been added to the DPDK vfio container. + * From this point it is certain that the memory can be mapped now. 
+ */ + TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { + ret = ioctl(g_vfio.fd, VFIO_IOMMU_MAP_DMA, &dma_map->map); + if (ret) { + DEBUG_PRINT("Cannot update DMA mapping, error %d\n", errno); + break; + } + } + pthread_mutex_unlock(&g_vfio.mutex); +#endif +} + +void +spdk_vtophys_pci_device_removed(struct rte_pci_device *pci_device) +{ + struct spdk_vtophys_pci_device *vtophys_dev; + + pthread_mutex_lock(&g_vtophys_pci_devices_mutex); + TAILQ_FOREACH(vtophys_dev, &g_vtophys_pci_devices, tailq) { + if (vtophys_dev->pci_device == pci_device) { + assert(vtophys_dev->ref > 0); + if (--vtophys_dev->ref == 0) { + TAILQ_REMOVE(&g_vtophys_pci_devices, vtophys_dev, tailq); + free(vtophys_dev); + } + break; + } + } + pthread_mutex_unlock(&g_vtophys_pci_devices_mutex); + +#if SPDK_VFIO_ENABLED + struct spdk_vfio_dma_map *dma_map; + int ret; + + if (!g_vfio.enabled) { + return; + } + + pthread_mutex_lock(&g_vfio.mutex); + assert(g_vfio.device_ref > 0); + g_vfio.device_ref--; + if (g_vfio.device_ref > 0) { + pthread_mutex_unlock(&g_vfio.mutex); + return; + } + + /* This is the last SPDK device using DPDK vfio. If DPDK doesn't have + * any additional devices using it's vfio container, all the mappings + * will be automatically removed by the Linux vfio driver. We unmap + * the memory manually to be able to easily re-map it later regardless + * of other, external factors. + */ + TAILQ_FOREACH(dma_map, &g_vfio.maps, tailq) { + ret = ioctl(g_vfio.fd, VFIO_IOMMU_UNMAP_DMA, &dma_map->unmap); + if (ret) { + DEBUG_PRINT("Cannot unmap DMA memory, error %d\n", errno); + break; + } + } + pthread_mutex_unlock(&g_vfio.mutex); +#endif +} + +int +spdk_vtophys_init(void) +{ + const struct spdk_mem_map_ops vtophys_map_ops = { + .notify_cb = spdk_vtophys_notify, + .are_contiguous = NULL + }; + +#if SPDK_VFIO_ENABLED + spdk_vtophys_iommu_init(); +#endif + + g_vtophys_map = spdk_mem_map_alloc(SPDK_VTOPHYS_ERROR, &vtophys_map_ops, NULL); + if (g_vtophys_map == NULL) { + DEBUG_PRINT("vtophys map allocation failed\n"); + return -1; + } + return 0; +} + +uint64_t +spdk_vtophys(void *buf) +{ + uint64_t vaddr, paddr_2mb; + + vaddr = (uint64_t)buf; + + paddr_2mb = spdk_mem_map_translate(g_vtophys_map, vaddr, NULL); + + /* + * SPDK_VTOPHYS_ERROR has all bits set, so if the lookup returned SPDK_VTOPHYS_ERROR, + * we will still bitwise-or it with the buf offset below, but the result will still be + * SPDK_VTOPHYS_ERROR. However now that we do + rather than | (due to PCI vtophys being + * unaligned) we must now check the return value before addition. + */ + SPDK_STATIC_ASSERT(SPDK_VTOPHYS_ERROR == UINT64_C(-1), "SPDK_VTOPHYS_ERROR should be all 1s"); + if (paddr_2mb == SPDK_VTOPHYS_ERROR) { + return SPDK_VTOPHYS_ERROR; + } else { + return paddr_2mb + ((uint64_t)buf & MASK_2MB); + } +} + +static int +spdk_bus_scan(void) +{ + return 0; +} + +static int +spdk_bus_probe(void) +{ + return 0; +} + +static struct rte_device * +spdk_bus_find_device(const struct rte_device *start, + rte_dev_cmp_t cmp, const void *data) +{ + return NULL; +} + +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) +static enum rte_iova_mode +spdk_bus_get_iommu_class(void) { + /* Since we register our PCI drivers after EAL init, we have no chance + * of switching into RTE_IOVA_VA (virtual addresses as iova) iommu + * class. DPDK uses RTE_IOVA_PA by default because for some platforms + * it's the only supported mode, but then SPDK does not support those + * platforms and doesn't mind defaulting to RTE_IOVA_VA. 
The rte_pci bus + * will force RTE_IOVA_PA if RTE_IOVA_VA simply can not be used + * (i.e. at least one device on the system is bound to uio_pci_generic), + * so we simply return RTE_IOVA_VA here. + */ + return RTE_IOVA_VA; +} +#endif + +struct rte_bus spdk_bus = { + .scan = spdk_bus_scan, + .probe = spdk_bus_probe, + .find_device = spdk_bus_find_device, +#if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3) + .get_iommu_class = spdk_bus_get_iommu_class, +#endif +}; + +RTE_REGISTER_BUS(spdk, spdk_bus); diff --git a/src/spdk/lib/event/Makefile b/src/spdk/lib/event/Makefile new file mode 100644 index 00000000..659b85e9 --- /dev/null +++ b/src/spdk/lib/event/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = event +C_SRCS = app.c reactor.c rpc.c subsystem.c + +DIRS-y = rpc subsystems + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/app.c b/src/spdk/lib/event/app.c new file mode 100644 index 00000000..012e2920 --- /dev/null +++ b/src/spdk/lib/event/app.c @@ -0,0 +1,998 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk_internal/event.h" + +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/conf.h" +#include "spdk/thread.h" +#include "spdk/trace.h" +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" + +#define SPDK_APP_DEFAULT_LOG_LEVEL SPDK_LOG_NOTICE +#define SPDK_APP_DEFAULT_LOG_PRINT_LEVEL SPDK_LOG_INFO +#define SPDK_APP_DEFAULT_BACKTRACE_LOG_LEVEL SPDK_LOG_ERROR + +#define SPDK_APP_DPDK_DEFAULT_MEM_SIZE -1 +#define SPDK_APP_DPDK_DEFAULT_MASTER_CORE -1 +#define SPDK_APP_DPDK_DEFAULT_MEM_CHANNEL -1 +#define SPDK_APP_DPDK_DEFAULT_CORE_MASK "0x1" + +struct spdk_app { + struct spdk_conf *config; + int shm_id; + spdk_app_shutdown_cb shutdown_cb; + int rc; +}; + +static struct spdk_app g_spdk_app; +static struct spdk_event *g_app_start_event = NULL; +static struct spdk_event *g_shutdown_event = NULL; +static int g_init_lcore; +static bool g_delay_subsystem_init = false; +static bool g_shutdown_sig_received = false; +static char *g_executable_name; +static struct spdk_app_opts g_default_opts; + +int +spdk_app_get_shm_id(void) +{ + return g_spdk_app.shm_id; +} + +/* append one empty option to indicate the end of the array */ +static const struct option g_cmdline_options[] = { +#define CONFIG_FILE_OPT_IDX 'c' + {"config", required_argument, NULL, CONFIG_FILE_OPT_IDX}, +#define LIMIT_COREDUMP_OPT_IDX 'd' + {"limit-coredump", no_argument, NULL, LIMIT_COREDUMP_OPT_IDX}, +#define TPOINT_GROUP_MASK_OPT_IDX 'e' + {"tpoint-group-mask", required_argument, NULL, TPOINT_GROUP_MASK_OPT_IDX}, +#define SINGLE_FILE_SEGMENTS_OPT_IDX 'g' + {"single-file-segments", no_argument, NULL, SINGLE_FILE_SEGMENTS_OPT_IDX}, +#define HELP_OPT_IDX 'h' + {"help", no_argument, NULL, HELP_OPT_IDX}, +#define SHM_ID_OPT_IDX 'i' + {"shm-id", required_argument, NULL, SHM_ID_OPT_IDX}, +#define CPUMASK_OPT_IDX 'm' + {"cpumask", required_argument, NULL, CPUMASK_OPT_IDX}, +#define MEM_CHANNELS_OPT_IDX 'n' + {"mem-channels", required_argument, NULL, MEM_CHANNELS_OPT_IDX}, +#define MASTER_CORE_OPT_IDX 'p' + {"master-core", required_argument, NULL, MASTER_CORE_OPT_IDX}, +#define RPC_SOCKET_OPT_IDX 'r' + {"rpc-socket", required_argument, NULL, RPC_SOCKET_OPT_IDX}, +#define MEM_SIZE_OPT_IDX 's' + {"mem-size", required_argument, NULL, MEM_SIZE_OPT_IDX}, +#define NO_PCI_OPT_IDX 'u' + {"no-pci", no_argument, NULL, NO_PCI_OPT_IDX}, +#define PCI_BLACKLIST_OPT_IDX 'B' + {"pci-blacklist", required_argument, NULL, PCI_BLACKLIST_OPT_IDX}, +#define TRACEFLAG_OPT_IDX 'L' + {"traceflag", required_argument, NULL, TRACEFLAG_OPT_IDX}, +#define HUGE_UNLINK_OPT_IDX 'R' + {"huge-unlink", no_argument, NULL, HUGE_UNLINK_OPT_IDX}, +#define PCI_WHITELIST_OPT_IDX 'W' + {"pci-whitelist", required_argument, 
NULL, PCI_WHITELIST_OPT_IDX}, +#define SILENCE_NOTICELOG_OPT_IDX 257 + {"silence-noticelog", no_argument, NULL, SILENCE_NOTICELOG_OPT_IDX}, +#define WAIT_FOR_RPC_OPT_IDX 258 + {"wait-for-rpc", no_argument, NULL, WAIT_FOR_RPC_OPT_IDX}, +}; + +/* Global section */ +#define GLOBAL_CONFIG_TMPL \ +"# Configuration file\n" \ +"#\n" \ +"# Please write all parameters using ASCII.\n" \ +"# The parameter must be quoted if it includes whitespace.\n" \ +"#\n" \ +"# Configuration syntax:\n" \ +"# Spaces at head of line are deleted, other spaces are as separator\n" \ +"# Lines starting with '#' are comments and not evaluated.\n" \ +"# Lines ending with '\\' are concatenated with the next line.\n" \ +"# Bracketed keys are section keys grouping the following value keys.\n" \ +"# Number of section key is used as a tag number.\n" \ +"# Ex. [TargetNode1] = TargetNode section key with tag number 1\n" \ +"[Global]\n" \ +" Comment \"Global section\"\n" \ +"\n" \ +" # Users can restrict work items to only run on certain cores by\n" \ +" # specifying a ReactorMask. Default is to allow work items to run\n" \ +" # on all cores. Core 0 must be set in the mask if one is specified.\n" \ +" # Default: 0xFFFF (cores 0-15)\n" \ +" ReactorMask \"0x%s\"\n" \ +"\n" \ +" # Tracepoint group mask for spdk trace buffers\n" \ +" # Default: 0x0 (all tracepoint groups disabled)\n" \ +" # Set to 0xFFFF to enable all tracepoint groups.\n" \ +" TpointGroupMask \"0x%" PRIX64 "\"\n" \ +"\n" \ + +static void +spdk_app_config_dump_global_section(FILE *fp) +{ + struct spdk_cpuset *coremask; + + if (NULL == fp) { + return; + } + + coremask = spdk_app_get_core_mask(); + + fprintf(fp, GLOBAL_CONFIG_TMPL, spdk_cpuset_fmt(coremask), + spdk_trace_get_tpoint_group_mask()); +} + +int +spdk_app_get_running_config(char **config_str, char *name) +{ + FILE *fp = NULL; + int fd = -1; + long length = 0, ret = 0; + char vbuf[BUFSIZ]; + char config_template[64]; + + snprintf(config_template, sizeof(config_template), "/tmp/%s.XXXXXX", name); + /* Create temporary file to hold config */ + fd = mkstemp(config_template); + if (fd == -1) { + SPDK_ERRLOG("mkstemp failed\n"); + return -1; + } + fp = fdopen(fd, "wb+"); + if (NULL == fp) { + SPDK_ERRLOG("error opening tmpfile fd = %d\n", fd); + return -1; + } + + /* Buffered IO */ + setvbuf(fp, vbuf, _IOFBF, BUFSIZ); + + spdk_app_config_dump_global_section(fp); + spdk_subsystem_config(fp); + + length = ftell(fp); + + *config_str = malloc(length + 1); + if (!*config_str) { + SPDK_ERRLOG("out-of-memory for config\n"); + fclose(fp); + return -1; + } + fseek(fp, 0, SEEK_SET); + ret = fread(*config_str, sizeof(char), length, fp); + if (ret < length) { + SPDK_ERRLOG("short read\n"); + } + fclose(fp); + (*config_str)[length] = '\0'; + + return 0; +} + +void +spdk_app_start_shutdown(void) +{ + if (g_shutdown_event != NULL) { + spdk_event_call(g_shutdown_event); + g_shutdown_event = NULL; + } else { + spdk_app_stop(0); + } +} + +static void +__shutdown_signal(int signo) +{ + if (!g_shutdown_sig_received) { + g_shutdown_sig_received = true; + spdk_app_start_shutdown(); + } +} + +static void +__shutdown_event_cb(void *arg1, void *arg2) +{ + g_spdk_app.shutdown_cb(); +} + +static int +spdk_app_opts_validate(const char *app_opts) +{ + int i = 0, j; + + for (i = 0; app_opts[i] != '\0'; i++) { + /* ignore getopt control characters */ + if (app_opts[i] == ':' || app_opts[i] == '+' || app_opts[i] == '-') { + continue; + } + + for (j = 0; SPDK_APP_GETOPT_STRING[j] != '\0'; j++) { + if (app_opts[i] == SPDK_APP_GETOPT_STRING[j]) { + 
return app_opts[i]; + } + } + } + return 0; +} + +void +spdk_app_opts_init(struct spdk_app_opts *opts) +{ + if (!opts) { + return; + } + + memset(opts, 0, sizeof(*opts)); + + opts->enable_coredump = true; + opts->shm_id = -1; + opts->mem_size = SPDK_APP_DPDK_DEFAULT_MEM_SIZE; + opts->master_core = SPDK_APP_DPDK_DEFAULT_MASTER_CORE; + opts->mem_channel = SPDK_APP_DPDK_DEFAULT_MEM_CHANNEL; + opts->reactor_mask = NULL; + opts->max_delay_us = 0; + opts->print_level = SPDK_APP_DEFAULT_LOG_PRINT_LEVEL; + opts->rpc_addr = SPDK_DEFAULT_RPC_ADDR; + opts->delay_subsystem_init = false; +} + +static int +spdk_app_setup_signal_handlers(struct spdk_app_opts *opts) +{ + struct sigaction sigact; + sigset_t sigmask; + int rc; + + /* Set up custom shutdown handling if the user requested it. */ + if (opts->shutdown_cb != NULL) { + g_shutdown_event = spdk_event_allocate(spdk_env_get_current_core(), + __shutdown_event_cb, + NULL, NULL); + } + + sigemptyset(&sigmask); + memset(&sigact, 0, sizeof(sigact)); + sigemptyset(&sigact.sa_mask); + + sigact.sa_handler = SIG_IGN; + rc = sigaction(SIGPIPE, &sigact, NULL); + if (rc < 0) { + SPDK_ERRLOG("sigaction(SIGPIPE) failed\n"); + return rc; + } + + /* Install the same handler for SIGINT and SIGTERM */ + sigact.sa_handler = __shutdown_signal; + + rc = sigaction(SIGINT, &sigact, NULL); + if (rc < 0) { + SPDK_ERRLOG("sigaction(SIGINT) failed\n"); + return rc; + } + sigaddset(&sigmask, SIGINT); + + rc = sigaction(SIGTERM, &sigact, NULL); + if (rc < 0) { + SPDK_ERRLOG("sigaction(SIGTERM) failed\n"); + return rc; + } + sigaddset(&sigmask, SIGTERM); + + if (opts->usr1_handler != NULL) { + sigact.sa_handler = opts->usr1_handler; + rc = sigaction(SIGUSR1, &sigact, NULL); + if (rc < 0) { + SPDK_ERRLOG("sigaction(SIGUSR1) failed\n"); + return rc; + } + sigaddset(&sigmask, SIGUSR1); + } + + pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL); + + return 0; +} + +static void +spdk_app_start_application(void) +{ + spdk_rpc_set_state(SPDK_RPC_RUNTIME); + spdk_event_call(g_app_start_event); +} + +static void +spdk_app_start_rpc(void *arg1, void *arg2) +{ + const char *rpc_addr = arg1; + + spdk_rpc_initialize(rpc_addr); + if (!g_delay_subsystem_init) { + spdk_app_start_application(); + } +} + +static struct spdk_conf * +spdk_app_setup_conf(const char *config_file) +{ + struct spdk_conf *config; + int rc; + + config = spdk_conf_allocate(); + assert(config != NULL); + if (config_file) { + rc = spdk_conf_read(config, config_file); + if (rc != 0) { + SPDK_ERRLOG("Could not read config file %s\n", config_file); + goto error; + } + if (spdk_conf_first_section(config) == NULL) { + SPDK_ERRLOG("Invalid config file %s\n", config_file); + goto error; + } + } + spdk_conf_set_as_default(config); + return config; + +error: + spdk_conf_free(config); + return NULL; +} + +static int +spdk_app_opts_add_pci_addr(struct spdk_app_opts *opts, struct spdk_pci_addr **list, char *bdf) +{ + struct spdk_pci_addr *tmp = *list; + size_t i = opts->num_pci_addr; + + tmp = realloc(tmp, sizeof(*tmp) * (i + 1)); + if (tmp == NULL) { + SPDK_ERRLOG("realloc error\n"); + return -ENOMEM; + } + + *list = tmp; + if (spdk_pci_addr_parse(*list + i, bdf) < 0) { + SPDK_ERRLOG("Invalid address %s\n", bdf); + return -EINVAL; + } + + opts->num_pci_addr++; + return 0; +} + +static int +spdk_app_read_config_file_global_params(struct spdk_app_opts *opts) +{ + struct spdk_conf_section *sp; + char *bdf; + int i, rc = 0; + + sp = spdk_conf_find_section(NULL, "Global"); + + if (opts->shm_id == -1) { + if (sp != NULL) { + opts->shm_id = 
spdk_conf_section_get_intval(sp, "SharedMemoryID"); + } + } + + if (opts->reactor_mask == NULL) { + if (sp && spdk_conf_section_get_val(sp, "ReactorMask")) { + SPDK_ERRLOG("ReactorMask config option is deprecated. Use -m/--cpumask\n" + "command line parameter instead.\n"); + opts->reactor_mask = spdk_conf_section_get_val(sp, "ReactorMask"); + } else { + opts->reactor_mask = SPDK_APP_DPDK_DEFAULT_CORE_MASK; + } + } + + if (!opts->no_pci && sp) { + opts->no_pci = spdk_conf_section_get_boolval(sp, "NoPci", false); + } + + if (opts->tpoint_group_mask == NULL) { + if (sp != NULL) { + opts->tpoint_group_mask = spdk_conf_section_get_val(sp, "TpointGroupMask"); + } + } + + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + bdf = spdk_conf_section_get_nmval(sp, "PciBlacklist", i, 0); + if (!bdf) { + break; + } + + rc = spdk_app_opts_add_pci_addr(opts, &opts->pci_blacklist, bdf); + if (rc != 0) { + free(opts->pci_blacklist); + return rc; + } + } + + for (i = 0; ; i++) { + bdf = spdk_conf_section_get_nmval(sp, "PciWhitelist", i, 0); + if (!bdf) { + break; + } + + if (opts->pci_blacklist != NULL) { + SPDK_ERRLOG("PciBlacklist and PciWhitelist cannot be used at the same time\n"); + free(opts->pci_blacklist); + return -EINVAL; + } + + rc = spdk_app_opts_add_pci_addr(opts, &opts->pci_whitelist, bdf); + if (rc != 0) { + free(opts->pci_whitelist); + return rc; + } + } + return 0; +} + +static int +spdk_app_setup_env(struct spdk_app_opts *opts) +{ + struct spdk_env_opts env_opts = {}; + int rc; + + spdk_env_opts_init(&env_opts); + + env_opts.name = opts->name; + env_opts.core_mask = opts->reactor_mask; + env_opts.shm_id = opts->shm_id; + env_opts.mem_channel = opts->mem_channel; + env_opts.master_core = opts->master_core; + env_opts.mem_size = opts->mem_size; + env_opts.hugepage_single_segments = opts->hugepage_single_segments; + env_opts.unlink_hugepage = opts->unlink_hugepage; + env_opts.no_pci = opts->no_pci; + env_opts.num_pci_addr = opts->num_pci_addr; + env_opts.pci_blacklist = opts->pci_blacklist; + env_opts.pci_whitelist = opts->pci_whitelist; + + rc = spdk_env_init(&env_opts); + free(env_opts.pci_blacklist); + free(env_opts.pci_whitelist); + + if (rc < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + } + + return rc; +} + +static int +spdk_app_setup_trace(struct spdk_app_opts *opts) +{ + char shm_name[64]; + uint64_t tpoint_group_mask; + char *end; + + if (opts->shm_id >= 0) { + snprintf(shm_name, sizeof(shm_name), "/%s_trace.%d", opts->name, opts->shm_id); + } else { + snprintf(shm_name, sizeof(shm_name), "/%s_trace.pid%d", opts->name, (int)getpid()); + } + + if (spdk_trace_init(shm_name) != 0) { + return -1; + } + + if (opts->tpoint_group_mask != NULL) { + errno = 0; + tpoint_group_mask = strtoull(opts->tpoint_group_mask, &end, 16); + if (*end != '\0' || errno) { + SPDK_ERRLOG("invalid tpoint mask %s\n", opts->tpoint_group_mask); + } else { + SPDK_NOTICELOG("Tracepoint Group Mask %s specified.\n", opts->tpoint_group_mask); + SPDK_NOTICELOG("Use 'spdk_trace -s %s %s %d' to capture a snapshot of events at runtime.\n", + opts->name, + opts->shm_id >= 0 ? "-i" : "-p", + opts->shm_id >= 0 ? 
opts->shm_id : getpid()); +#if defined(__linux__) + SPDK_NOTICELOG("Or copy /dev/shm%s for offline analysis/debug.\n", shm_name); +#endif + spdk_trace_set_tpoint_group_mask(tpoint_group_mask); + } + } + + return 0; +} + +int +spdk_app_start(struct spdk_app_opts *opts, spdk_event_fn start_fn, + void *arg1, void *arg2) +{ + struct spdk_conf *config = NULL; + int rc; + struct spdk_event *rpc_start_event; + char *tty; + + if (!opts) { + SPDK_ERRLOG("opts should not be NULL\n"); + return 1; + } + + if (!start_fn) { + SPDK_ERRLOG("start_fn should not be NULL\n"); + return 1; + } + + tty = ttyname(STDERR_FILENO); + if (opts->print_level > SPDK_LOG_WARN && + isatty(STDERR_FILENO) && + tty && + !strncmp(tty, "/dev/tty", strlen("/dev/tty"))) { + printf("Warning: printing stderr to console terminal without -q option specified.\n"); + printf("Suggest using --silence-noticelog to disable logging to stderr and\n"); + printf("monitor syslog, or redirect stderr to a file.\n"); + printf("(Delaying for 10 seconds...)\n"); + sleep(10); + } + + spdk_log_set_print_level(opts->print_level); + +#ifndef SPDK_NO_RLIMIT + if (opts->enable_coredump) { + struct rlimit core_limits; + + core_limits.rlim_cur = core_limits.rlim_max = RLIM_INFINITY; + setrlimit(RLIMIT_CORE, &core_limits); + } +#endif + + config = spdk_app_setup_conf(opts->config_file); + if (config == NULL) { + goto app_start_setup_conf_err; + } + + if (spdk_app_read_config_file_global_params(opts) < 0) { + goto app_start_setup_conf_err; + } + + spdk_log_set_level(SPDK_APP_DEFAULT_LOG_LEVEL); + spdk_log_set_backtrace_level(SPDK_APP_DEFAULT_BACKTRACE_LOG_LEVEL); + + if (spdk_app_setup_env(opts) < 0) { + goto app_start_setup_conf_err; + } + + spdk_log_open(); + SPDK_NOTICELOG("Total cores available: %d\n", spdk_env_get_core_count()); + + spdk_thread_lib_init(); + + /* + * If mask not specified on command line or in configuration file, + * reactor_mask will be 0x1 which will enable core 0 to run one + * reactor. + */ + if ((rc = spdk_reactors_init(opts->max_delay_us)) != 0) { + SPDK_ERRLOG("Invalid reactor mask.\n"); + goto app_start_log_close_err; + } + + /* + * Note the call to spdk_app_setup_trace() is located here + * ahead of spdk_app_setup_signal_handlers(). + * That's because there is not an easy/direct clean + * way of unwinding alloc'd resources that can occur + * in spdk_app_setup_signal_handlers(). 
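[Editor's aside, not part of the patch: for orientation, a consumer of this module drives the spdk_app_* entry points defined in this file roughly as below. The application name, the empty option string, and the callback bodies are placeholders, and error handling is minimal; this is a sketch of typical usage, not code from the upstream tree.

#include "spdk/stdinc.h"
#include "spdk/event.h"

/* Runs on the master reactor once all subsystems are initialized. */
static void
hello_start(void *arg1, void *arg2)
{
	/* ... register pollers, open bdevs, etc. ... */
	spdk_app_stop(0);	/* tell the reactors to exit again */
}

static void
hello_parse_arg(int ch, char *arg)
{
	/* no application-specific options in this sketch */
}

static void
hello_usage(void)
{
}

int
main(int argc, char **argv)
{
	struct spdk_app_opts opts;
	int rc;

	spdk_app_opts_init(&opts);
	opts.name = "hello_app";	/* hypothetical application name */

	if (spdk_app_parse_args(argc, argv, &opts, "", NULL,
				hello_parse_arg, hello_usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
		return 1;
	}

	rc = spdk_app_start(&opts, hello_start, NULL, NULL);	/* blocks until spdk_app_stop() */
	spdk_app_fini();
	return rc;
}

(end of editor's aside)]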
+ */ + if (spdk_app_setup_trace(opts) != 0) { + goto app_start_log_close_err; + } + + if ((rc = spdk_app_setup_signal_handlers(opts)) != 0) { + goto app_start_trace_cleanup_err; + } + + memset(&g_spdk_app, 0, sizeof(g_spdk_app)); + g_spdk_app.config = config; + g_spdk_app.shm_id = opts->shm_id; + g_spdk_app.shutdown_cb = opts->shutdown_cb; + g_spdk_app.rc = 0; + g_init_lcore = spdk_env_get_current_core(); + g_delay_subsystem_init = opts->delay_subsystem_init; + g_app_start_event = spdk_event_allocate(g_init_lcore, start_fn, arg1, arg2); + + rpc_start_event = spdk_event_allocate(g_init_lcore, spdk_app_start_rpc, + (void *)opts->rpc_addr, NULL); + + if (!g_delay_subsystem_init) { + spdk_subsystem_init(rpc_start_event); + } else { + spdk_event_call(rpc_start_event); + } + + /* This blocks until spdk_app_stop is called */ + spdk_reactors_start(); + + return g_spdk_app.rc; + +app_start_trace_cleanup_err: + spdk_trace_cleanup(); + +app_start_log_close_err: + spdk_log_close(); + +app_start_setup_conf_err: + return 1; +} + +void +spdk_app_fini(void) +{ + spdk_trace_cleanup(); + spdk_reactors_fini(); + spdk_conf_free(g_spdk_app.config); + spdk_log_close(); + spdk_thread_lib_fini(); +} + +static void +_spdk_app_stop(void *arg1, void *arg2) +{ + struct spdk_event *app_stop_event; + + spdk_rpc_finish(); + + app_stop_event = spdk_event_allocate(spdk_env_get_current_core(), spdk_reactors_stop, NULL, NULL); + spdk_subsystem_fini(app_stop_event); +} + +void +spdk_app_stop(int rc) +{ + if (rc) { + SPDK_WARNLOG("spdk_app_stop'd on non-zero\n"); + } + g_spdk_app.rc = rc; + /* + * We want to run spdk_subsystem_fini() from the same lcore where spdk_subsystem_init() + * was called. + */ + spdk_event_call(spdk_event_allocate(g_init_lcore, _spdk_app_stop, NULL, NULL)); +} + +static void +usage(void (*app_usage)(void)) +{ + printf("%s [options]\n", g_executable_name); + printf("options:\n"); + printf(" -c, --config config file (default %s)\n", g_default_opts.config_file); + printf(" -d, --limit-coredump do not set max coredump size to RLIM_INFINITY\n"); + printf(" -e, --tpoint-group-mask \n"); + printf(" tracepoint group mask for spdk trace buffers (default 0x0)\n"); + printf(" -g, --single-file-segments\n"); + printf(" force creating just one hugetlbfs file\n"); + printf(" -h, --help show this usage\n"); + printf(" -i, --shm-id shared memory ID (optional)\n"); + printf(" -m, --cpumask core mask for DPDK\n"); + printf(" -n, --mem-channels channel number of memory channels used for DPDK\n"); + printf(" -p, --master-core master (primary) core for DPDK\n"); + printf(" -r, --rpc-socket RPC listen address (default %s)\n", SPDK_DEFAULT_RPC_ADDR); + printf(" -s, --mem-size memory size in MB for DPDK (default: "); + if (g_default_opts.mem_size > 0) { + printf("%dMB)\n", g_default_opts.mem_size); + } else { + printf("all hugepage memory)\n"); + } + printf(" --silence-noticelog disable notice level logging to stderr\n"); + printf(" -u, --no-pci disable PCI access\n"); + printf(" --wait-for-rpc wait for RPCs to initialize subsystems\n"); + printf(" -B, --pci-blacklist \n"); + printf(" pci addr to blacklist (can be used more than once)\n"); + printf(" -R, --huge-unlink unlink huge files after initialization\n"); + printf(" -W, --pci-whitelist \n"); + printf(" pci addr to whitelist (-B and -W cannot be used at the same time)\n"); + spdk_tracelog_usage(stdout, "-L"); + if (app_usage) { + app_usage(); + } +} + +spdk_app_parse_args_rvals_t +spdk_app_parse_args(int argc, char **argv, struct spdk_app_opts *opts, + const char 
*app_getopt_str, struct option *app_long_opts, + void (*app_parse)(int ch, char *arg), + void (*app_usage)(void)) +{ + int ch, rc, opt_idx, global_long_opts_len, app_long_opts_len; + struct option *cmdline_options; + char *cmdline_short_opts = NULL; + enum spdk_app_parse_args_rvals retval = SPDK_APP_PARSE_ARGS_FAIL; + + memcpy(&g_default_opts, opts, sizeof(g_default_opts)); + + if (opts->config_file && access(opts->config_file, F_OK) != 0) { + opts->config_file = NULL; + } + + if (app_long_opts == NULL) { + app_long_opts_len = 0; + } else { + for (app_long_opts_len = 0; + app_long_opts[app_long_opts_len].name != NULL; + app_long_opts_len++); + } + + global_long_opts_len = SPDK_COUNTOF(g_cmdline_options); + + cmdline_options = calloc(global_long_opts_len + app_long_opts_len + 1, sizeof(*cmdline_options)); + if (!cmdline_options) { + fprintf(stderr, "Out of memory\n"); + return SPDK_APP_PARSE_ARGS_FAIL; + } + + memcpy(&cmdline_options[0], g_cmdline_options, sizeof(g_cmdline_options)); + if (app_long_opts) { + memcpy(&cmdline_options[global_long_opts_len], app_long_opts, + app_long_opts_len * sizeof(*app_long_opts)); + } + + if (app_getopt_str != NULL) { + ch = spdk_app_opts_validate(app_getopt_str); + if (ch) { + fprintf(stderr, "Duplicated option '%c' between the generic and application specific spdk opts.\n", + ch); + goto out; + } + } + + cmdline_short_opts = spdk_sprintf_alloc("%s%s", app_getopt_str, SPDK_APP_GETOPT_STRING); + if (!cmdline_short_opts) { + fprintf(stderr, "Out of memory\n"); + goto out; + } + + g_executable_name = argv[0]; + + while ((ch = getopt_long(argc, argv, cmdline_short_opts, cmdline_options, &opt_idx)) != -1) { + switch (ch) { + case CONFIG_FILE_OPT_IDX: + opts->config_file = optarg; + break; + case LIMIT_COREDUMP_OPT_IDX: + opts->enable_coredump = false; + break; + case TPOINT_GROUP_MASK_OPT_IDX: + opts->tpoint_group_mask = optarg; + break; + case SINGLE_FILE_SEGMENTS_OPT_IDX: + opts->hugepage_single_segments = true; + break; + case HELP_OPT_IDX: + usage(app_usage); + retval = SPDK_APP_PARSE_ARGS_HELP; + goto out; + case SHM_ID_OPT_IDX: + if (optarg == NULL) { + goto out; + } + opts->shm_id = atoi(optarg); + break; + case CPUMASK_OPT_IDX: + opts->reactor_mask = optarg; + break; + case MEM_CHANNELS_OPT_IDX: + if (optarg == NULL) { + goto out; + } + opts->mem_channel = atoi(optarg); + break; + case MASTER_CORE_OPT_IDX: + if (optarg == NULL) { + goto out; + } + opts->master_core = atoi(optarg); + break; + case SILENCE_NOTICELOG_OPT_IDX: + opts->print_level = SPDK_LOG_WARN; + break; + case RPC_SOCKET_OPT_IDX: + opts->rpc_addr = optarg; + break; + case MEM_SIZE_OPT_IDX: { + uint64_t mem_size_mb; + bool mem_size_has_prefix; + + rc = spdk_parse_capacity(optarg, &mem_size_mb, &mem_size_has_prefix); + if (rc != 0) { + fprintf(stderr, "invalid memory pool size `-s %s`\n", optarg); + usage(app_usage); + goto out; + } + + if (mem_size_has_prefix) { + /* the mem size is in MB by default, so if a prefix was + * specified, we need to manually convert to MB. 
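[Editor's note, not part of the patch: a worked example of the conversion described above. With "-s 1024", spdk_parse_capacity() reports no binary prefix, so mem_size stays 1024 (MB). With "-s 2G", it returns 2147483648 with mem_size_has_prefix set, and the division by 1024 * 1024 just below turns that back into 2048 MB.]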
+ */ + mem_size_mb /= 1024 * 1024; + } + + if (mem_size_mb > INT_MAX) { + fprintf(stderr, "invalid memory pool size `-s %s`\n", optarg); + usage(app_usage); + goto out; + } + + opts->mem_size = (int) mem_size_mb; + break; + } + case NO_PCI_OPT_IDX: + opts->no_pci = true; + break; + case WAIT_FOR_RPC_OPT_IDX: + opts->delay_subsystem_init = true; + break; + case PCI_BLACKLIST_OPT_IDX: + if (opts->pci_whitelist) { + free(opts->pci_whitelist); + opts->pci_whitelist = NULL; + fprintf(stderr, "-B and -W cannot be used at the same time\n"); + usage(app_usage); + goto out; + } + + rc = spdk_app_opts_add_pci_addr(opts, &opts->pci_blacklist, optarg); + if (rc != 0) { + free(opts->pci_blacklist); + opts->pci_blacklist = NULL; + goto out; + } + break; + case TRACEFLAG_OPT_IDX: +#ifndef DEBUG + fprintf(stderr, "%s must be built with CONFIG_DEBUG=y for -L flag\n", + argv[0]); + usage(app_usage); + goto out; +#else + rc = spdk_log_set_trace_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(app_usage); + goto out; + } + opts->print_level = SPDK_LOG_DEBUG; + break; +#endif + case HUGE_UNLINK_OPT_IDX: + opts->unlink_hugepage = true; + break; + case PCI_WHITELIST_OPT_IDX: + if (opts->pci_blacklist) { + free(opts->pci_blacklist); + opts->pci_blacklist = NULL; + fprintf(stderr, "-B and -W cannot be used at the same time\n"); + usage(app_usage); + goto out; + } + + rc = spdk_app_opts_add_pci_addr(opts, &opts->pci_whitelist, optarg); + if (rc != 0) { + free(opts->pci_whitelist); + opts->pci_whitelist = NULL; + goto out; + } + break; + case '?': + /* + * In the event getopt() above detects an option + * in argv that is NOT in the getopt_str, + * getopt() will return a '?' indicating failure. + */ + usage(app_usage); + goto out; + default: + app_parse(ch, optarg); + } + } + + /* TBD: Replace warning by failure when RPCs for startup are prepared. */ + if (opts->config_file && opts->delay_subsystem_init) { + fprintf(stderr, + "WARNING: --wait-for-rpc and config file are used at the same time. 
" + "- Please be careful one options might overwrite others.\n"); + } + + retval = SPDK_APP_PARSE_ARGS_SUCCESS; +out: + if (retval != SPDK_APP_PARSE_ARGS_SUCCESS) { + free(opts->pci_blacklist); + opts->pci_blacklist = NULL; + free(opts->pci_whitelist); + opts->pci_whitelist = NULL; + } + free(cmdline_short_opts); + free(cmdline_options); + return retval; +} + +void +spdk_app_usage(void) +{ + if (g_executable_name == NULL) { + fprintf(stderr, "%s not valid before calling spdk_app_parse_args()\n", __func__); + return; + } + + usage(NULL); +} + +static void +spdk_rpc_start_subsystem_init_cpl(void *arg1, void *arg2) +{ + struct spdk_jsonrpc_request *request = arg1; + struct spdk_json_write_ctx *w; + + spdk_app_start_application(); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_start_subsystem_init(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_event *cb_event; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "start_subsystem_init requires no parameters"); + return; + } + + cb_event = spdk_event_allocate(g_init_lcore, spdk_rpc_start_subsystem_init_cpl, + request, NULL); + spdk_subsystem_init(cb_event); +} +SPDK_RPC_REGISTER("start_subsystem_init", spdk_rpc_start_subsystem_init, SPDK_RPC_STARTUP) diff --git a/src/spdk/lib/event/reactor.c b/src/spdk/lib/event/reactor.c new file mode 100644 index 00000000..d9ba9f6b --- /dev/null +++ b/src/spdk/lib/event/reactor.c @@ -0,0 +1,804 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/likely.h" + +#include "spdk_internal/event.h" +#include "spdk_internal/log.h" + +#include "spdk/log.h" +#include "spdk/thread.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#define SPDK_MAX_SOCKET 64 + +#define SPDK_EVENT_BATCH_SIZE 8 + +enum spdk_poller_state { + /* The poller is registered with a reactor but not currently executing its fn. */ + SPDK_POLLER_STATE_WAITING, + + /* The poller is currently running its fn. */ + SPDK_POLLER_STATE_RUNNING, + + /* The poller was unregistered during the execution of its fn. */ + SPDK_POLLER_STATE_UNREGISTERED, +}; + +struct spdk_poller { + TAILQ_ENTRY(spdk_poller) tailq; + uint32_t lcore; + + /* Current state of the poller; should only be accessed from the poller's thread. */ + enum spdk_poller_state state; + + uint64_t period_ticks; + uint64_t next_run_tick; + spdk_poller_fn fn; + void *arg; +}; + +enum spdk_reactor_state { + SPDK_REACTOR_STATE_INVALID = 0, + SPDK_REACTOR_STATE_INITIALIZED = 1, + SPDK_REACTOR_STATE_RUNNING = 2, + SPDK_REACTOR_STATE_EXITING = 3, + SPDK_REACTOR_STATE_SHUTDOWN = 4, +}; + +struct spdk_reactor { + /* Logical core number for this reactor. */ + uint32_t lcore; + + /* Socket ID for this reactor. */ + uint32_t socket_id; + + /* Poller for get the rusage for the reactor. */ + struct spdk_poller *rusage_poller; + + /* Reactor tsc stats */ + struct spdk_reactor_tsc_stats tsc_stats; + + uint64_t tsc_last; + + /* The last known rusage values */ + struct rusage rusage; + + /* + * Contains pollers actively running on this reactor. Pollers + * are run round-robin. The reactor takes one poller from the head + * of the ring, executes it, then puts it back at the tail of + * the ring. + */ + TAILQ_HEAD(, spdk_poller) active_pollers; + + /** + * Contains pollers running on this reactor with a periodic timer. + */ + TAILQ_HEAD(timer_pollers_head, spdk_poller) timer_pollers; + + struct spdk_ring *events; + + /* Pointer to the per-socket g_spdk_event_mempool for this reactor. */ + struct spdk_mempool *event_mempool; + + uint64_t max_delay_us; +} __attribute__((aligned(64))); + +static struct spdk_reactor *g_reactors; + +static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_INVALID; + +static bool g_context_switch_monitor_enabled = true; + +static void spdk_reactor_construct(struct spdk_reactor *w, uint32_t lcore, + uint64_t max_delay_us); + +static struct spdk_mempool *g_spdk_event_mempool[SPDK_MAX_SOCKET]; + +static struct spdk_cpuset *g_spdk_app_core_mask; + +static struct spdk_reactor * +spdk_reactor_get(uint32_t lcore) +{ + struct spdk_reactor *reactor; + reactor = spdk_likely(g_reactors) ? 
&g_reactors[lcore] : NULL; + return reactor; +} + +struct spdk_event * +spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2) +{ + struct spdk_event *event = NULL; + struct spdk_reactor *reactor = spdk_reactor_get(lcore); + + if (!reactor) { + assert(false); + return NULL; + } + + event = spdk_mempool_get(reactor->event_mempool); + if (event == NULL) { + assert(false); + return NULL; + } + + event->lcore = lcore; + event->fn = fn; + event->arg1 = arg1; + event->arg2 = arg2; + + return event; +} + +void +spdk_event_call(struct spdk_event *event) +{ + int rc; + struct spdk_reactor *reactor; + + reactor = spdk_reactor_get(event->lcore); + + assert(reactor->events != NULL); + rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1); + if (rc != 1) { + assert(false); + } +} + +static inline uint32_t +_spdk_event_queue_run_batch(struct spdk_reactor *reactor) +{ + unsigned count, i; + void *events[SPDK_EVENT_BATCH_SIZE]; + +#ifdef DEBUG + /* + * spdk_ring_dequeue() fills events and returns how many entries it wrote, + * so we will never actually read uninitialized data from events, but just to be sure + * (and to silence a static analyzer false positive), initialize the array to NULL pointers. + */ + memset(events, 0, sizeof(events)); +#endif + + count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); + if (count == 0) { + return 0; + } + + for (i = 0; i < count; i++) { + struct spdk_event *event = events[i]; + + assert(event != NULL); + event->fn(event->arg1, event->arg2); + } + + spdk_mempool_put_bulk(reactor->event_mempool, events, count); + + return count; +} + +static void +_spdk_reactor_msg_passed(void *arg1, void *arg2) +{ + spdk_thread_fn fn = arg1; + + fn(arg2); +} + +static void +_spdk_reactor_send_msg(spdk_thread_fn fn, void *ctx, void *thread_ctx) +{ + struct spdk_event *event; + struct spdk_reactor *reactor; + + reactor = thread_ctx; + + event = spdk_event_allocate(reactor->lcore, _spdk_reactor_msg_passed, fn, ctx); + + spdk_event_call(event); +} + +static void +_spdk_poller_insert_timer(struct spdk_reactor *reactor, struct spdk_poller *poller, uint64_t now) +{ + struct spdk_poller *iter; + uint64_t next_run_tick; + + next_run_tick = now + poller->period_ticks; + poller->next_run_tick = next_run_tick; + + /* + * Insert poller in the reactor's timer_pollers list in sorted order by next scheduled + * run time. 
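[Editor's aside, not part of the patch: for context, application code consumes this machinery through the public poller API used elsewhere in this file (spdk_poller_register()/spdk_poller_unregister() from spdk/thread.h), which lands in _spdk_reactor_start_poller()/_spdk_reactor_stop_poller() below. A small sketch, assuming it runs on an SPDK thread such as the spdk_app_start() callback; the names are hypothetical:

#include "spdk/stdinc.h"
#include "spdk/thread.h"

static struct spdk_poller *g_stats_poller;
static uint64_t g_samples;

/* Runs every 500 ms on the reactor that registered it.  In this version of
 * the reactor, a return value > 0 is accounted as "busy" time and 0 as
 * "idle" time in the per-reactor tsc statistics. */
static int
collect_stats(void *arg)
{
	g_samples++;
	return 1;
}

static void
stats_start(void)
{
	g_stats_poller = spdk_poller_register(collect_stats, NULL, 500 * 1000);
}

static void
stats_stop(void)
{
	spdk_poller_unregister(&g_stats_poller);
}

(end of editor's aside)]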
+ */ + TAILQ_FOREACH_REVERSE(iter, &reactor->timer_pollers, timer_pollers_head, tailq) { + if (iter->next_run_tick <= next_run_tick) { + TAILQ_INSERT_AFTER(&reactor->timer_pollers, iter, poller, tailq); + return; + } + } + + /* No earlier pollers were found, so this poller must be the new head */ + TAILQ_INSERT_HEAD(&reactor->timer_pollers, poller, tailq); +} + +static struct spdk_poller * +_spdk_reactor_start_poller(void *thread_ctx, + spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds) +{ + struct spdk_poller *poller; + struct spdk_reactor *reactor; + uint64_t quotient, remainder, ticks; + + reactor = thread_ctx; + + poller = calloc(1, sizeof(*poller)); + if (poller == NULL) { + SPDK_ERRLOG("Poller memory allocation failed\n"); + return NULL; + } + + poller->lcore = reactor->lcore; + poller->state = SPDK_POLLER_STATE_WAITING; + poller->fn = fn; + poller->arg = arg; + + if (period_microseconds) { + quotient = period_microseconds / SPDK_SEC_TO_USEC; + remainder = period_microseconds % SPDK_SEC_TO_USEC; + ticks = spdk_get_ticks_hz(); + + poller->period_ticks = ticks * quotient + (ticks * remainder) / SPDK_SEC_TO_USEC; + } else { + poller->period_ticks = 0; + } + + if (poller->period_ticks) { + _spdk_poller_insert_timer(reactor, poller, spdk_get_ticks()); + } else { + TAILQ_INSERT_TAIL(&reactor->active_pollers, poller, tailq); + } + + return poller; +} + +static void +_spdk_reactor_stop_poller(struct spdk_poller *poller, void *thread_ctx) +{ + struct spdk_reactor *reactor; + + reactor = thread_ctx; + + assert(poller->lcore == spdk_env_get_current_core()); + + if (poller->state == SPDK_POLLER_STATE_RUNNING) { + /* + * We are being called from the poller_fn, so set the state to unregistered + * and let the reactor loop free the poller. + */ + poller->state = SPDK_POLLER_STATE_UNREGISTERED; + } else { + /* Poller is not running currently, so just free it. */ + if (poller->period_ticks) { + TAILQ_REMOVE(&reactor->timer_pollers, poller, tailq); + } else { + TAILQ_REMOVE(&reactor->active_pollers, poller, tailq); + } + + free(poller); + } +} + +static int +get_rusage(void *arg) +{ + struct spdk_reactor *reactor = arg; + struct rusage rusage; + + if (getrusage(RUSAGE_THREAD, &rusage) != 0) { + return -1; + } + + if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) { + SPDK_INFOLOG(SPDK_LOG_REACTOR, + "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n", + reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw, + rusage.ru_nivcsw - reactor->rusage.ru_nivcsw); + } + reactor->rusage = rusage; + + return -1; +} + +static void +_spdk_reactor_context_switch_monitor_start(void *arg1, void *arg2) +{ + struct spdk_reactor *reactor = arg1; + + if (reactor->rusage_poller == NULL) { + getrusage(RUSAGE_THREAD, &reactor->rusage); + reactor->rusage_poller = spdk_poller_register(get_rusage, reactor, 1000000); + } +} + +static void +_spdk_reactor_context_switch_monitor_stop(void *arg1, void *arg2) +{ + struct spdk_reactor *reactor = arg1; + + if (reactor->rusage_poller != NULL) { + spdk_poller_unregister(&reactor->rusage_poller); + } +} + +static size_t +_spdk_reactor_get_max_event_cnt(uint8_t socket_count) +{ + size_t cnt; + + /* Try to make event ring fill at most 2MB of memory, + * as some ring implementations may require physical address + * contingency. We don't want to introduce a requirement of + * at least 2 physically contiguous 2MB hugepages. 
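[Editor's note, not part of the patch: in the computation just below, 262144 is presumably 2 MiB divided by the 8 bytes each ring slot (a void * on 64-bit hosts) occupies. For example, on a two-socket machine cnt = spdk_min(262144 / 2, 262144 / 2) - 1 = 131071 events per per-socket ring.]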
+ */ + cnt = spdk_min(262144 / socket_count, 262144 / 2); + /* Take into account one extra element required by + * some ring implementations. + */ + cnt -= 1; + return cnt; +} + +void +spdk_reactor_enable_context_switch_monitor(bool enable) +{ + struct spdk_reactor *reactor; + spdk_event_fn fn; + uint32_t core; + + if (enable != g_context_switch_monitor_enabled) { + g_context_switch_monitor_enabled = enable; + if (enable) { + fn = _spdk_reactor_context_switch_monitor_start; + } else { + fn = _spdk_reactor_context_switch_monitor_stop; + } + SPDK_ENV_FOREACH_CORE(core) { + reactor = spdk_reactor_get(core); + spdk_event_call(spdk_event_allocate(core, fn, reactor, NULL)); + } + } +} + +bool +spdk_reactor_context_switch_monitor_enabled(void) +{ + return g_context_switch_monitor_enabled; +} + +static void +spdk_reactor_add_tsc_stats(void *arg, int rc, uint64_t now) +{ + struct spdk_reactor *reactor = arg; + struct spdk_reactor_tsc_stats *tsc_stats = &reactor->tsc_stats; + + if (rc == 0) { + /* Poller status idle */ + tsc_stats->idle_tsc += now - reactor->tsc_last; + } else if (rc > 0) { + /* Poller status busy */ + tsc_stats->busy_tsc += now - reactor->tsc_last; + } else { + /* Poller status unknown */ + tsc_stats->unknown_tsc += now - reactor->tsc_last; + } + + reactor->tsc_last = now; +} + +int +spdk_reactor_get_tsc_stats(struct spdk_reactor_tsc_stats *tsc_stats, uint32_t core) +{ + struct spdk_reactor *reactor; + + if (!spdk_cpuset_get_cpu(g_spdk_app_core_mask, core)) { + return -1; + } + + reactor = spdk_reactor_get(core); + *tsc_stats = reactor->tsc_stats; + + return 0; +} + +/** + * + * \brief This is the main function of the reactor thread. + * + * \code + * + * while (1) + * if (events to run) + * dequeue and run a batch of events + * + * if (active pollers) + * run the first poller in the list and move it to the back + * + * if (first timer poller has expired) + * run the first timer poller and reinsert it in the timer list + * + * if (no action taken and sleep enabled) + * sleep until next timer poller is scheduled to expire + * \endcode + * + */ +static int +_spdk_reactor_run(void *arg) +{ + struct spdk_reactor *reactor = arg; + struct spdk_poller *poller; + uint32_t event_count; + uint64_t now; + uint64_t sleep_cycles; + uint32_t sleep_us; + int rc = -1; + char thread_name[32]; + + snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore); + if (spdk_allocate_thread(_spdk_reactor_send_msg, + _spdk_reactor_start_poller, + _spdk_reactor_stop_poller, + reactor, thread_name) == NULL) { + return -1; + } + SPDK_NOTICELOG("Reactor started on core %u on socket %u\n", reactor->lcore, + reactor->socket_id); + + sleep_cycles = reactor->max_delay_us * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; + if (g_context_switch_monitor_enabled) { + _spdk_reactor_context_switch_monitor_start(reactor, NULL); + } + now = spdk_get_ticks(); + reactor->tsc_last = now; + + while (1) { + bool took_action = false; + + event_count = _spdk_event_queue_run_batch(reactor); + if (event_count > 0) { + rc = 1; + now = spdk_get_ticks(); + spdk_reactor_add_tsc_stats(reactor, rc, now); + took_action = true; + } + + poller = TAILQ_FIRST(&reactor->active_pollers); + if (poller) { + TAILQ_REMOVE(&reactor->active_pollers, poller, tailq); + poller->state = SPDK_POLLER_STATE_RUNNING; + rc = poller->fn(poller->arg); + now = spdk_get_ticks(); + spdk_reactor_add_tsc_stats(reactor, rc, now); + if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) { + free(poller); + } else { + poller->state = SPDK_POLLER_STATE_WAITING; + 
TAILQ_INSERT_TAIL(&reactor->active_pollers, poller, tailq); + } + took_action = true; + } + + poller = TAILQ_FIRST(&reactor->timer_pollers); + if (poller) { + if (took_action == false) { + now = spdk_get_ticks(); + } + + if (now >= poller->next_run_tick) { + uint64_t tmp_timer_tsc; + + TAILQ_REMOVE(&reactor->timer_pollers, poller, tailq); + poller->state = SPDK_POLLER_STATE_RUNNING; + rc = poller->fn(poller->arg); + /* Save the tsc value from before poller->fn was executed. We want to + * use the current time for idle/busy tsc value accounting, but want to + * use the older time to reinsert to the timer poller below. */ + tmp_timer_tsc = now; + now = spdk_get_ticks(); + spdk_reactor_add_tsc_stats(reactor, rc, now); + if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) { + free(poller); + } else { + poller->state = SPDK_POLLER_STATE_WAITING; + _spdk_poller_insert_timer(reactor, poller, tmp_timer_tsc); + } + took_action = true; + } + } + + /* Determine if the thread can sleep */ + if (sleep_cycles && !took_action) { + now = spdk_get_ticks(); + sleep_us = reactor->max_delay_us; + + poller = TAILQ_FIRST(&reactor->timer_pollers); + if (poller) { + /* There are timers registered, so don't sleep beyond + * when the next timer should fire */ + if (poller->next_run_tick < (now + sleep_cycles)) { + if (poller->next_run_tick <= now) { + sleep_us = 0; + } else { + sleep_us = ((poller->next_run_tick - now) * + SPDK_SEC_TO_USEC) / spdk_get_ticks_hz(); + } + } + } + + if (sleep_us > 0) { + usleep(sleep_us); + } + } + + if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { + break; + } + } + + _spdk_reactor_context_switch_monitor_stop(reactor, NULL); + spdk_free_thread(); + return 0; +} + +static void +spdk_reactor_construct(struct spdk_reactor *reactor, uint32_t lcore, uint64_t max_delay_us) +{ + reactor->lcore = lcore; + reactor->socket_id = spdk_env_get_socket_id(lcore); + assert(reactor->socket_id < SPDK_MAX_SOCKET); + reactor->max_delay_us = max_delay_us; + + TAILQ_INIT(&reactor->active_pollers); + TAILQ_INIT(&reactor->timer_pollers); + + reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, reactor->socket_id); + if (!reactor->events) { + SPDK_NOTICELOG("Ring creation failed on preferred socket %d. 
Try other sockets.\n", + reactor->socket_id); + + reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, + SPDK_ENV_SOCKET_ID_ANY); + } + assert(reactor->events != NULL); + + reactor->event_mempool = g_spdk_event_mempool[reactor->socket_id]; +} + +int +spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) +{ + int ret; + struct spdk_cpuset *validmask; + + ret = spdk_cpuset_parse(cpumask, mask); + if (ret < 0) { + return ret; + } + + validmask = spdk_app_get_core_mask(); + spdk_cpuset_and(cpumask, validmask); + + return 0; +} + +struct spdk_cpuset * +spdk_app_get_core_mask(void) +{ + return g_spdk_app_core_mask; +} + + +static uint64_t +spdk_reactor_get_socket_mask(void) +{ + uint32_t i; + uint32_t socket_id; + uint64_t socket_info = 0; + + SPDK_ENV_FOREACH_CORE(i) { + socket_id = spdk_env_get_socket_id(i); + socket_info |= (1ULL << socket_id); + } + + return socket_info; +} + +void +spdk_reactors_start(void) +{ + struct spdk_reactor *reactor; + uint32_t i, current_core; + int rc; + + g_reactor_state = SPDK_REACTOR_STATE_RUNNING; + g_spdk_app_core_mask = spdk_cpuset_alloc(); + + current_core = spdk_env_get_current_core(); + SPDK_ENV_FOREACH_CORE(i) { + if (i != current_core) { + reactor = spdk_reactor_get(i); + rc = spdk_env_thread_launch_pinned(reactor->lcore, _spdk_reactor_run, reactor); + if (rc < 0) { + SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore); + assert(false); + return; + } + } + spdk_cpuset_set_cpu(g_spdk_app_core_mask, i, true); + } + + /* Start the master reactor */ + reactor = spdk_reactor_get(current_core); + _spdk_reactor_run(reactor); + + spdk_env_thread_wait_all(); + + g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN; + spdk_cpuset_free(g_spdk_app_core_mask); + g_spdk_app_core_mask = NULL; +} + +void +spdk_reactors_stop(void *arg1, void *arg2) +{ + g_reactor_state = SPDK_REACTOR_STATE_EXITING; +} + +int +spdk_reactors_init(unsigned int max_delay_us) +{ + int rc; + uint32_t i, j, last_core; + struct spdk_reactor *reactor; + uint64_t socket_mask = 0x0; + uint8_t socket_count = 0; + char mempool_name[32]; + + socket_mask = spdk_reactor_get_socket_mask(); + SPDK_NOTICELOG("Occupied cpu socket mask is 0x%lx\n", socket_mask); + + for (i = 0; i < SPDK_MAX_SOCKET; i++) { + if ((1ULL << i) & socket_mask) { + socket_count++; + } + } + if (socket_count == 0) { + SPDK_ERRLOG("No sockets occupied (internal error)\n"); + return -1; + } + + for (i = 0; i < SPDK_MAX_SOCKET; i++) { + if ((1ULL << i) & socket_mask) { + snprintf(mempool_name, sizeof(mempool_name), "evtpool%d_%d", i, getpid()); + g_spdk_event_mempool[i] = spdk_mempool_create(mempool_name, + _spdk_reactor_get_max_event_cnt(socket_count), + sizeof(struct spdk_event), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, i); + + if (g_spdk_event_mempool[i] == NULL) { + SPDK_NOTICELOG("Event_mempool creation failed on preferred socket %d.\n", i); + + /* + * Instead of failing the operation directly, try to create + * the mempool on any available sockets in the case that + * memory is not evenly installed on all sockets. If still + * fails, free all allocated memory and exits. 
+ */ + g_spdk_event_mempool[i] = spdk_mempool_create( + mempool_name, + _spdk_reactor_get_max_event_cnt(socket_count), + sizeof(struct spdk_event), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + + if (g_spdk_event_mempool[i] == NULL) { + for (j = i - 1; j < i; j--) { + if (g_spdk_event_mempool[j] != NULL) { + spdk_mempool_free(g_spdk_event_mempool[j]); + } + } + SPDK_ERRLOG("spdk_event_mempool creation failed\n"); + return -1; + } + } + } else { + g_spdk_event_mempool[i] = NULL; + } + } + + /* struct spdk_reactor must be aligned on 64 byte boundary */ + last_core = spdk_env_get_last_core(); + rc = posix_memalign((void **)&g_reactors, 64, + (last_core + 1) * sizeof(struct spdk_reactor)); + if (rc != 0) { + SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n", + last_core + 1); + for (i = 0; i < SPDK_MAX_SOCKET; i++) { + if (g_spdk_event_mempool[i] != NULL) { + spdk_mempool_free(g_spdk_event_mempool[i]); + } + } + return -1; + } + + memset(g_reactors, 0, (last_core + 1) * sizeof(struct spdk_reactor)); + + SPDK_ENV_FOREACH_CORE(i) { + reactor = spdk_reactor_get(i); + spdk_reactor_construct(reactor, i, max_delay_us); + } + + g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED; + + return 0; +} + +void +spdk_reactors_fini(void) +{ + uint32_t i; + struct spdk_reactor *reactor; + + SPDK_ENV_FOREACH_CORE(i) { + reactor = spdk_reactor_get(i); + if (spdk_likely(reactor != NULL) && reactor->events != NULL) { + spdk_ring_free(reactor->events); + } + } + + for (i = 0; i < SPDK_MAX_SOCKET; i++) { + if (g_spdk_event_mempool[i] != NULL) { + spdk_mempool_free(g_spdk_event_mempool[i]); + } + } + + free(g_reactors); + g_reactors = NULL; +} + +SPDK_LOG_REGISTER_COMPONENT("reactor", SPDK_LOG_REACTOR) diff --git a/src/spdk/lib/event/rpc.c b/src/spdk/lib/event/rpc.c new file mode 100644 index 00000000..f8414349 --- /dev/null +++ b/src/spdk/lib/event/rpc.c @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/rpc.h" + +#include "spdk_internal/event.h" + +#define RPC_SELECT_INTERVAL 4000 /* 4ms */ + +static struct spdk_poller *g_rpc_poller = NULL; + +static int +spdk_rpc_subsystem_poll(void *arg) +{ + spdk_rpc_accept(); + return -1; +} + +void +spdk_rpc_initialize(const char *listen_addr) +{ + int rc; + + if (listen_addr == NULL) { + return; + } + + /* Listen on the requested address */ + rc = spdk_rpc_listen(listen_addr); + if (rc != 0) { + SPDK_ERRLOG("Unable to start RPC service at %s\n", listen_addr); + return; + } + + spdk_rpc_set_state(SPDK_RPC_STARTUP); + + /* Register a poller to periodically check for RPCs */ + g_rpc_poller = spdk_poller_register(spdk_rpc_subsystem_poll, NULL, RPC_SELECT_INTERVAL); +} + +void +spdk_rpc_finish(void) +{ + spdk_rpc_close(); + spdk_poller_unregister(&g_rpc_poller); +} diff --git a/src/spdk/lib/event/rpc/Makefile b/src/spdk/lib/event/rpc/Makefile new file mode 100644 index 00000000..fcba526a --- /dev/null +++ b/src/spdk/lib/event/rpc/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = app_rpc.c subsystem_rpc.c +LIBNAME = app_rpc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/rpc/app_rpc.c b/src/spdk/lib/event/rpc/app_rpc.c new file mode 100644 index 00000000..95cb0d2a --- /dev/null +++ b/src/spdk/lib/event/rpc/app_rpc.c @@ -0,0 +1,155 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/event.h" +#include "spdk/rpc.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +struct rpc_kill_instance { + char *sig_name; +}; + +static void +free_rpc_kill_instance(struct rpc_kill_instance *req) +{ + free(req->sig_name); +} + +static const struct spdk_json_object_decoder rpc_kill_instance_decoders[] = { + {"sig_name", offsetof(struct rpc_kill_instance, sig_name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_kill_instance(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + static const struct { + const char *signal_string; + int32_t signal; + } signals[] = { + {"SIGINT", SIGINT}, + {"SIGTERM", SIGTERM}, + {"SIGQUIT", SIGQUIT}, + {"SIGHUP", SIGHUP}, + {"SIGKILL", SIGKILL}, + }; + size_t i, sig_count; + int signal; + struct rpc_kill_instance req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_kill_instance_decoders, + SPDK_COUNTOF(rpc_kill_instance_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_REACTOR, "spdk_json_decode_object failed\n"); + goto invalid; + } + + sig_count = SPDK_COUNTOF(signals); + signal = atoi(req.sig_name); + for (i = 0 ; i < sig_count; i++) { + if (strcmp(req.sig_name, signals[i].signal_string) == 0 || + signal == signals[i].signal) { + break; + } + } + + if (i == sig_count) { + goto invalid; + } + + SPDK_DEBUGLOG(SPDK_LOG_REACTOR, "sending signal %d\n", signals[i].signal); + free_rpc_kill_instance(&req); + kill(getpid(), signals[i].signal); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_kill_instance(&req); +} +SPDK_RPC_REGISTER("kill_instance", spdk_rpc_kill_instance, SPDK_RPC_RUNTIME) + + +struct rpc_context_switch_monitor { + bool enabled; +}; + +static const struct spdk_json_object_decoder rpc_context_switch_monitor_decoders[] = { + {"enabled", offsetof(struct rpc_context_switch_monitor, enabled), spdk_json_decode_bool}, +}; + +static void +spdk_rpc_context_switch_monitor(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_context_switch_monitor req = {}; 
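+	/* Note: the "enabled" parameter is optional; when params are omitted this
+	 * RPC leaves the monitor setting untouched and only reports whether the
+	 * context switch monitor is currently enabled.
+	 */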
+ struct spdk_json_write_ctx *w; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_context_switch_monitor_decoders, + SPDK_COUNTOF(rpc_context_switch_monitor_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_REACTOR, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + return; + } + + spdk_reactor_enable_context_switch_monitor(req.enabled); + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "enabled"); + spdk_json_write_bool(w, spdk_reactor_context_switch_monitor_enabled()); + + spdk_json_write_object_end(w); + spdk_jsonrpc_end_result(request, w); +} + +SPDK_RPC_REGISTER("context_switch_monitor", spdk_rpc_context_switch_monitor, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/event/rpc/subsystem_rpc.c b/src/spdk/lib/event/rpc/subsystem_rpc.c new file mode 100644 index 00000000..1b83990f --- /dev/null +++ b/src/spdk/lib/event/rpc/subsystem_rpc.c @@ -0,0 +1,129 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk_internal/event.h" +#include "spdk/rpc.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/env.h" + +static void +spdk_rpc_get_subsystems(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_subsystem *subsystem; + struct spdk_subsystem_depend *deps; + + if (params) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "'get_subsystems' requires no arguments"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + TAILQ_FOREACH(subsystem, &g_subsystems, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "subsystem", subsystem->name); + spdk_json_write_named_array_begin(w, "depends_on"); + TAILQ_FOREACH(deps, &g_subsystems_deps, tailq) { + if (strcmp(subsystem->name, deps->name) == 0) { + spdk_json_write_string(w, deps->depends_on); + } + } + spdk_json_write_array_end(w); + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); +} + +SPDK_RPC_REGISTER("get_subsystems", spdk_rpc_get_subsystems, SPDK_RPC_RUNTIME) + +struct rpc_get_subsystem_config { + char *name; +}; + +static const struct spdk_json_object_decoder rpc_get_subsystem_config[] = { + {"name", offsetof(struct rpc_get_subsystem_config, name), spdk_json_decode_string}, +}; + +static void +rpc_get_subsystem_config_done(void *arg1, void *arg2) +{ + struct spdk_jsonrpc_request *request = arg1; + struct spdk_json_write_ctx *w = arg2; + + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_get_subsystem_config(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_subsystem_config req = {}; + struct spdk_json_write_ctx *w; + struct spdk_subsystem *subsystem; + struct spdk_event *ev; + + if (spdk_json_decode_object(params, rpc_get_subsystem_config, + SPDK_COUNTOF(rpc_get_subsystem_config), &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid arguments"); + return; + } + + subsystem = spdk_subsystem_find(&g_subsystems, req.name); + if (!subsystem) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Subsystem '%s' not found", req.name); + free(req.name); + return; + } + + free(req.name); + + w = spdk_jsonrpc_begin_result(request); + if (w) { + ev = spdk_event_allocate(spdk_env_get_current_core(), rpc_get_subsystem_config_done, request, w); + spdk_subsystem_config_json(w, subsystem, ev); + } +} + +SPDK_RPC_REGISTER("get_subsystem_config", spdk_rpc_get_subsystem_config, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/event/subsystem.c b/src/spdk/lib/event/subsystem.c new file mode 100644 index 00000000..438e7f54 --- /dev/null +++ b/src/spdk/lib/event/subsystem.c @@ -0,0 +1,256 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/log.h" + +#include "spdk_internal/event.h" +#include "spdk/env.h" + +struct spdk_subsystem_list g_subsystems = TAILQ_HEAD_INITIALIZER(g_subsystems); +struct spdk_subsystem_depend_list g_subsystems_deps = TAILQ_HEAD_INITIALIZER(g_subsystems_deps); +static struct spdk_subsystem *g_next_subsystem; +static bool g_subsystems_initialized = false; +static struct spdk_event *g_app_start_event; +static struct spdk_event *g_app_stop_event; +static uint32_t g_fini_core; + +void +spdk_add_subsystem(struct spdk_subsystem *subsystem) +{ + TAILQ_INSERT_TAIL(&g_subsystems, subsystem, tailq); +} + +void +spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend) +{ + TAILQ_INSERT_TAIL(&g_subsystems_deps, depend, tailq); +} + +struct spdk_subsystem * +spdk_subsystem_find(struct spdk_subsystem_list *list, const char *name) +{ + struct spdk_subsystem *iter; + + TAILQ_FOREACH(iter, list, tailq) { + if (strcmp(name, iter->name) == 0) { + return iter; + } + } + + return NULL; +} + +static void +subsystem_sort(void) +{ + bool depends_on, depends_on_sorted; + struct spdk_subsystem *subsystem, *subsystem_tmp; + struct spdk_subsystem_depend *subsystem_dep; + + struct spdk_subsystem_list subsystems_list = TAILQ_HEAD_INITIALIZER(subsystems_list); + + while (!TAILQ_EMPTY(&g_subsystems)) { + TAILQ_FOREACH_SAFE(subsystem, &g_subsystems, tailq, subsystem_tmp) { + depends_on = false; + TAILQ_FOREACH(subsystem_dep, &g_subsystems_deps, tailq) { + if (strcmp(subsystem->name, subsystem_dep->name) == 0) { + depends_on = true; + depends_on_sorted = !!spdk_subsystem_find(&subsystems_list, subsystem_dep->depends_on); + if (depends_on_sorted) { + continue; + } + break; + } + } + + if (depends_on == false) { + TAILQ_REMOVE(&g_subsystems, subsystem, tailq); + TAILQ_INSERT_TAIL(&subsystems_list, subsystem, tailq); + } else { + if (depends_on_sorted == true) { + TAILQ_REMOVE(&g_subsystems, subsystem, tailq); + TAILQ_INSERT_TAIL(&subsystems_list, subsystem, tailq); + } + } + } + } + + TAILQ_FOREACH_SAFE(subsystem, &subsystems_list, tailq, subsystem_tmp) { + TAILQ_REMOVE(&subsystems_list, subsystem, tailq); + TAILQ_INSERT_TAIL(&g_subsystems, subsystem, tailq); + } +} + +void +spdk_subsystem_init_next(int rc) +{ + if (rc) { + SPDK_ERRLOG("Init subsystem %s failed\n", g_next_subsystem->name); + spdk_app_stop(rc); + return; + } + + if (!g_next_subsystem) { + g_next_subsystem = TAILQ_FIRST(&g_subsystems); + } else { + g_next_subsystem = TAILQ_NEXT(g_next_subsystem, tailq); + } + + if (!g_next_subsystem) 
{ + g_subsystems_initialized = true; + spdk_event_call(g_app_start_event); + return; + } + + if (g_next_subsystem->init) { + g_next_subsystem->init(); + } else { + spdk_subsystem_init_next(0); + } +} + +static void +spdk_subsystem_verify(void *arg1, void *arg2) +{ + struct spdk_subsystem_depend *dep; + + /* Verify that all dependency name and depends_on subsystems are registered */ + TAILQ_FOREACH(dep, &g_subsystems_deps, tailq) { + if (!spdk_subsystem_find(&g_subsystems, dep->name)) { + SPDK_ERRLOG("subsystem %s is missing\n", dep->name); + spdk_app_stop(-1); + return; + } + if (!spdk_subsystem_find(&g_subsystems, dep->depends_on)) { + SPDK_ERRLOG("subsystem %s dependency %s is missing\n", + dep->name, dep->depends_on); + spdk_app_stop(-1); + return; + } + } + + subsystem_sort(); + + spdk_subsystem_init_next(0); +} + +void +spdk_subsystem_init(struct spdk_event *app_start_event) +{ + struct spdk_event *verify_event; + + g_app_start_event = app_start_event; + + verify_event = spdk_event_allocate(spdk_env_get_current_core(), spdk_subsystem_verify, NULL, NULL); + spdk_event_call(verify_event); +} + +static void +_spdk_subsystem_fini_next(void *arg1, void *arg2) +{ + assert(g_fini_core == spdk_env_get_current_core()); + + if (!g_next_subsystem) { + /* If the initialized flag is false, then we've failed to initialize + * the very first subsystem and no de-init is needed + */ + if (g_subsystems_initialized) { + g_next_subsystem = TAILQ_LAST(&g_subsystems, spdk_subsystem_list); + } + } else { + /* We rewind the g_next_subsystem unconditionally - even when some subsystem failed + * to initialize. It is assumed that subsystem which failed to initialize does not + * need to be deinitialized. + */ + g_next_subsystem = TAILQ_PREV(g_next_subsystem, spdk_subsystem_list, tailq); + } + + while (g_next_subsystem) { + if (g_next_subsystem->fini) { + g_next_subsystem->fini(); + return; + } + g_next_subsystem = TAILQ_PREV(g_next_subsystem, spdk_subsystem_list, tailq); + } + + spdk_event_call(g_app_stop_event); + return; +} + +void +spdk_subsystem_fini_next(void) +{ + if (g_fini_core != spdk_env_get_current_core()) { + struct spdk_event *event; + + event = spdk_event_allocate(g_fini_core, _spdk_subsystem_fini_next, NULL, NULL); + spdk_event_call(event); + } else { + _spdk_subsystem_fini_next(NULL, NULL); + } +} + +void +spdk_subsystem_fini(struct spdk_event *app_stop_event) +{ + g_app_stop_event = app_stop_event; + g_fini_core = spdk_env_get_current_core(); + + spdk_subsystem_fini_next(); +} + +void +spdk_subsystem_config(FILE *fp) +{ + struct spdk_subsystem *subsystem; + + TAILQ_FOREACH(subsystem, &g_subsystems, tailq) { + if (subsystem->config) { + subsystem->config(fp); + } + } +} + +void +spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem, + struct spdk_event *done_ev) +{ + if (subsystem && subsystem->write_config_json) { + subsystem->write_config_json(w, done_ev); + } else { + spdk_json_write_null(w); + spdk_event_call(done_ev); + } +} diff --git a/src/spdk/lib/event/subsystems/Makefile b/src/spdk/lib/event/subsystems/Makefile new file mode 100644 index 00000000..4a19160b --- /dev/null +++ b/src/spdk/lib/event/subsystems/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += bdev copy iscsi nbd net nvmf scsi vhost + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/lib/event/subsystems/bdev/Makefile b/src/spdk/lib/event/subsystems/bdev/Makefile new file mode 100644 index 00000000..1747b759 --- /dev/null +++ b/src/spdk/lib/event/subsystems/bdev/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev.c bdev_rpc.c +LIBNAME = event_bdev + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/bdev/bdev.c b/src/spdk/lib/event/subsystems/bdev/bdev.c new file mode 100644 index 00000000..5999d612 --- /dev/null +++ b/src/spdk/lib/event/subsystems/bdev/bdev.c @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/thread.h" + +#include "spdk_internal/event.h" +#include "spdk/env.h" + +static void +spdk_bdev_initialize_complete(void *cb_arg, int rc) +{ + spdk_subsystem_init_next(rc); +} + +static void +spdk_bdev_subsystem_initialize(void) +{ + spdk_bdev_initialize(spdk_bdev_initialize_complete, NULL); +} + +static void +spdk_bdev_subsystem_finish_done(void *cb_arg) +{ + spdk_subsystem_fini_next(); +} + +static void +spdk_bdev_subsystem_finish(void) +{ + spdk_bdev_finish(spdk_bdev_subsystem_finish_done, NULL); +} + +static void +_spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev) +{ + spdk_bdev_subsystem_config_json(w); + spdk_event_call(done_ev); +} + +static struct spdk_subsystem g_spdk_subsystem_bdev = { + .name = "bdev", + .init = spdk_bdev_subsystem_initialize, + .fini = spdk_bdev_subsystem_finish, + .config = spdk_bdev_config_text, + .write_config_json = _spdk_bdev_subsystem_config_json, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_bdev); +SPDK_SUBSYSTEM_DEPEND(bdev, copy) diff --git a/src/spdk/lib/event/subsystems/bdev/bdev_rpc.c b/src/spdk/lib/event/subsystems/bdev/bdev_rpc.c new file mode 100644 index 00000000..69ead5f2 --- /dev/null +++ b/src/spdk/lib/event/subsystems/bdev/bdev_rpc.c @@ -0,0 +1,97 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/bdev.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +struct spdk_rpc_set_bdev_opts { + uint32_t bdev_io_pool_size; + uint32_t bdev_io_cache_size; +}; + +static const struct spdk_json_object_decoder rpc_set_bdev_opts_decoders[] = { + {"bdev_io_pool_size", offsetof(struct spdk_rpc_set_bdev_opts, bdev_io_pool_size), spdk_json_decode_uint32, true}, + {"bdev_io_cache_size", offsetof(struct spdk_rpc_set_bdev_opts, bdev_io_cache_size), spdk_json_decode_uint32, true}, +}; + +static void +spdk_rpc_set_bdev_opts(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + struct spdk_rpc_set_bdev_opts rpc_opts; + struct spdk_bdev_opts bdev_opts; + struct spdk_json_write_ctx *w; + int rc; + + rpc_opts.bdev_io_pool_size = UINT32_MAX; + rpc_opts.bdev_io_cache_size = UINT32_MAX; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_set_bdev_opts_decoders, + SPDK_COUNTOF(rpc_set_bdev_opts_decoders), &rpc_opts)) { + SPDK_ERRLOG("spdk_json_decode_object() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + } + + spdk_bdev_get_opts(&bdev_opts); + if (rpc_opts.bdev_io_pool_size != UINT32_MAX) { + bdev_opts.bdev_io_pool_size = rpc_opts.bdev_io_pool_size; + } + if (rpc_opts.bdev_io_cache_size != UINT32_MAX) { + bdev_opts.bdev_io_cache_size = rpc_opts.bdev_io_cache_size; + } + rc = spdk_bdev_set_opts(&bdev_opts); + + if (rc != 0) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Pool size %" PRIu32 " too small for cache size %" PRIu32, + bdev_opts.bdev_io_pool_size, bdev_opts.bdev_io_cache_size); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_bdev_options", spdk_rpc_set_bdev_opts, SPDK_RPC_STARTUP) diff --git a/src/spdk/lib/event/subsystems/copy/Makefile b/src/spdk/lib/event/subsystems/copy/Makefile new file mode 100644 index 00000000..691eee2a --- /dev/null +++ b/src/spdk/lib/event/subsystems/copy/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = copy.c +LIBNAME = event_copy + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/copy/copy.c b/src/spdk/lib/event/subsystems/copy/copy.c new file mode 100644 index 00000000..9bc6e281 --- /dev/null +++ b/src/spdk/lib/event/subsystems/copy/copy.c @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/copy_engine.h" + +#include "spdk_internal/event.h" +#include "spdk/env.h" + +static void +spdk_copy_engine_subsystem_initialize(void) +{ + int rc; + + rc = spdk_copy_engine_initialize(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_copy_engine_subsystem_finish_done(void *cb_arg) +{ + spdk_subsystem_fini_next(); +} + +static void +spdk_copy_engine_subsystem_finish(void) +{ + spdk_copy_engine_finish(spdk_copy_engine_subsystem_finish_done, NULL); +} + +static struct spdk_subsystem g_spdk_subsystem_copy = { + .name = "copy", + .init = spdk_copy_engine_subsystem_initialize, + .fini = spdk_copy_engine_subsystem_finish, + .config = spdk_copy_engine_config_text, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_copy); diff --git a/src/spdk/lib/event/subsystems/iscsi/Makefile b/src/spdk/lib/event/subsystems/iscsi/Makefile new file mode 100644 index 00000000..f57d9f9c --- /dev/null +++ b/src/spdk/lib/event/subsystems/iscsi/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib +C_SRCS = iscsi.c iscsi_rpc.c +LIBNAME = event_iscsi + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/iscsi/iscsi.c b/src/spdk/lib/event/subsystems/iscsi/iscsi.c new file mode 100644 index 00000000..72750398 --- /dev/null +++ b/src/spdk/lib/event/subsystems/iscsi/iscsi.c @@ -0,0 +1,81 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "iscsi/iscsi.h" + +#include "spdk_internal/event.h" + +static void +spdk_iscsi_subsystem_init_complete(void *cb_arg, int rc) +{ + spdk_subsystem_init_next(rc); +} + +static void +spdk_iscsi_subsystem_init(void) +{ + spdk_iscsi_init(spdk_iscsi_subsystem_init_complete, NULL); +} + +static void +spdk_iscsi_subsystem_fini_done(void *arg) +{ + spdk_subsystem_fini_next(); +} + +static void +spdk_iscsi_subsystem_fini(void) +{ + spdk_iscsi_fini(spdk_iscsi_subsystem_fini_done, NULL); +} + +static void +spdk_iscsi_subsystem_config_json(struct spdk_json_write_ctx *w, + struct spdk_event *done_ev) +{ + spdk_iscsi_config_json(w); + spdk_event_call(done_ev); +} + +static struct spdk_subsystem g_spdk_subsystem_iscsi = { + .name = "iscsi", + .init = spdk_iscsi_subsystem_init, + .fini = spdk_iscsi_subsystem_fini, + .config = spdk_iscsi_config_text, + .write_config_json = spdk_iscsi_subsystem_config_json, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_iscsi); +SPDK_SUBSYSTEM_DEPEND(iscsi, scsi) diff --git a/src/spdk/lib/event/subsystems/iscsi/iscsi_rpc.c b/src/spdk/lib/event/subsystems/iscsi/iscsi_rpc.c new file mode 100644 index 00000000..fb96be07 --- /dev/null +++ b/src/spdk/lib/event/subsystems/iscsi/iscsi_rpc.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "iscsi/iscsi.h" +#include "iscsi/conn.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/event.h" + +#include "spdk_internal/log.h" + +static const struct spdk_json_object_decoder rpc_set_iscsi_opts_decoders[] = { + {"auth_file", offsetof(struct spdk_iscsi_opts, authfile), spdk_json_decode_string, true}, + {"node_base", offsetof(struct spdk_iscsi_opts, nodebase), spdk_json_decode_string, true}, + {"nop_timeout", offsetof(struct spdk_iscsi_opts, timeout), spdk_json_decode_int32, true}, + {"nop_in_interval", offsetof(struct spdk_iscsi_opts, nopininterval), spdk_json_decode_int32, true}, + {"no_discovery_auth", offsetof(struct spdk_iscsi_opts, disable_chap), spdk_json_decode_bool, true}, + {"req_discovery_auth", offsetof(struct spdk_iscsi_opts, require_chap), spdk_json_decode_bool, true}, + {"req_discovery_auth_mutual", offsetof(struct spdk_iscsi_opts, mutual_chap), spdk_json_decode_bool, true}, + {"discovery_auth_group", offsetof(struct spdk_iscsi_opts, chap_group), spdk_json_decode_int32, true}, + {"disable_chap", offsetof(struct spdk_iscsi_opts, disable_chap), spdk_json_decode_bool, true}, + {"require_chap", offsetof(struct spdk_iscsi_opts, require_chap), spdk_json_decode_bool, true}, + {"mutual_chap", offsetof(struct spdk_iscsi_opts, mutual_chap), spdk_json_decode_bool, true}, + {"chap_group", offsetof(struct spdk_iscsi_opts, chap_group), spdk_json_decode_int32, true}, + {"max_sessions", offsetof(struct spdk_iscsi_opts, MaxSessions), spdk_json_decode_uint32, true}, + {"max_queue_depth", offsetof(struct spdk_iscsi_opts, MaxQueueDepth), spdk_json_decode_uint32, true}, + {"max_connections_per_session", offsetof(struct spdk_iscsi_opts, MaxConnectionsPerSession), spdk_json_decode_uint32, true}, + {"default_time2wait", offsetof(struct spdk_iscsi_opts, DefaultTime2Wait), spdk_json_decode_uint32, true}, + {"default_time2retain", offsetof(struct spdk_iscsi_opts, DefaultTime2Retain), spdk_json_decode_uint32, true}, + {"first_burst_length", offsetof(struct spdk_iscsi_opts, FirstBurstLength), spdk_json_decode_uint32, true}, + {"immediate_data", offsetof(struct spdk_iscsi_opts, ImmediateData), spdk_json_decode_bool, true}, + {"error_recovery_level", offsetof(struct spdk_iscsi_opts, ErrorRecoveryLevel), spdk_json_decode_uint32, true}, + {"allow_duplicated_isid", offsetof(struct spdk_iscsi_opts, AllowDuplicateIsid), spdk_json_decode_bool, true}, + {"min_connections_per_core", offsetof(struct spdk_iscsi_opts, min_connections_per_core), spdk_json_decode_uint32, true}, +}; + +static void +spdk_rpc_iscsi_set_opts(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_iscsi_opts *opts; + struct spdk_json_write_ctx *w; + + if (g_spdk_iscsi_opts != 
NULL) { + SPDK_ERRLOG("this RPC must not be called more than once.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Must not call more than once"); + return; + } + + opts = spdk_iscsi_opts_alloc(); + if (opts == NULL) { + SPDK_ERRLOG("spdk_iscsi_opts_alloc() failed.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Out of memory"); + return; + } + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_set_iscsi_opts_decoders, + SPDK_COUNTOF(rpc_set_iscsi_opts_decoders), opts)) { + SPDK_ERRLOG("spdk_json_decode_object() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + spdk_iscsi_opts_free(opts); + return; + } + } + + g_spdk_iscsi_opts = spdk_iscsi_opts_copy(opts); + spdk_iscsi_opts_free(opts); + + if (g_spdk_iscsi_opts == NULL) { + SPDK_ERRLOG("spdk_iscsi_opts_copy() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Out of memory"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_iscsi_options", spdk_rpc_iscsi_set_opts, SPDK_RPC_STARTUP) diff --git a/src/spdk/lib/event/subsystems/nbd/Makefile b/src/spdk/lib/event/subsystems/nbd/Makefile new file mode 100644 index 00000000..92d99f15 --- /dev/null +++ b/src/spdk/lib/event/subsystems/nbd/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) 
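+# Thin event-framework glue for the NBD subsystem (nbd.c only).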
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = nbd.c +LIBNAME = event_nbd + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/nbd/nbd.c b/src/spdk/lib/event/subsystems/nbd/nbd.c new file mode 100644 index 00000000..a943eb82 --- /dev/null +++ b/src/spdk/lib/event/subsystems/nbd/nbd.c @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nbd.h" + +#include "spdk_internal/event.h" + +static void +spdk_nbd_subsystem_init(void) +{ + int rc; + + rc = spdk_nbd_init(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_nbd_subsystem_fini(void) +{ + spdk_nbd_fini(); + spdk_subsystem_fini_next(); +} + +static void +spdk_nbd_subsystem_write_config_json(struct spdk_json_write_ctx *w, + struct spdk_event *done_ev) +{ + spdk_nbd_write_config_json(w); + spdk_event_call(done_ev); +} + +static struct spdk_subsystem g_spdk_subsystem_nbd = { + .name = "nbd", + .init = spdk_nbd_subsystem_init, + .fini = spdk_nbd_subsystem_fini, + .config = NULL, + .write_config_json = spdk_nbd_subsystem_write_config_json, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nbd); +SPDK_SUBSYSTEM_DEPEND(nbd, bdev) diff --git a/src/spdk/lib/event/subsystems/net/Makefile b/src/spdk/lib/event/subsystems/net/Makefile new file mode 100644 index 00000000..cf81f07b --- /dev/null +++ b/src/spdk/lib/event/subsystems/net/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = net.c +LIBNAME = event_net + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/net/net.c b/src/spdk/lib/event/subsystems/net/net.c new file mode 100644 index 00000000..9355514f --- /dev/null +++ b/src/spdk/lib/event/subsystems/net/net.c @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/net.h" + +#include "spdk_internal/event.h" + +static void +spdk_interface_subsystem_init(void) +{ + int rc; + + rc = spdk_interface_init(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_interface_subsystem_destroy(void) +{ + spdk_interface_destroy(); + spdk_subsystem_fini_next(); +} + +static struct spdk_subsystem g_spdk_subsystem_interface = { + .name = "interface", + .init = spdk_interface_subsystem_init, + .fini = spdk_interface_subsystem_destroy, + .config = NULL, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_interface); + +static void +spdk_net_subsystem_start(void) +{ + int rc; + + rc = spdk_net_framework_start(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_net_subsystem_fini(void) +{ + spdk_net_framework_fini(); + spdk_subsystem_fini_next(); +} + +static struct spdk_subsystem g_spdk_subsystem_net_framework = { + .name = "net_framework", + .init = spdk_net_subsystem_start, + .fini = spdk_net_subsystem_fini, + .config = NULL, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_net_framework); +SPDK_SUBSYSTEM_DEPEND(net_framework, interface) diff --git a/src/spdk/lib/event/subsystems/nvmf/Makefile b/src/spdk/lib/event/subsystems/nvmf/Makefile new file mode 100644 index 00000000..eca62e25 --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = conf.c nvmf_rpc.c nvmf_rpc_deprecated.c nvmf_tgt.c +LIBNAME = event_nvmf + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/nvmf/conf.c b/src/spdk/lib/event/subsystems/nvmf/conf.c new file mode 100644 index 00000000..986e81c9 --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/conf.c @@ -0,0 +1,587 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "event_nvmf.h" + +#include "spdk/conf.h" +#include "spdk/log.h" +#include "spdk/bdev.h" +#include "spdk/nvme.h" +#include "spdk/nvmf.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#define SPDK_NVMF_MAX_NAMESPACES (1 << 14) + +struct spdk_nvmf_tgt_opts *g_spdk_nvmf_tgt_opts = NULL; +struct spdk_nvmf_tgt_conf *g_spdk_nvmf_tgt_conf = NULL; + +static int +spdk_add_nvmf_discovery_subsystem(void) +{ + struct spdk_nvmf_subsystem *subsystem; + + subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN, + SPDK_NVMF_SUBTYPE_DISCOVERY, 0); + if (subsystem == NULL) { + SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n"); + return -1; + } + + spdk_nvmf_subsystem_set_allow_any_host(subsystem, true); + + return 0; +} + +static void +spdk_nvmf_read_config_file_tgt_opts(struct spdk_conf_section *sp, + struct spdk_nvmf_tgt_opts *opts) +{ + int max_queue_depth; + int max_queues_per_sess; + int in_capsule_data_size; + int max_io_size; + int io_unit_size; + + max_queue_depth = spdk_conf_section_get_intval(sp, "MaxQueueDepth"); + if (max_queue_depth >= 0) { + opts->max_queue_depth = max_queue_depth; + } + + max_queues_per_sess = spdk_conf_section_get_intval(sp, "MaxQueuesPerSession"); + if (max_queues_per_sess >= 0) { + opts->max_qpairs_per_ctrlr = max_queues_per_sess; + } + + in_capsule_data_size = spdk_conf_section_get_intval(sp, "InCapsuleDataSize"); + if (in_capsule_data_size >= 0) { + opts->in_capsule_data_size = in_capsule_data_size; + } + + max_io_size = spdk_conf_section_get_intval(sp, "MaxIOSize"); + if (max_io_size >= 0) { + opts->max_io_size = max_io_size; + } + + io_unit_size = spdk_conf_section_get_intval(sp, "IOUnitSize"); + if (io_unit_size >= 0) { + opts->io_unit_size = io_unit_size; + } +} + +static void +spdk_nvmf_read_config_file_tgt_conf(struct spdk_conf_section *sp, + struct spdk_nvmf_tgt_conf *conf) +{ + int acceptor_poll_rate; + + acceptor_poll_rate = spdk_conf_section_get_intval(sp, "AcceptorPollRate"); + if (acceptor_poll_rate >= 0) 
{ + conf->acceptor_poll_rate = acceptor_poll_rate; + } +} + +static struct spdk_nvmf_tgt_opts * +spdk_nvmf_parse_tgt_opts(void) +{ + struct spdk_nvmf_tgt_opts *opts; + struct spdk_conf_section *sp; + + opts = calloc(1, sizeof(*opts)); + if (!opts) { + SPDK_ERRLOG("calloc() failed for target options\n"); + return NULL; + } + + spdk_nvmf_tgt_opts_init(opts); + + sp = spdk_conf_find_section(NULL, "Nvmf"); + if (sp != NULL) { + spdk_nvmf_read_config_file_tgt_opts(sp, opts); + } + + return opts; +} + +static struct spdk_nvmf_tgt_conf * +spdk_nvmf_parse_tgt_conf(void) +{ + struct spdk_nvmf_tgt_conf *conf; + struct spdk_conf_section *sp; + + conf = calloc(1, sizeof(*conf)); + if (!conf) { + SPDK_ERRLOG("calloc() failed for target conf\n"); + return NULL; + } + + conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US; + conf->conn_sched = DEFAULT_CONN_SCHED; + + sp = spdk_conf_find_section(NULL, "Nvmf"); + if (sp != NULL) { + spdk_nvmf_read_config_file_tgt_conf(sp, conf); + } + + return conf; +} + +static int +spdk_nvmf_parse_nvmf_tgt(void) +{ + int rc; + + if (!g_spdk_nvmf_tgt_opts) { + g_spdk_nvmf_tgt_opts = spdk_nvmf_parse_tgt_opts(); + if (!g_spdk_nvmf_tgt_opts) { + SPDK_ERRLOG("spdk_nvmf_parse_tgt_opts() failed\n"); + return -1; + } + } + + if (!g_spdk_nvmf_tgt_conf) { + g_spdk_nvmf_tgt_conf = spdk_nvmf_parse_tgt_conf(); + if (!g_spdk_nvmf_tgt_conf) { + SPDK_ERRLOG("spdk_nvmf_parse_tgt_conf() failed\n"); + return -1; + } + } + + g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(g_spdk_nvmf_tgt_opts); + + free(g_spdk_nvmf_tgt_opts); + g_spdk_nvmf_tgt_opts = NULL; + + if (!g_spdk_nvmf_tgt) { + SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n"); + return -1; + } + + rc = spdk_add_nvmf_discovery_subsystem(); + if (rc != 0) { + SPDK_ERRLOG("spdk_add_nvmf_discovery_subsystem failed\n"); + return rc; + } + + return 0; +} + +static void +spdk_nvmf_tgt_listen_done(void *cb_arg, int status) +{ + /* TODO: Config parsing should wait for this operation to finish. */ + + if (status) { + SPDK_ERRLOG("Failed to listen on transport address\n"); + } +} + +static int +spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp) +{ + const char *nqn, *mode; + size_t i; + int ret; + int lcore; + bool allow_any_host; + const char *sn; + struct spdk_nvmf_subsystem *subsystem; + int num_ns; + + nqn = spdk_conf_section_get_val(sp, "NQN"); + if (nqn == NULL) { + SPDK_ERRLOG("Subsystem missing NQN\n"); + return -1; + } + + mode = spdk_conf_section_get_val(sp, "Mode"); + lcore = spdk_conf_section_get_intval(sp, "Core"); + num_ns = spdk_conf_section_get_intval(sp, "MaxNamespaces"); + + if (num_ns < 1) { + num_ns = 0; + } else if (num_ns > SPDK_NVMF_MAX_NAMESPACES) { + num_ns = SPDK_NVMF_MAX_NAMESPACES; + } + + /* Mode is no longer a valid parameter, but print out a nice + * message if it exists to inform users. + */ + if (mode) { + SPDK_NOTICELOG("Mode present in the [Subsystem] section of the config file.\n" + "Mode was removed as a valid parameter.\n"); + if (strcasecmp(mode, "Virtual") == 0) { + SPDK_NOTICELOG("Your mode value is 'Virtual' which is now the only possible mode.\n" + "Your configuration file will work as expected.\n"); + } else { + SPDK_NOTICELOG("Please remove Mode from your configuration file.\n"); + return -1; + } + } + + /* Core is no longer a valid parameter, but print out a nice + * message if it exists to inform users. + */ + if (lcore >= 0) { + SPDK_NOTICELOG("Core present in the [Subsystem] section of the config file.\n" + "Core was removed as an option. 
Subsystems can now run on all available cores.\n"); + SPDK_NOTICELOG("Please remove Core from your configuration file. Ignoring it and continuing.\n"); + } + + sn = spdk_conf_section_get_val(sp, "SN"); + if (sn == NULL) { + SPDK_ERRLOG("Subsystem %s: missing serial number\n", nqn); + return -1; + } + + subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, nqn, SPDK_NVMF_SUBTYPE_NVME, num_ns); + if (subsystem == NULL) { + goto done; + } + + if (spdk_nvmf_subsystem_set_sn(subsystem, sn)) { + SPDK_ERRLOG("Subsystem %s: invalid serial number '%s'\n", nqn, sn); + spdk_nvmf_subsystem_destroy(subsystem); + subsystem = NULL; + goto done; + } + + for (i = 0; ; i++) { + struct spdk_nvmf_ns_opts ns_opts; + struct spdk_bdev *bdev; + const char *bdev_name; + const char *uuid_str; + char *nsid_str; + + bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0); + if (!bdev_name) { + break; + } + + bdev = spdk_bdev_get_by_name(bdev_name); + if (bdev == NULL) { + SPDK_ERRLOG("Could not find namespace bdev '%s'\n", bdev_name); + spdk_nvmf_subsystem_destroy(subsystem); + subsystem = NULL; + goto done; + } + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts)); + + nsid_str = spdk_conf_section_get_nmval(sp, "Namespace", i, 1); + if (nsid_str) { + char *end; + unsigned long nsid_ul = strtoul(nsid_str, &end, 0); + + if (*end != '\0' || nsid_ul == 0 || nsid_ul >= UINT32_MAX) { + SPDK_ERRLOG("Invalid NSID %s\n", nsid_str); + spdk_nvmf_subsystem_destroy(subsystem); + subsystem = NULL; + goto done; + } + + ns_opts.nsid = (uint32_t)nsid_ul; + } + + uuid_str = spdk_conf_section_get_nmval(sp, "Namespace", i, 2); + if (uuid_str) { + if (spdk_uuid_parse(&ns_opts.uuid, uuid_str)) { + SPDK_ERRLOG("Invalid UUID %s\n", uuid_str); + spdk_nvmf_subsystem_destroy(subsystem); + subsystem = NULL; + goto done; + } + } + + if (spdk_nvmf_subsystem_add_ns(subsystem, bdev, &ns_opts, sizeof(ns_opts)) == 0) { + SPDK_ERRLOG("Unable to add namespace\n"); + spdk_nvmf_subsystem_destroy(subsystem); + subsystem = NULL; + goto done; + } + + SPDK_INFOLOG(SPDK_LOG_NVMF, "Attaching block device %s to subsystem %s\n", + spdk_bdev_get_name(bdev), spdk_nvmf_subsystem_get_nqn(subsystem)); + } + + /* Parse Listen sections */ + for (i = 0; ; i++) { + struct spdk_nvme_transport_id trid = {0}; + const char *transport; + const char *address; + char *address_dup; + char *host; + char *port; + + transport = spdk_conf_section_get_nmval(sp, "Listen", i, 0); + if (!transport) { + break; + } + + if (spdk_nvme_transport_id_parse_trtype(&trid.trtype, transport)) { + SPDK_ERRLOG("Invalid listen address transport type '%s'\n", transport); + continue; + } + + address = spdk_conf_section_get_nmval(sp, "Listen", i, 1); + if (!address) { + break; + } + + address_dup = strdup(address); + if (!address_dup) { + break; + } + + ret = spdk_parse_ip_addr(address_dup, &host, &port); + if (ret < 0) { + SPDK_ERRLOG("Unable to parse listen address '%s'\n", address); + free(address_dup); + continue; + } + + if (strchr(host, ':')) { + trid.adrfam = SPDK_NVMF_ADRFAM_IPV6; + } else { + trid.adrfam = SPDK_NVMF_ADRFAM_IPV4; + } + + snprintf(trid.traddr, sizeof(trid.traddr), "%s", host); + if (port) { + snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", port); + } + free(address_dup); + + spdk_nvmf_tgt_listen(g_spdk_nvmf_tgt, &trid, spdk_nvmf_tgt_listen_done, NULL); + + spdk_nvmf_subsystem_add_listener(subsystem, &trid); + } + + /* Parse Host sections */ + for (i = 0; ; i++) { + const char *host = spdk_conf_section_get_nval(sp, "Host", i); + + if (!host) { + break; + } + + 
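+		/*
+		 * Illustrative only: each "Host" entry in a [Subsystem] section names
+		 * one initiator NQN that is allowed to connect to this subsystem, e.g.
+		 *
+		 *   Host nqn.2014-08.org.nvmexpress:uuid:8b43a1de-0000-0000-0000-000000000001
+		 *
+		 * The NQN above is an example value, not taken from this patch.
+		 */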
spdk_nvmf_subsystem_add_host(subsystem, host); + } + + allow_any_host = spdk_conf_section_get_boolval(sp, "AllowAnyHost", false); + spdk_nvmf_subsystem_set_allow_any_host(subsystem, allow_any_host); + +done: + return (subsystem != NULL); +} + +static int +spdk_nvmf_parse_subsystems(void) +{ + int rc = 0; + struct spdk_conf_section *sp; + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "Subsystem")) { + rc = spdk_nvmf_parse_subsystem(sp); + if (rc < 0) { + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +struct spdk_nvmf_parse_transport_ctx { + struct spdk_conf_section *sp; + spdk_nvmf_parse_conf_done_fn cb_fn; +}; + +static void spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx); + +static void +spdk_nvmf_tgt_add_transport_done(void *cb_arg, int status) +{ + struct spdk_nvmf_parse_transport_ctx *ctx = cb_arg; + int rc; + + if (status < 0) { + SPDK_ERRLOG("Add transport to target failed (%d).\n", status); + ctx->cb_fn(status); + free(ctx); + return; + } + + /* find next transport */ + ctx->sp = spdk_conf_next_section(ctx->sp); + while (ctx->sp) { + if (spdk_conf_section_match_prefix(ctx->sp, "Transport")) { + spdk_nvmf_parse_transport(ctx); + return; + } + ctx->sp = spdk_conf_next_section(ctx->sp); + } + + /* done with transports, parse Subsystem sections */ + rc = spdk_nvmf_parse_subsystems(); + + ctx->cb_fn(rc); + free(ctx); +} + +static void +spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx) +{ + const char *type; + struct spdk_nvmf_transport_opts opts = { 0 }; + enum spdk_nvme_transport_type trtype; + struct spdk_nvmf_transport *transport; + int val; + + type = spdk_conf_section_get_val(ctx->sp, "Type"); + if (type == NULL) { + SPDK_ERRLOG("Transport missing Type\n"); + ctx->cb_fn(-1); + free(ctx); + return; + } + + if (spdk_nvme_transport_id_parse_trtype(&trtype, type)) { + SPDK_ERRLOG("Invalid transport type '%s'\n", type); + ctx->cb_fn(-1); + free(ctx); + return; + } + + if (spdk_nvmf_tgt_get_transport(g_spdk_nvmf_tgt, trtype)) { + SPDK_ERRLOG("Duplicate transport type '%s'\n", type); + ctx->cb_fn(-1); + free(ctx); + return; + } + + if (!spdk_nvmf_transport_opts_init(trtype, &opts)) { + ctx->cb_fn(-1); + free(ctx); + return; + } + + val = spdk_conf_section_get_intval(ctx->sp, "MaxQueueDepth"); + if (val >= 0) { + opts.max_queue_depth = val; + } + val = spdk_conf_section_get_intval(ctx->sp, "MaxQueuesPerSession"); + if (val >= 0) { + opts.max_qpairs_per_ctrlr = val; + } + val = spdk_conf_section_get_intval(ctx->sp, "InCapsuleDataSize"); + if (val >= 0) { + opts.in_capsule_data_size = val; + } + val = spdk_conf_section_get_intval(ctx->sp, "MaxIOSize"); + if (val >= 0) { + opts.max_io_size = val; + } + val = spdk_conf_section_get_intval(ctx->sp, "IOUnitSize"); + if (val >= 0) { + opts.io_unit_size = val; + } + val = spdk_conf_section_get_intval(ctx->sp, "MaxAQDepth"); + if (val >= 0) { + opts.max_aq_depth = val; + } + + transport = spdk_nvmf_transport_create(trtype, &opts); + if (transport) { + spdk_nvmf_tgt_add_transport(g_spdk_nvmf_tgt, transport, spdk_nvmf_tgt_add_transport_done, ctx); + } else { + ctx->cb_fn(-1); + free(ctx); + return; + } +} + +static int +spdk_nvmf_parse_transports(spdk_nvmf_parse_conf_done_fn cb_fn) +{ + struct spdk_nvmf_parse_transport_ctx *ctx; + + ctx = calloc(1, sizeof(struct spdk_nvmf_parse_transport_ctx)); + if (!ctx) { + SPDK_ERRLOG("Failed alloc of context memory for parse transports\n"); + return -ENOMEM; + } + + ctx->cb_fn = cb_fn; + 
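+	/*
+	 * The loop below scans the config file for [Transport] sections. A minimal
+	 * illustrative section (the keys match what spdk_nvmf_parse_transport()
+	 * reads; the values are examples only):
+	 *
+	 *   [Transport]
+	 *     Type RDMA
+	 *     MaxQueueDepth 128
+	 *     InCapsuleDataSize 4096
+	 *     MaxIOSize 131072
+	 *     IOUnitSize 131072
+	 *     MaxAQDepth 32
+	 */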
ctx->sp = spdk_conf_first_section(NULL); + while (ctx->sp != NULL) { + if (spdk_conf_section_match_prefix(ctx->sp, "Transport")) { + spdk_nvmf_parse_transport(ctx); + return 0; + } + ctx->sp = spdk_conf_next_section(ctx->sp); + } + + /* if we get here, there are no transports defined in conf file */ + free(ctx); + cb_fn(spdk_nvmf_parse_subsystems()); + + return 0; +} + +int +spdk_nvmf_parse_conf(spdk_nvmf_parse_conf_done_fn cb_fn) +{ + int rc; + + if (cb_fn == NULL) { + SPDK_ERRLOG("Callback function is NULL\n"); + return -1; + } + + /* NVMf section */ + rc = spdk_nvmf_parse_nvmf_tgt(); + if (rc < 0) { + return rc; + } + + /* Transport sections */ + rc = spdk_nvmf_parse_transports(cb_fn); + if (rc < 0) { + return rc; + } + + return 0; +} diff --git a/src/spdk/lib/event/subsystems/nvmf/event_nvmf.h b/src/spdk/lib/event/subsystems/nvmf/event_nvmf.h new file mode 100644 index 00000000..50e5d755 --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/event_nvmf.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef NVMF_TGT_H +#define NVMF_TGT_H + +#include "spdk/stdinc.h" + +#include "spdk/nvmf.h" +#include "spdk/queue.h" + +#include "spdk_internal/event.h" +#include "spdk_internal/log.h" + +#define ACCEPT_TIMEOUT_US 10000 /* 10ms */ +#define DEFAULT_CONN_SCHED CONNECT_SCHED_ROUND_ROBIN + +enum spdk_nvmf_connect_sched { + CONNECT_SCHED_ROUND_ROBIN = 0, + CONNECT_SCHED_HOST_IP, +}; + +struct spdk_nvmf_tgt_conf { + uint32_t acceptor_poll_rate; + enum spdk_nvmf_connect_sched conn_sched; +}; + +extern struct spdk_nvmf_tgt_opts *g_spdk_nvmf_tgt_opts; +extern struct spdk_nvmf_tgt_conf *g_spdk_nvmf_tgt_conf; + +extern struct spdk_nvmf_tgt *g_spdk_nvmf_tgt; + +typedef void (*spdk_nvmf_parse_conf_done_fn)(int status); + +int spdk_nvmf_parse_conf(spdk_nvmf_parse_conf_done_fn cb_fn); + +#endif diff --git a/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc.c b/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc.c new file mode 100644 index 00000000..e4114afe --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc.c @@ -0,0 +1,1562 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "event_nvmf.h" + +#include "spdk/bdev.h" +#include "spdk/log.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/nvme.h" +#include "spdk/nvmf.h" +#include "spdk/string.h" +#include "spdk/util.h" + +static int +json_write_hex_str(struct spdk_json_write_ctx *w, const void *data, size_t size) +{ + static const char hex_char[16] = "0123456789ABCDEF"; + const uint8_t *buf = data; + char *str, *out; + int rc; + + str = malloc(size * 2 + 1); + if (str == NULL) { + return -1; + } + + out = str; + while (size--) { + unsigned byte = *buf++; + + out[0] = hex_char[(byte >> 4) & 0xF]; + out[1] = hex_char[byte & 0xF]; + + out += 2; + } + *out = '\0'; + + rc = spdk_json_write_string(w, str); + free(str); + + return rc; +} + +static int +hex_nybble_to_num(char c) +{ + if (c >= '0' && c <= '9') { + return c - '0'; + } + + if (c >= 'a' && c <= 'f') { + return c - 'a' + 0xA; + } + + if (c >= 'A' && c <= 'F') { + return c - 'A' + 0xA; + } + + return -1; +} + +static int +hex_byte_to_num(const char *str) +{ + int hi, lo; + + hi = hex_nybble_to_num(str[0]); + if (hi < 0) { + return hi; + } + + lo = hex_nybble_to_num(str[1]); + if (lo < 0) { + return lo; + } + + return hi * 16 + lo; +} + +static int +decode_hex_string_be(const char *str, uint8_t *out, size_t size) +{ + size_t i; + + /* Decode a string in "ABCDEF012345" format to its binary representation */ + for (i = 0; i < size; i++) { + int num = hex_byte_to_num(str); + + if (num < 0) { + /* Invalid hex byte or end of string */ + return -1; + } + + out[i] = (uint8_t)num; + str += 2; + } + + if (i != size || *str != '\0') { + /* Length mismatch */ + return -1; + } + + return 0; +} + +static int +decode_ns_nguid(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + /* 16-byte NGUID */ + rc = decode_hex_string_be(str, out, 16); + } + + free(str); + return rc; +} + +static int +decode_ns_eui64(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + /* 8-byte EUI-64 */ + rc = decode_hex_string_be(str, out, 8); + } + + free(str); + return rc; +} + +static int +decode_ns_uuid(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + rc = spdk_uuid_parse(out, str); + } + + free(str); + return rc; +} + +static void +dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_host *host; + struct spdk_nvmf_listener *listener; + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "nqn"); + spdk_json_write_string(w, spdk_nvmf_subsystem_get_nqn(subsystem)); + spdk_json_write_name(w, "subtype"); + if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) { + spdk_json_write_string(w, "NVMe"); + } else { + spdk_json_write_string(w, "Discovery"); + } + + spdk_json_write_name(w, "listen_addresses"); + spdk_json_write_array_begin(w); + + for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL; + listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) { + const struct spdk_nvme_transport_id *trid; + const char *trtype; + const char *adrfam; + + trid = spdk_nvmf_listener_get_trid(listener); + + spdk_json_write_object_begin(w); + trtype = spdk_nvme_transport_id_trtype_str(trid->trtype); + if (trtype == NULL) { + trtype = "unknown"; + } + adrfam = 
spdk_nvme_transport_id_adrfam_str(trid->adrfam); + if (adrfam == NULL) { + adrfam = "unknown"; + } + /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */ + spdk_json_write_name(w, "transport"); + spdk_json_write_string(w, trtype); + spdk_json_write_name(w, "trtype"); + spdk_json_write_string(w, trtype); + spdk_json_write_name(w, "adrfam"); + spdk_json_write_string(w, adrfam); + spdk_json_write_name(w, "traddr"); + spdk_json_write_string(w, trid->traddr); + spdk_json_write_name(w, "trsvcid"); + spdk_json_write_string(w, trid->trsvcid); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_json_write_name(w, "allow_any_host"); + spdk_json_write_bool(w, spdk_nvmf_subsystem_get_allow_any_host(subsystem)); + + spdk_json_write_name(w, "hosts"); + spdk_json_write_array_begin(w); + + for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL; + host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) { + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "nqn"); + spdk_json_write_string(w, spdk_nvmf_host_get_nqn(host)); + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) { + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_ns_opts ns_opts; + uint32_t max_namespaces; + + spdk_json_write_name(w, "serial_number"); + spdk_json_write_string(w, spdk_nvmf_subsystem_get_sn(subsystem)); + + max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem); + if (max_namespaces != 0) { + spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces); + } + + spdk_json_write_name(w, "namespaces"); + spdk_json_write_array_begin(w); + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts)); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "nsid"); + spdk_json_write_int32(w, spdk_nvmf_ns_get_id(ns)); + spdk_json_write_name(w, "bdev_name"); + spdk_json_write_string(w, spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + /* NOTE: "name" is kept for compatibility only - new code should use bdev_name. 
*/ + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + + if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) { + spdk_json_write_name(w, "nguid"); + json_write_hex_str(w, ns_opts.nguid, sizeof(ns_opts.nguid)); + } + + if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) { + spdk_json_write_name(w, "eui64"); + json_write_hex_str(w, ns_opts.eui64, sizeof(ns_opts.eui64)); + } + + if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) { + char uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid); + spdk_json_write_name(w, "uuid"); + spdk_json_write_string(w, uuid_str); + } + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + } + spdk_json_write_object_end(w); +} + +static void +spdk_rpc_get_nvmf_subsystems(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_nvmf_subsystem *subsystem; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_nvmf_subsystems requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt); + while (subsystem) { + dump_nvmf_subsystem(w, subsystem); + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_nvmf_subsystems", spdk_rpc_get_nvmf_subsystems, SPDK_RPC_RUNTIME) + +struct rpc_subsystem_create { + char *nqn; + char *serial_number; + uint32_t max_namespaces; + bool allow_any_host; +}; + +static const struct spdk_json_object_decoder rpc_subsystem_create_decoders[] = { + {"nqn", offsetof(struct rpc_subsystem_create, nqn), spdk_json_decode_string}, + {"serial_number", offsetof(struct rpc_subsystem_create, serial_number), spdk_json_decode_string, true}, + {"max_namespaces", offsetof(struct rpc_subsystem_create, max_namespaces), spdk_json_decode_uint32, true}, + {"allow_any_host", offsetof(struct rpc_subsystem_create, allow_any_host), spdk_json_decode_bool, true}, +}; + +static void +spdk_rpc_nvmf_subsystem_started(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +spdk_rpc_nvmf_subsystem_create(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_subsystem_create *req; + struct spdk_nvmf_subsystem *subsystem; + + req = calloc(1, sizeof(*req)); + if (!req) { + goto invalid; + } + + if (spdk_json_decode_object(params, rpc_subsystem_create_decoders, + SPDK_COUNTOF(rpc_subsystem_create_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, req->nqn, SPDK_NVMF_SUBTYPE_NVME, + req->max_namespaces); + if (!subsystem) { + goto invalid; + } + + if (req->serial_number) { + if (spdk_nvmf_subsystem_set_sn(subsystem, req->serial_number)) { + SPDK_ERRLOG("Subsystem %s: invalid serial number '%s'\n", req->nqn, req->serial_number); + goto invalid; + } + } + + spdk_nvmf_subsystem_set_allow_any_host(subsystem, req->allow_any_host); + + free(req->nqn); + 
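+	/*
+	 * Success path: the remaining decoded strings are released below, then the
+	 * subsystem is started; the JSON-RPC result is written from
+	 * spdk_rpc_nvmf_subsystem_started(). An illustrative request (example
+	 * values only):
+	 *   {"method": "nvmf_subsystem_create",
+	 *    "params": {"nqn": "nqn.2016-06.io.spdk:cnode1",
+	 *               "serial_number": "SPDK00000000000001",
+	 *               "allow_any_host": false}}
+	 */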
free(req->serial_number); + free(req); + + spdk_nvmf_subsystem_start(subsystem, + spdk_rpc_nvmf_subsystem_started, + request); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + if (req) { + free(req->nqn); + free(req->serial_number); + } + free(req); +} +SPDK_RPC_REGISTER("nvmf_subsystem_create", spdk_rpc_nvmf_subsystem_create, SPDK_RPC_RUNTIME) + +struct rpc_delete_subsystem { + char *nqn; +}; + +static void +free_rpc_delete_subsystem(struct rpc_delete_subsystem *r) +{ + free(r->nqn); +} + +static void +spdk_rpc_nvmf_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + spdk_nvmf_subsystem_destroy(subsystem); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static const struct spdk_json_object_decoder rpc_delete_subsystem_decoders[] = { + {"nqn", offsetof(struct rpc_delete_subsystem, nqn), spdk_json_decode_string}, +}; + +static void +spdk_rpc_delete_nvmf_subsystem(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_subsystem req = {}; + struct spdk_nvmf_subsystem *subsystem; + + if (spdk_json_decode_object(params, rpc_delete_subsystem_decoders, + SPDK_COUNTOF(rpc_delete_subsystem_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.nqn == NULL) { + SPDK_ERRLOG("missing name param\n"); + goto invalid; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, req.nqn); + if (!subsystem) { + goto invalid; + } + + free_rpc_delete_subsystem(&req); + + spdk_nvmf_subsystem_stop(subsystem, + spdk_rpc_nvmf_subsystem_stopped, + request); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_delete_subsystem(&req); +} +SPDK_RPC_REGISTER("delete_nvmf_subsystem", spdk_rpc_delete_nvmf_subsystem, SPDK_RPC_RUNTIME) + +struct rpc_listen_address { + char *transport; + char *adrfam; + char *traddr; + char *trsvcid; +}; + +#define RPC_MAX_LISTEN_ADDRESSES 255 +#define RPC_MAX_NAMESPACES 255 + +struct rpc_listen_addresses { + size_t num_listen_address; + struct rpc_listen_address addresses[RPC_MAX_LISTEN_ADDRESSES]; +}; + +static const struct spdk_json_object_decoder rpc_listen_address_decoders[] = { + /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */ + {"transport", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"trtype", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"adrfam", offsetof(struct rpc_listen_address, adrfam), spdk_json_decode_string, true}, + {"traddr", offsetof(struct rpc_listen_address, traddr), spdk_json_decode_string}, + {"trsvcid", offsetof(struct rpc_listen_address, trsvcid), spdk_json_decode_string}, +}; + +static int +decode_rpc_listen_address(const struct spdk_json_val *val, void *out) +{ + struct rpc_listen_address *req = (struct rpc_listen_address *)out; + if (spdk_json_decode_object(val, rpc_listen_address_decoders, + SPDK_COUNTOF(rpc_listen_address_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + return -1; + } + return 0; +} + +static void +free_rpc_listen_address(struct rpc_listen_address *r) +{ + free(r->transport); + free(r->adrfam); + free(r->traddr); + 
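+	/*
+	 * The strings freed here are the fields of a "listen_address" object as
+	 * decoded by decode_rpc_listen_address(); an illustrative value (example
+	 * addresses only):
+	 *   {"trtype": "RDMA", "adrfam": "IPv4",
+	 *    "traddr": "192.168.1.10", "trsvcid": "4420"}
+	 */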
free(r->trsvcid); +} + +enum nvmf_rpc_listen_op { + NVMF_RPC_LISTEN_ADD, + NVMF_RPC_LISTEN_REMOVE, +}; + +struct nvmf_rpc_listener_ctx { + char *nqn; + struct spdk_nvmf_subsystem *subsystem; + struct rpc_listen_address address; + + struct spdk_jsonrpc_request *request; + struct spdk_nvme_transport_id trid; + enum nvmf_rpc_listen_op op; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_listener_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_listener_ctx, nqn), spdk_json_decode_string}, + {"listen_address", offsetof(struct nvmf_rpc_listener_ctx, address), decode_rpc_listen_address}, +}; + +static void +nvmf_rpc_listener_ctx_free(struct nvmf_rpc_listener_ctx *ctx) +{ + free(ctx->nqn); + free_rpc_listen_address(&ctx->address); + free(ctx); +} + +static void +nvmf_rpc_listen_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; + + request = ctx->request; + if (ctx->response_sent) { + /* If an error occurred, the response has already been sent. */ + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + nvmf_rpc_listener_ctx_free(ctx); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_tgt_listen(void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + + if (status) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } else { + if (spdk_nvmf_subsystem_add_listener(ctx->subsystem, &ctx->trid)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + } + + if (spdk_nvmf_subsystem_resume(ctx->subsystem, nvmf_rpc_listen_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_listener_ctx_free(ctx); + /* Can't really do anything to recover here - subsystem will remain paused. */ + } +} + +static void +nvmf_rpc_listen_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_listener_ctx *ctx = cb_arg; + + if (ctx->op == NVMF_RPC_LISTEN_ADD) { + spdk_nvmf_tgt_listen(g_spdk_nvmf_tgt, &ctx->trid, nvmf_rpc_tgt_listen, ctx); + return; + } else if (ctx->op == NVMF_RPC_LISTEN_REMOVE) { + if (spdk_nvmf_subsystem_remove_listener(subsystem, &ctx->trid)) { + SPDK_ERRLOG("Unable to remove listener.\n"); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + } else { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_listen_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_listener_ctx_free(ctx); + /* Can't really do anything to recover here - subsystem will remain paused. 
*/ + } +} + +static int +rpc_listen_address_to_trid(const struct rpc_listen_address *address, + struct spdk_nvme_transport_id *trid) +{ + size_t len; + + memset(trid, 0, sizeof(*trid)); + + if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, address->transport)) { + SPDK_ERRLOG("Invalid transport type: %s\n", address->transport); + return -EINVAL; + } + + if (address->adrfam) { + if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, address->adrfam)) { + SPDK_ERRLOG("Invalid adrfam: %s\n", address->adrfam); + return -EINVAL; + } + } else { + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + } + + len = strlen(address->traddr); + if (len > sizeof(trid->traddr) - 1) { + SPDK_ERRLOG("Transport address longer than %zu characters: %s\n", + sizeof(trid->traddr) - 1, address->traddr); + return -EINVAL; + } + memcpy(trid->traddr, address->traddr, len + 1); + + len = strlen(address->trsvcid); + if (len > sizeof(trid->trsvcid) - 1) { + SPDK_ERRLOG("Transport service id longer than %zu characters: %s\n", + sizeof(trid->trsvcid) - 1, address->trsvcid); + return -EINVAL; + } + memcpy(trid->trsvcid, address->trsvcid, len + 1); + + return 0; +} + +static void +nvmf_rpc_subsystem_add_listener(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_listener_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + ctx->request = request; + + if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder, + SPDK_COUNTOF(nvmf_rpc_listener_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->subsystem = subsystem; + + if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->op = NVMF_RPC_LISTEN_ADD; + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_listener", nvmf_rpc_subsystem_add_listener, SPDK_RPC_RUNTIME); + +static void +nvmf_rpc_subsystem_remove_listener(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_listener_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + ctx->request = request; + + if (spdk_json_decode_object(params, nvmf_rpc_listener_decoder, + SPDK_COUNTOF(nvmf_rpc_listener_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + subsystem = 
spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->subsystem = subsystem; + + if (rpc_listen_address_to_trid(&ctx->address, &ctx->trid)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + + ctx->op = NVMF_RPC_LISTEN_REMOVE; + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_listener_ctx_free(ctx); + return; + } + +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_listener", nvmf_rpc_subsystem_remove_listener, + SPDK_RPC_RUNTIME); + +struct spdk_nvmf_ns_params { + char *bdev_name; + uint32_t nsid; + char nguid[16]; + char eui64[8]; + struct spdk_uuid uuid; +}; + +struct rpc_namespaces { + size_t num_ns; + struct spdk_nvmf_ns_params ns_params[RPC_MAX_NAMESPACES]; +}; + + +static const struct spdk_json_object_decoder rpc_ns_params_decoders[] = { + {"nsid", offsetof(struct spdk_nvmf_ns_params, nsid), spdk_json_decode_uint32, true}, + {"bdev_name", offsetof(struct spdk_nvmf_ns_params, bdev_name), spdk_json_decode_string}, + {"nguid", offsetof(struct spdk_nvmf_ns_params, nguid), decode_ns_nguid, true}, + {"eui64", offsetof(struct spdk_nvmf_ns_params, eui64), decode_ns_eui64, true}, + {"uuid", offsetof(struct spdk_nvmf_ns_params, uuid), decode_ns_uuid, true}, +}; + +static int +decode_rpc_ns_params(const struct spdk_json_val *val, void *out) +{ + struct spdk_nvmf_ns_params *ns_params = out; + + return spdk_json_decode_object(val, rpc_ns_params_decoders, + SPDK_COUNTOF(rpc_ns_params_decoders), + ns_params); +} + +struct nvmf_rpc_ns_ctx { + char *nqn; + struct spdk_nvmf_ns_params ns_params; + + struct spdk_jsonrpc_request *request; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_ns_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_ns_ctx, nqn), spdk_json_decode_string}, + {"namespace", offsetof(struct nvmf_rpc_ns_ctx, ns_params), decode_rpc_ns_params}, +}; + +static void +nvmf_rpc_ns_ctx_free(struct nvmf_rpc_ns_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->ns_params.bdev_name); + free(ctx); +} + +static void +nvmf_rpc_ns_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_ns_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request = ctx->request; + uint32_t nsid = ctx->ns_params.nsid; + bool response_sent = ctx->response_sent; + struct spdk_json_write_ctx *w; + + nvmf_rpc_ns_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_uint32(w, nsid); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_ns_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_ns_ctx *ctx = cb_arg; + struct spdk_nvmf_ns_opts ns_opts; + struct spdk_bdev *bdev; + + bdev = spdk_bdev_get_by_name(ctx->ns_params.bdev_name); + if (!bdev) { + SPDK_ERRLOG("No bdev with name %s\n", ctx->ns_params.bdev_name); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + goto resume; + } + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, 
sizeof(ns_opts)); + ns_opts.nsid = ctx->ns_params.nsid; + + SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(ctx->ns_params.nguid), "size mismatch"); + memcpy(ns_opts.nguid, ctx->ns_params.nguid, sizeof(ns_opts.nguid)); + + SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(ctx->ns_params.eui64), "size mismatch"); + memcpy(ns_opts.eui64, ctx->ns_params.eui64, sizeof(ns_opts.eui64)); + + if (!spdk_mem_all_zero(&ctx->ns_params.uuid, sizeof(ctx->ns_params.uuid))) { + ns_opts.uuid = ctx->ns_params.uuid; + } + + ctx->ns_params.nsid = spdk_nvmf_subsystem_add_ns(subsystem, bdev, &ns_opts, sizeof(ns_opts)); + if (ctx->ns_params.nsid == 0) { + SPDK_ERRLOG("Unable to add namespace\n"); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + goto resume; + } + +resume: + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_ns_resumed, ctx)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } +} + +static void +nvmf_rpc_subsystem_add_ns(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_ns_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_ns_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_ns_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_ns_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_ns_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_ns", nvmf_rpc_subsystem_add_ns, SPDK_RPC_RUNTIME) + +struct nvmf_rpc_remove_ns_ctx { + char *nqn; + uint32_t nsid; + + struct spdk_jsonrpc_request *request; + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_remove_ns_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_remove_ns_ctx, nqn), spdk_json_decode_string}, + {"nsid", offsetof(struct nvmf_rpc_remove_ns_ctx, nsid), spdk_json_decode_uint32}, +}; + +static void +nvmf_rpc_remove_ns_ctx_free(struct nvmf_rpc_remove_ns_ctx *ctx) +{ + free(ctx->nqn); + free(ctx); +} + +static void +nvmf_rpc_remove_ns_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request = ctx->request; + bool response_sent = ctx->response_sent; + struct spdk_json_write_ctx *w; + + nvmf_rpc_remove_ns_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_remove_ns_remove_done(struct spdk_nvmf_subsystem *subsystem, void 
*cb_arg, int status) +{ + struct nvmf_rpc_remove_ns_ctx *ctx; + + ctx = cb_arg; + + if (status != 0) { + SPDK_ERRLOG("Unable to remove namespace ID %u\n", ctx->nsid); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_remove_ns_resumed, ctx)) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } +} + +static void +nvmf_rpc_remove_ns_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_remove_ns_ctx *ctx = cb_arg; + int ret; + + ret = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid, nvmf_rpc_remove_ns_remove_done, ctx); + if (ret < 0) { + SPDK_ERRLOG("Unable to remove namespace ID %u\n", ctx->nsid); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + ctx->response_sent = true; + spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_remove_ns_resumed, ctx); + } +} + +static void +nvmf_rpc_subsystem_remove_ns(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_remove_ns_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_remove_ns_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_remove_ns_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_remove_ns_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_remove_ns_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_ns", nvmf_rpc_subsystem_remove_ns, SPDK_RPC_RUNTIME) + +enum nvmf_rpc_host_op { + NVMF_RPC_HOST_ADD, + NVMF_RPC_HOST_REMOVE, + NVMF_RPC_HOST_ALLOW_ANY, +}; + +struct nvmf_rpc_host_ctx { + struct spdk_jsonrpc_request *request; + + char *nqn; + char *host; + + enum nvmf_rpc_host_op op; + + bool allow_any_host; + + bool response_sent; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_host_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string}, + {"host", offsetof(struct nvmf_rpc_host_ctx, host), spdk_json_decode_string}, +}; + +static void +nvmf_rpc_host_ctx_free(struct nvmf_rpc_host_ctx *ctx) +{ + free(ctx->nqn); + free(ctx->host); + free(ctx); +} + +static void +nvmf_rpc_host_resumed(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_host_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; + bool response_sent = ctx->response_sent; + + request = ctx->request; + nvmf_rpc_host_ctx_free(ctx); + + if (response_sent) { + return; + } + + w = 
spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_host_paused(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct nvmf_rpc_host_ctx *ctx = cb_arg; + int rc = -1; + + switch (ctx->op) { + case NVMF_RPC_HOST_ADD: + rc = spdk_nvmf_subsystem_add_host(subsystem, ctx->host); + break; + case NVMF_RPC_HOST_REMOVE: + rc = spdk_nvmf_subsystem_remove_host(subsystem, ctx->host); + break; + case NVMF_RPC_HOST_ALLOW_ANY: + rc = spdk_nvmf_subsystem_set_allow_any_host(subsystem, ctx->allow_any_host); + break; + } + + if (rc != 0) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + ctx->response_sent = true; + } + + if (spdk_nvmf_subsystem_resume(subsystem, nvmf_rpc_host_resumed, ctx)) { + if (!ctx->response_sent) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + } + nvmf_rpc_host_ctx_free(ctx); + return; + } +} + +static void +nvmf_rpc_subsystem_add_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_ADD; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_add_host", nvmf_rpc_subsystem_add_host, SPDK_RPC_RUNTIME) + +static void +nvmf_rpc_subsystem_remove_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_REMOVE; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + 
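+	/*
+	 * As in nvmf_subsystem_add_host, the host list is only modified while the
+	 * subsystem is paused: pause here, apply the change in
+	 * nvmf_rpc_host_paused(), then resume and send the JSON-RPC response from
+	 * nvmf_rpc_host_resumed().
+	 */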
+ if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_remove_host", nvmf_rpc_subsystem_remove_host, SPDK_RPC_RUNTIME) + + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_any_host_decoder[] = { + {"nqn", offsetof(struct nvmf_rpc_host_ctx, nqn), spdk_json_decode_string}, + {"allow_any_host", offsetof(struct nvmf_rpc_host_ctx, allow_any_host), spdk_json_decode_bool}, +}; + +static void +nvmf_rpc_subsystem_allow_any_host(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_host_ctx *ctx; + struct spdk_nvmf_subsystem *subsystem; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_any_host_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_any_host_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + ctx->request = request; + ctx->op = NVMF_RPC_HOST_ALLOW_ANY; + ctx->response_sent = false; + + subsystem = spdk_nvmf_tgt_find_subsystem(g_spdk_nvmf_tgt, ctx->nqn); + if (!subsystem) { + SPDK_ERRLOG("Unable to find subsystem with NQN %s\n", ctx->nqn); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_host_ctx_free(ctx); + return; + } + + if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error"); + nvmf_rpc_host_ctx_free(ctx); + return; + } +} +SPDK_RPC_REGISTER("nvmf_subsystem_allow_any_host", nvmf_rpc_subsystem_allow_any_host, + SPDK_RPC_RUNTIME) + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_tgt_opts_decoder[] = { + {"max_queue_depth", offsetof(struct spdk_nvmf_tgt_opts, max_queue_depth), spdk_json_decode_uint16, true}, + {"max_qpairs_per_ctrlr", offsetof(struct spdk_nvmf_tgt_opts, max_qpairs_per_ctrlr), spdk_json_decode_uint16, true}, + {"in_capsule_data_size", offsetof(struct spdk_nvmf_tgt_opts, in_capsule_data_size), spdk_json_decode_uint32, true}, + {"max_io_size", offsetof(struct spdk_nvmf_tgt_opts, max_io_size), spdk_json_decode_uint32, true}, + {"max_subsystems", offsetof(struct spdk_nvmf_tgt_opts, max_subsystems), spdk_json_decode_uint32, true}, + {"io_unit_size", offsetof(struct spdk_nvmf_tgt_opts, io_unit_size), spdk_json_decode_uint32, true}, +}; + +static void +nvmf_rpc_subsystem_set_tgt_opts(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_nvmf_tgt_opts *opts; + struct spdk_json_write_ctx *w; + + if (g_spdk_nvmf_tgt_opts != NULL) { + SPDK_ERRLOG("this RPC must not be called more than once.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Must not call more than once"); + return; + } + + opts = calloc(1, sizeof(*opts)); + if (opts == NULL) { + SPDK_ERRLOG("malloc() failed for target options\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Out of memory"); + return; + } + + spdk_nvmf_tgt_opts_init(opts); + + if (params != NULL) { + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_tgt_opts_decoder, 
+ SPDK_COUNTOF(nvmf_rpc_subsystem_tgt_opts_decoder), opts)) { + free(opts); + SPDK_ERRLOG("spdk_json_decode_object() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + } + + g_spdk_nvmf_tgt_opts = opts; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_nvmf_target_options", nvmf_rpc_subsystem_set_tgt_opts, SPDK_RPC_STARTUP) + +static int decode_conn_sched(const struct spdk_json_val *val, void *out) +{ + enum spdk_nvmf_connect_sched *sched = out; + + if (spdk_json_strequal(val, "roundrobin") == true) { + *sched = CONNECT_SCHED_ROUND_ROBIN; + } else if (spdk_json_strequal(val, "hostip") == true) { + *sched = CONNECT_SCHED_HOST_IP; + } else { + SPDK_ERRLOG("Invalid connection scheduling parameter\n"); + return -EINVAL; + } + + return 0; +} + +static const struct spdk_json_object_decoder nvmf_rpc_subsystem_tgt_conf_decoder[] = { + {"acceptor_poll_rate", offsetof(struct spdk_nvmf_tgt_conf, acceptor_poll_rate), spdk_json_decode_uint32, true}, + {"conn_sched", offsetof(struct spdk_nvmf_tgt_conf, conn_sched), decode_conn_sched, true}, +}; + +static void +nvmf_rpc_subsystem_set_tgt_conf(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_nvmf_tgt_conf *conf; + struct spdk_json_write_ctx *w; + + if (g_spdk_nvmf_tgt_conf != NULL) { + SPDK_ERRLOG("this RPC must not be called more than once.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Must not call more than once"); + return; + } + + conf = calloc(1, sizeof(*conf)); + if (conf == NULL) { + SPDK_ERRLOG("calloc() failed for target config\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Out of memory"); + return; + } + + conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US; + conf->conn_sched = DEFAULT_CONN_SCHED; + + if (params != NULL) { + if (spdk_json_decode_object(params, nvmf_rpc_subsystem_tgt_conf_decoder, + SPDK_COUNTOF(nvmf_rpc_subsystem_tgt_conf_decoder), conf)) { + free(conf); + SPDK_ERRLOG("spdk_json_decode_object() failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + } + + g_spdk_nvmf_tgt_conf = conf; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_nvmf_target_config", nvmf_rpc_subsystem_set_tgt_conf, SPDK_RPC_STARTUP) + +struct nvmf_rpc_create_transport_ctx { + char *trtype; + struct spdk_nvmf_transport_opts opts; + struct spdk_jsonrpc_request *request; +}; + +static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[] = { + { "trtype", offsetof(struct nvmf_rpc_create_transport_ctx, trtype), spdk_json_decode_string}, + { + "max_queue_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_queue_depth), + spdk_json_decode_uint16, true + }, + { + "max_qpairs_per_ctrlr", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_qpairs_per_ctrlr), + spdk_json_decode_uint16, true + }, + { + "in_capsule_data_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.in_capsule_data_size), + spdk_json_decode_uint32, true + }, + { + "max_io_size", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_io_size), + spdk_json_decode_uint32, true + }, + { + "io_unit_size", offsetof(struct 
nvmf_rpc_create_transport_ctx, opts.io_unit_size), + spdk_json_decode_uint32, true + }, + { + "max_aq_depth", offsetof(struct nvmf_rpc_create_transport_ctx, opts.max_aq_depth), + spdk_json_decode_uint32, true + }, +}; + +static void +nvmf_rpc_create_transport_ctx_free(struct nvmf_rpc_create_transport_ctx *ctx) +{ + free(ctx->trtype); + free(ctx); +} + +static void +nvmf_rpc_tgt_add_transport_done(void *cb_arg, int status) +{ + struct nvmf_rpc_create_transport_ctx *ctx = cb_arg; + struct spdk_jsonrpc_request *request; + struct spdk_json_write_ctx *w; + + request = ctx->request; + nvmf_rpc_create_transport_ctx_free(ctx); + + if (status) { + SPDK_ERRLOG("Failed to add transport to tgt.(%d)\n", status); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Failed to add transport to tgt.(%d)\n", + status); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static void +nvmf_rpc_create_transport(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct nvmf_rpc_create_transport_ctx *ctx; + enum spdk_nvme_transport_type trtype; + struct spdk_nvmf_transport *transport; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + return; + } + + /* Decode parameters the first time to get the transport type */ + if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder, + SPDK_COUNTOF(nvmf_rpc_create_transport_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + if (spdk_nvme_transport_id_parse_trtype(&trtype, ctx->trtype)) { + SPDK_ERRLOG("Invalid transport type '%s'\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid transport type '%s'\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + /* Initialize all the transport options (based on transport type) and decode the + * parameters again to update any options passed in rpc create transport call. 
+ */ + spdk_nvmf_transport_opts_init(trtype, &ctx->opts); + if (spdk_json_decode_object(params, nvmf_rpc_create_transport_decoder, + SPDK_COUNTOF(nvmf_rpc_create_transport_decoder), + ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + if (spdk_nvmf_tgt_get_transport(g_spdk_nvmf_tgt, trtype)) { + SPDK_ERRLOG("Transport type '%s' already exists\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Transport type '%s' already exists\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + transport = spdk_nvmf_transport_create(trtype, &ctx->opts); + + if (!transport) { + SPDK_ERRLOG("Transport type '%s' create failed\n", ctx->trtype); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Transport type '%s' create failed\n", ctx->trtype); + nvmf_rpc_create_transport_ctx_free(ctx); + return; + } + + /* add transport to target */ + ctx->request = request; + spdk_nvmf_tgt_add_transport(g_spdk_nvmf_tgt, transport, nvmf_rpc_tgt_add_transport_done, ctx); +} + +SPDK_RPC_REGISTER("nvmf_create_transport", nvmf_rpc_create_transport, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc_deprecated.c b/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc_deprecated.c new file mode 100644 index 00000000..30e5d04c --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/nvmf_rpc_deprecated.c @@ -0,0 +1,620 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "event_nvmf.h" + +#include "spdk/bdev.h" +#include "spdk/log.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/nvme.h" +#include "spdk/nvmf.h" +#include "spdk/string.h" +#include "spdk/util.h" + +static int +hex_nybble_to_num(char c) +{ + if (c >= '0' && c <= '9') { + return c - '0'; + } + + if (c >= 'a' && c <= 'f') { + return c - 'a' + 0xA; + } + + if (c >= 'A' && c <= 'F') { + return c - 'A' + 0xA; + } + + return -1; +} + +static int +hex_byte_to_num(const char *str) +{ + int hi, lo; + + hi = hex_nybble_to_num(str[0]); + if (hi < 0) { + return hi; + } + + lo = hex_nybble_to_num(str[1]); + if (lo < 0) { + return lo; + } + + return hi * 16 + lo; +} + +static int +decode_hex_string_be(const char *str, uint8_t *out, size_t size) +{ + size_t i; + + /* Decode a string in "ABCDEF012345" format to its binary representation */ + for (i = 0; i < size; i++) { + int num = hex_byte_to_num(str); + + if (num < 0) { + /* Invalid hex byte or end of string */ + return -1; + } + + out[i] = (uint8_t)num; + str += 2; + } + + if (i != size || *str != '\0') { + /* Length mismatch */ + return -1; + } + + return 0; +} + +static int +decode_ns_nguid(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + /* 16-byte NGUID */ + rc = decode_hex_string_be(str, out, 16); + } + + free(str); + return rc; +} + +static int +decode_ns_eui64(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + /* 8-byte EUI-64 */ + rc = decode_hex_string_be(str, out, 8); + } + + free(str); + return rc; +} + +static int +decode_ns_uuid(const struct spdk_json_val *val, void *out) +{ + char *str = NULL; + int rc; + + rc = spdk_json_decode_string(val, &str); + if (rc == 0) { + rc = spdk_uuid_parse(out, str); + } + + free(str); + return rc; +} + +struct rpc_listen_address { + char *transport; + char *adrfam; + char *traddr; + char *trsvcid; +}; + +#define RPC_MAX_LISTEN_ADDRESSES 255 +#define RPC_MAX_HOSTS 255 +#define RPC_MAX_NAMESPACES 255 + +struct rpc_listen_addresses { + size_t num_listen_address; + struct rpc_listen_address addresses[RPC_MAX_LISTEN_ADDRESSES]; +}; + +static const struct spdk_json_object_decoder rpc_listen_address_decoders[] = { + /* NOTE: "transport" is kept for compatibility; new code should use "trtype" */ + {"transport", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"trtype", offsetof(struct rpc_listen_address, transport), spdk_json_decode_string, true}, + {"adrfam", offsetof(struct rpc_listen_address, adrfam), spdk_json_decode_string, true}, + {"traddr", offsetof(struct rpc_listen_address, traddr), spdk_json_decode_string}, + {"trsvcid", offsetof(struct rpc_listen_address, trsvcid), spdk_json_decode_string}, +}; + +static int +decode_rpc_listen_address(const struct spdk_json_val *val, void *out) +{ + struct rpc_listen_address *req = (struct rpc_listen_address *)out; + if (spdk_json_decode_object(val, rpc_listen_address_decoders, + SPDK_COUNTOF(rpc_listen_address_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + return -1; + } + return 0; +} + +static void +free_rpc_listen_address(struct rpc_listen_address *r) +{ + free(r->transport); + free(r->adrfam); + free(r->traddr); + free(r->trsvcid); +} + +static int +rpc_listen_address_to_trid(const struct rpc_listen_address *address, + struct spdk_nvme_transport_id *trid) +{ + size_t len; + + memset(trid, 0, 
sizeof(*trid)); + + if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, address->transport)) { + SPDK_ERRLOG("Invalid transport type: %s\n", address->transport); + return -EINVAL; + } + + if (address->adrfam) { + if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, address->adrfam)) { + SPDK_ERRLOG("Invalid adrfam: %s\n", address->adrfam); + return -EINVAL; + } + } else { + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + } + + len = strlen(address->traddr); + if (len > sizeof(trid->traddr) - 1) { + SPDK_ERRLOG("Transport address longer than %zu characters: %s\n", + sizeof(trid->traddr) - 1, address->traddr); + return -EINVAL; + } + memcpy(trid->traddr, address->traddr, len + 1); + + len = strlen(address->trsvcid); + if (len > sizeof(trid->trsvcid) - 1) { + SPDK_ERRLOG("Transport service id longer than %zu characters: %s\n", + sizeof(trid->trsvcid) - 1, address->trsvcid); + return -EINVAL; + } + memcpy(trid->trsvcid, address->trsvcid, len + 1); + + return 0; +} + +static int +decode_rpc_listen_addresses(const struct spdk_json_val *val, void *out) +{ + struct rpc_listen_addresses *listen_addresses = out; + return spdk_json_decode_array(val, decode_rpc_listen_address, &listen_addresses->addresses, + RPC_MAX_LISTEN_ADDRESSES, + &listen_addresses->num_listen_address, sizeof(struct rpc_listen_address)); +} + +struct rpc_hosts { + size_t num_hosts; + char *hosts[RPC_MAX_HOSTS]; +}; + +static int +decode_rpc_hosts(const struct spdk_json_val *val, void *out) +{ + struct rpc_hosts *rpc_hosts = out; + + return spdk_json_decode_array(val, spdk_json_decode_string, rpc_hosts->hosts, RPC_MAX_HOSTS, + &rpc_hosts->num_hosts, sizeof(char *)); +} + + +struct spdk_nvmf_ns_params { + char *bdev_name; + uint32_t nsid; + char nguid[16]; + char eui64[8]; + struct spdk_uuid uuid; +}; + +struct rpc_namespaces { + size_t num_ns; + struct spdk_nvmf_ns_params ns_params[RPC_MAX_NAMESPACES]; +}; + + +static const struct spdk_json_object_decoder rpc_ns_params_decoders[] = { + {"nsid", offsetof(struct spdk_nvmf_ns_params, nsid), spdk_json_decode_uint32, true}, + {"bdev_name", offsetof(struct spdk_nvmf_ns_params, bdev_name), spdk_json_decode_string}, + {"nguid", offsetof(struct spdk_nvmf_ns_params, nguid), decode_ns_nguid, true}, + {"eui64", offsetof(struct spdk_nvmf_ns_params, eui64), decode_ns_eui64, true}, + {"uuid", offsetof(struct spdk_nvmf_ns_params, uuid), decode_ns_uuid, true}, +}; + +static void +free_rpc_ns_params(struct spdk_nvmf_ns_params *ns_params) +{ + free(ns_params->bdev_name); +} + +static void +free_rpc_namespaces(struct rpc_namespaces *r) +{ + size_t i; + + for (i = 0; i < r->num_ns; i++) { + free_rpc_ns_params(&r->ns_params[i]); + } +} + +static int +decode_rpc_ns_params(const struct spdk_json_val *val, void *out) +{ + struct spdk_nvmf_ns_params *ns_params = out; + + return spdk_json_decode_object(val, rpc_ns_params_decoders, + SPDK_COUNTOF(rpc_ns_params_decoders), + ns_params); +} + +static int +decode_rpc_namespaces(const struct spdk_json_val *val, void *out) +{ + struct rpc_namespaces *namespaces = out; + char *names[RPC_MAX_NAMESPACES] = {0}; /* old format - array of strings (bdev names) */ + size_t i; + int rc; + + /* First try to decode namespaces as an array of objects (new format) */ + if (spdk_json_decode_array(val, decode_rpc_ns_params, namespaces->ns_params, + SPDK_COUNTOF(namespaces->ns_params), + &namespaces->num_ns, sizeof(*namespaces->ns_params)) == 0) { + return 0; + } + + /* If that fails, try to decode namespaces as an array of strings (old format) */ + free_rpc_namespaces(namespaces); + 
memset(namespaces, 0, sizeof(*namespaces)); + rc = spdk_json_decode_array(val, spdk_json_decode_string, names, + SPDK_COUNTOF(names), + &namespaces->num_ns, sizeof(char *)); + if (rc == 0) { + /* Decoded old format - copy to ns_params (new format) */ + for (i = 0; i < namespaces->num_ns; i++) { + namespaces->ns_params[i].bdev_name = names[i]; + } + return 0; + } + + /* Failed to decode - don't leave dangling string pointers around */ + for (i = 0; i < namespaces->num_ns; i++) { + free(names[i]); + } + + return rc; +} + +static void +free_rpc_listen_addresses(struct rpc_listen_addresses *r) +{ + size_t i; + + for (i = 0; i < r->num_listen_address; i++) { + free_rpc_listen_address(&r->addresses[i]); + } +} + +static void +free_rpc_hosts(struct rpc_hosts *r) +{ + size_t i; + + for (i = 0; i < r->num_hosts; i++) { + free(r->hosts[i]); + } +} + +struct rpc_subsystem { + int32_t core; + char *mode; + char *nqn; + struct rpc_listen_addresses listen_addresses; + struct rpc_hosts hosts; + bool allow_any_host; + char *pci_address; + char *serial_number; + struct rpc_namespaces namespaces; + uint32_t num_ns; +}; + +static void +free_rpc_subsystem(struct rpc_subsystem *req) +{ + if (req) { + free(req->mode); + free(req->nqn); + free(req->serial_number); + free_rpc_namespaces(&req->namespaces); + free_rpc_listen_addresses(&req->listen_addresses); + free_rpc_hosts(&req->hosts); + } + free(req); +} + +static void +spdk_rpc_nvmf_subsystem_started(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} + +static const struct spdk_json_object_decoder rpc_subsystem_decoders[] = { + {"core", offsetof(struct rpc_subsystem, core), spdk_json_decode_int32, true}, + {"mode", offsetof(struct rpc_subsystem, mode), spdk_json_decode_string, true}, + {"nqn", offsetof(struct rpc_subsystem, nqn), spdk_json_decode_string}, + {"listen_addresses", offsetof(struct rpc_subsystem, listen_addresses), decode_rpc_listen_addresses, true}, + {"hosts", offsetof(struct rpc_subsystem, hosts), decode_rpc_hosts, true}, + {"allow_any_host", offsetof(struct rpc_subsystem, allow_any_host), spdk_json_decode_bool, true}, + {"serial_number", offsetof(struct rpc_subsystem, serial_number), spdk_json_decode_string, true}, + {"namespaces", offsetof(struct rpc_subsystem, namespaces), decode_rpc_namespaces, true}, + {"max_namespaces", offsetof(struct rpc_subsystem, num_ns), spdk_json_decode_uint32, true}, +}; + +struct subsystem_listen_ctx { + struct rpc_subsystem *req; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_jsonrpc_request *request; + + uint32_t idx; +}; + +static void +spdk_rpc_construct_subsystem_listen_done(void *cb_arg, int status) +{ + struct subsystem_listen_ctx *ctx = cb_arg; + struct rpc_listen_address *addr; + struct spdk_nvme_transport_id trid = {0}; + + if (status) { + goto invalid; + } + + addr = &ctx->req->listen_addresses.addresses[ctx->idx]; + if (rpc_listen_address_to_trid(addr, &trid)) { + goto invalid; + } + + spdk_nvmf_subsystem_add_listener(ctx->subsystem, &trid); + + ctx->idx++; + + if (ctx->idx < ctx->req->listen_addresses.num_listen_address) { + addr = &ctx->req->listen_addresses.addresses[ctx->idx]; + + if (rpc_listen_address_to_trid(addr, &trid)) { + goto invalid; + } + + spdk_nvmf_tgt_listen(g_spdk_nvmf_tgt, &trid, spdk_rpc_construct_subsystem_listen_done, 
ctx); + return; + } + + spdk_nvmf_subsystem_start(ctx->subsystem, + spdk_rpc_nvmf_subsystem_started, + ctx->request); + + free_rpc_subsystem(ctx->req); + free(ctx); + + return; + +invalid: + spdk_nvmf_subsystem_destroy(ctx->subsystem); + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_subsystem(ctx->req); + free(ctx); +} + +static void +spdk_rpc_construct_nvmf_subsystem(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_subsystem *req; + struct spdk_nvmf_subsystem *subsystem; + size_t i; + + SPDK_WARNLOG("The construct_nvmf_subsystem RPC is deprecated. Use nvmf_subsystem_create instead.\n"); + + req = calloc(1, sizeof(*req)); + if (!req) { + goto invalid; + } + + req->core = -1; /* Explicitly set the core as the uninitialized value */ + + if (spdk_json_decode_object(params, rpc_subsystem_decoders, + SPDK_COUNTOF(rpc_subsystem_decoders), + req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + /* Mode is no longer a valid parameter, but print out a nice + * message if it exists to inform users. + */ + if (req->mode) { + SPDK_NOTICELOG("Mode present in the construct NVMe-oF subsystem RPC.\n" + "Mode was removed as a valid parameter.\n"); + if (strcasecmp(req->mode, "Virtual") == 0) { + SPDK_NOTICELOG("Your mode value is 'Virtual' which is now the only possible mode.\n" + "Your RPC will work as expected.\n"); + } else { + SPDK_NOTICELOG("Please remove 'mode' from the RPC.\n"); + goto invalid; + } + } + + /* Core is no longer a valid parameter, but print out a nice + * message if it exists to inform users. + */ + if (req->core != -1) { + SPDK_NOTICELOG("Core present in the construct NVMe-oF subsystem RPC.\n" + "Core was removed as an option. 
Subsystems can now run on all available cores.\n"); + SPDK_NOTICELOG("Ignoring it and continuing.\n"); + } + + subsystem = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, req->nqn, SPDK_NVMF_SUBTYPE_NVME, + req->num_ns); + if (!subsystem) { + goto invalid; + } + + if (spdk_nvmf_subsystem_set_sn(subsystem, req->serial_number)) { + SPDK_ERRLOG("Subsystem %s: invalid serial number '%s'\n", req->nqn, req->serial_number); + goto invalid; + } + + for (i = 0; i < req->hosts.num_hosts; i++) { + spdk_nvmf_subsystem_add_host(subsystem, req->hosts.hosts[i]); + } + + spdk_nvmf_subsystem_set_allow_any_host(subsystem, req->allow_any_host); + + for (i = 0; i < req->namespaces.num_ns; i++) { + struct spdk_nvmf_ns_params *ns_params = &req->namespaces.ns_params[i]; + struct spdk_bdev *bdev; + struct spdk_nvmf_ns_opts ns_opts; + + bdev = spdk_bdev_get_by_name(ns_params->bdev_name); + if (bdev == NULL) { + SPDK_ERRLOG("Could not find namespace bdev '%s'\n", ns_params->bdev_name); + spdk_nvmf_subsystem_destroy(subsystem); + goto invalid; + } + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts)); + ns_opts.nsid = ns_params->nsid; + + SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(ns_params->nguid), "size mismatch"); + memcpy(ns_opts.nguid, ns_params->nguid, sizeof(ns_opts.nguid)); + + SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(ns_params->eui64), "size mismatch"); + memcpy(ns_opts.eui64, ns_params->eui64, sizeof(ns_opts.eui64)); + + if (!spdk_mem_all_zero(&ns_params->uuid, sizeof(ns_params->uuid))) { + ns_opts.uuid = ns_params->uuid; + } + + if (spdk_nvmf_subsystem_add_ns(subsystem, bdev, &ns_opts, sizeof(ns_opts)) == 0) { + SPDK_ERRLOG("Unable to add namespace\n"); + spdk_nvmf_subsystem_destroy(subsystem); + goto invalid; + } + } + + if (req->listen_addresses.num_listen_address > 0) { + struct rpc_listen_address *addr; + struct spdk_nvme_transport_id trid = {0}; + struct subsystem_listen_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_nvmf_subsystem_destroy(subsystem); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "No Memory"); + free_rpc_subsystem(req); + return; + } + + ctx->req = req; + ctx->subsystem = subsystem; + ctx->request = request; + ctx->idx = 0; + + addr = &req->listen_addresses.addresses[0]; + + if (rpc_listen_address_to_trid(addr, &trid)) { + free(ctx); + goto invalid; + } + + spdk_nvmf_tgt_listen(g_spdk_nvmf_tgt, &trid, spdk_rpc_construct_subsystem_listen_done, ctx); + return; + } + + free_rpc_subsystem(req); + + spdk_nvmf_subsystem_start(subsystem, + spdk_rpc_nvmf_subsystem_started, + request); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_subsystem(req); +} +SPDK_RPC_REGISTER("construct_nvmf_subsystem", spdk_rpc_construct_nvmf_subsystem, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c b/src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c new file mode 100644 index 00000000..bb35dcce --- /dev/null +++ b/src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c @@ -0,0 +1,503 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "event_nvmf.h" + +#include "spdk/bdev.h" +#include "spdk/event.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/util.h" + +enum nvmf_tgt_state { + NVMF_TGT_INIT_NONE = 0, + NVMF_TGT_INIT_PARSE_CONFIG, + NVMF_TGT_INIT_CREATE_POLL_GROUPS, + NVMF_TGT_INIT_START_SUBSYSTEMS, + NVMF_TGT_INIT_START_ACCEPTOR, + NVMF_TGT_RUNNING, + NVMF_TGT_FINI_STOP_SUBSYSTEMS, + NVMF_TGT_FINI_DESTROY_POLL_GROUPS, + NVMF_TGT_FINI_STOP_ACCEPTOR, + NVMF_TGT_FINI_FREE_RESOURCES, + NVMF_TGT_STOPPED, + NVMF_TGT_ERROR, +}; + +struct nvmf_tgt_poll_group { + struct spdk_nvmf_poll_group *group; +}; + +struct nvmf_tgt_host_trid { + struct spdk_nvme_transport_id host_trid; + uint32_t core; + uint32_t ref; + TAILQ_ENTRY(nvmf_tgt_host_trid) link; +}; + +/* List of host trids that are connected to the target */ +static TAILQ_HEAD(, nvmf_tgt_host_trid) g_nvmf_tgt_host_trids = + TAILQ_HEAD_INITIALIZER(g_nvmf_tgt_host_trids); + +struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL; + +static enum nvmf_tgt_state g_tgt_state; + +/* Round-Robin/IP-based tracking of cores for qpair assignment */ +static uint32_t g_tgt_core; + +static struct nvmf_tgt_poll_group *g_poll_groups = NULL; +static size_t g_num_poll_groups = 0; + +static struct spdk_poller *g_acceptor_poller = NULL; + +static void nvmf_tgt_advance_state(void); + +static void +_spdk_nvmf_shutdown_cb(void *arg1, void *arg2) +{ + /* Still in initialization state, defer shutdown operation */ + if (g_tgt_state < NVMF_TGT_RUNNING) { + spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(), + _spdk_nvmf_shutdown_cb, NULL, NULL)); + return; + } else if (g_tgt_state > NVMF_TGT_RUNNING) { + /* Already in Shutdown status, ignore the signal */ + return; + } + + g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS; + nvmf_tgt_advance_state(); +} + +static void +spdk_nvmf_subsystem_fini(void) +{ + /* Always let the first core to handle the case */ + if (spdk_env_get_current_core() != spdk_env_get_first_core()) { + spdk_event_call(spdk_event_allocate(spdk_env_get_first_core(), + _spdk_nvmf_shutdown_cb, NULL, NULL)); + } else { + _spdk_nvmf_shutdown_cb(NULL, NULL); + } +} + +static void +nvmf_tgt_poll_group_add(void *arg1, void *arg2) +{ + struct spdk_nvmf_qpair *qpair = arg1; + struct nvmf_tgt_poll_group *pg = arg2; + + 
spdk_nvmf_poll_group_add(pg->group, qpair); +} + +/* Round robin selection of cores */ +static uint32_t +spdk_nvmf_get_core_rr(void) +{ + uint32_t core; + + core = g_tgt_core; + g_tgt_core = spdk_env_get_next_core(core); + if (g_tgt_core == UINT32_MAX) { + g_tgt_core = spdk_env_get_first_core(); + } + + return core; +} + +static void +nvmf_tgt_remove_host_trid(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvme_transport_id trid_to_remove; + struct nvmf_tgt_host_trid *trid = NULL, *tmp_trid = NULL; + + if (g_spdk_nvmf_tgt_conf->conn_sched != CONNECT_SCHED_HOST_IP) { + return; + } + + if (spdk_nvmf_qpair_get_peer_trid(qpair, &trid_to_remove) != 0) { + return; + } + + TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) { + if (trid && !strncmp(trid->host_trid.traddr, + trid_to_remove.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) { + trid->ref--; + if (trid->ref == 0) { + TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link); + free(trid); + } + + break; + } + } + + return; +} + +static uint32_t +nvmf_tgt_get_qpair_core(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvme_transport_id trid; + struct nvmf_tgt_host_trid *tmp_trid = NULL, *new_trid = NULL; + int ret; + uint32_t core = 0; + + switch (g_spdk_nvmf_tgt_conf->conn_sched) { + case CONNECT_SCHED_HOST_IP: + ret = spdk_nvmf_qpair_get_peer_trid(qpair, &trid); + if (ret) { + SPDK_ERRLOG("Invalid host transport Id. Assigning to core %d\n", core); + break; + } + + TAILQ_FOREACH(tmp_trid, &g_nvmf_tgt_host_trids, link) { + if (tmp_trid && !strncmp(tmp_trid->host_trid.traddr, + trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) { + tmp_trid->ref++; + core = tmp_trid->core; + break; + } + } + if (!tmp_trid) { + new_trid = calloc(1, sizeof(*new_trid)); + if (!new_trid) { + SPDK_ERRLOG("Insufficient memory. Assigning to core %d\n", core); + break; + } + /* Get the next available core for the new host */ + core = spdk_nvmf_get_core_rr(); + new_trid->core = core; + memcpy(new_trid->host_trid.traddr, trid.traddr, + SPDK_NVMF_TRADDR_MAX_LEN + 1); + TAILQ_INSERT_TAIL(&g_nvmf_tgt_host_trids, new_trid, link); + } + break; + case CONNECT_SCHED_ROUND_ROBIN: + default: + core = spdk_nvmf_get_core_rr(); + break; + } + + return core; +} + +static void +new_qpair(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_event *event; + struct nvmf_tgt_poll_group *pg; + uint32_t core; + uint32_t attempts; + + if (g_tgt_state != NVMF_TGT_RUNNING) { + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + return; + } + + for (attempts = 0; attempts < g_num_poll_groups; attempts++) { + core = nvmf_tgt_get_qpair_core(qpair); + pg = &g_poll_groups[core]; + if (pg->group != NULL) { + break; + } else { + nvmf_tgt_remove_host_trid(qpair); + } + } + + if (attempts == g_num_poll_groups) { + SPDK_ERRLOG("No poll groups exist.\n"); + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + return; + } + + event = spdk_event_allocate(core, nvmf_tgt_poll_group_add, qpair, pg); + spdk_event_call(event); +} + +static int +acceptor_poll(void *arg) +{ + struct spdk_nvmf_tgt *tgt = arg; + + spdk_nvmf_tgt_accept(tgt, new_qpair); + + return -1; +} + +static void +nvmf_tgt_destroy_poll_group_done(void *ctx) +{ + g_tgt_state = NVMF_TGT_FINI_STOP_ACCEPTOR; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_destroy_poll_group(void *ctx) +{ + struct nvmf_tgt_poll_group *pg; + + pg = &g_poll_groups[spdk_env_get_current_core()]; + + if (pg->group) { + spdk_nvmf_poll_group_destroy(pg->group); + pg->group = NULL; + } +} + +static void +nvmf_tgt_create_poll_group_done(void *ctx) +{ + g_tgt_state = 
NVMF_TGT_INIT_START_SUBSYSTEMS; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_create_poll_group(void *ctx) +{ + struct nvmf_tgt_poll_group *pg; + + pg = &g_poll_groups[spdk_env_get_current_core()]; + + pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt); +} + +static void +nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + + if (subsystem) { + spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL); + return; + } + + g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + + if (subsystem) { + spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL); + return; + } + + g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_destroy_done(void *ctx, int status) +{ + struct nvmf_tgt_host_trid *trid, *tmp_trid; + + g_tgt_state = NVMF_TGT_STOPPED; + + TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) { + TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link); + free(trid); + } + + free(g_spdk_nvmf_tgt_conf); + g_spdk_nvmf_tgt_conf = NULL; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_parse_conf_done(int status) +{ + g_tgt_state = (status == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR; + nvmf_tgt_advance_state(); +} + +static void +nvmf_tgt_parse_conf_start(void *ctx) +{ + if (spdk_nvmf_parse_conf(nvmf_tgt_parse_conf_done)) { + SPDK_ERRLOG("spdk_nvmf_parse_conf() failed\n"); + g_tgt_state = NVMF_TGT_ERROR; + nvmf_tgt_advance_state(); + } +} + +static void +nvmf_tgt_advance_state(void) +{ + enum nvmf_tgt_state prev_state; + int rc = -1; + + do { + prev_state = g_tgt_state; + + switch (g_tgt_state) { + case NVMF_TGT_INIT_NONE: { + g_tgt_state = NVMF_TGT_INIT_PARSE_CONFIG; + + /* Find the maximum core number */ + g_num_poll_groups = spdk_env_get_last_core() + 1; + assert(g_num_poll_groups > 0); + + g_poll_groups = calloc(g_num_poll_groups, sizeof(*g_poll_groups)); + if (g_poll_groups == NULL) { + g_tgt_state = NVMF_TGT_ERROR; + rc = -ENOMEM; + break; + } + + g_tgt_core = spdk_env_get_first_core(); + break; + } + case NVMF_TGT_INIT_PARSE_CONFIG: + /* Send message to self to call parse conf func. + * Prevents it from possibly performing cb before getting + * out of this function, which causes problems. 
*/ + spdk_thread_send_msg(spdk_get_thread(), nvmf_tgt_parse_conf_start, NULL); + break; + case NVMF_TGT_INIT_CREATE_POLL_GROUPS: + /* Send a message to each thread and create a poll group */ + spdk_for_each_thread(nvmf_tgt_create_poll_group, + NULL, + nvmf_tgt_create_poll_group_done); + break; + case NVMF_TGT_INIT_START_SUBSYSTEMS: { + struct spdk_nvmf_subsystem *subsystem; + + subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt); + + if (subsystem) { + spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL); + } else { + g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR; + } + break; + } + case NVMF_TGT_INIT_START_ACCEPTOR: + g_acceptor_poller = spdk_poller_register(acceptor_poll, g_spdk_nvmf_tgt, + g_spdk_nvmf_tgt_conf->acceptor_poll_rate); + SPDK_INFOLOG(SPDK_LOG_NVMF, "Acceptor running\n"); + g_tgt_state = NVMF_TGT_RUNNING; + break; + case NVMF_TGT_RUNNING: + spdk_subsystem_init_next(0); + break; + case NVMF_TGT_FINI_STOP_SUBSYSTEMS: { + struct spdk_nvmf_subsystem *subsystem; + + subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt); + + if (subsystem) { + spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL); + } else { + g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS; + } + break; + } + case NVMF_TGT_FINI_DESTROY_POLL_GROUPS: + /* Send a message to each thread and destroy the poll group */ + spdk_for_each_thread(nvmf_tgt_destroy_poll_group, + NULL, + nvmf_tgt_destroy_poll_group_done); + break; + case NVMF_TGT_FINI_STOP_ACCEPTOR: + spdk_poller_unregister(&g_acceptor_poller); + g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES; + break; + case NVMF_TGT_FINI_FREE_RESOURCES: + spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL); + break; + case NVMF_TGT_STOPPED: + spdk_subsystem_fini_next(); + return; + case NVMF_TGT_ERROR: + spdk_subsystem_init_next(rc); + return; + } + + } while (g_tgt_state != prev_state); +} + +static void +spdk_nvmf_subsystem_init(void) +{ + g_tgt_state = NVMF_TGT_INIT_NONE; + nvmf_tgt_advance_state(); +} + +static char * +get_conn_sched_string(enum spdk_nvmf_connect_sched sched) +{ + if (sched == CONNECT_SCHED_HOST_IP) { + return "hostip"; + } else { + return "roundrobin"; + } +} + +static void +spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev) +{ + spdk_json_write_array_begin(w); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "set_nvmf_target_config"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf->acceptor_poll_rate); + spdk_json_write_named_string(w, "conn_sched", + get_conn_sched_string(g_spdk_nvmf_tgt_conf->conn_sched)); + spdk_json_write_object_end(w); + spdk_json_write_object_end(w); + + spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt); + spdk_json_write_array_end(w); + + spdk_event_call(done_ev); +} + +static struct spdk_subsystem g_spdk_subsystem_nvmf = { + .name = "nvmf", + .init = spdk_nvmf_subsystem_init, + .fini = spdk_nvmf_subsystem_fini, + .write_config_json = spdk_nvmf_subsystem_write_config_json, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf) +SPDK_SUBSYSTEM_DEPEND(nvmf, bdev) diff --git a/src/spdk/lib/event/subsystems/scsi/Makefile b/src/spdk/lib/event/subsystems/scsi/Makefile new file mode 100644 index 00000000..12bf15e3 --- /dev/null +++ b/src/spdk/lib/event/subsystems/scsi/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = scsi.c +LIBNAME = event_scsi + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/scsi/scsi.c b/src/spdk/lib/event/subsystems/scsi/scsi.c new file mode 100644 index 00000000..a37ebf61 --- /dev/null +++ b/src/spdk/lib/event/subsystems/scsi/scsi.c @@ -0,0 +1,65 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/scsi.h" + +#include "spdk_internal/event.h" + +static void +spdk_scsi_subsystem_init(void) +{ + int rc; + + rc = spdk_scsi_init(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_scsi_subsystem_fini(void) +{ + spdk_scsi_fini(); + spdk_subsystem_fini_next(); +} + +static struct spdk_subsystem g_spdk_subsystem_scsi = { + .name = "scsi", + .init = spdk_scsi_subsystem_init, + .fini = spdk_scsi_subsystem_fini, + .config = NULL, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_scsi); +SPDK_SUBSYSTEM_DEPEND(scsi, bdev) diff --git a/src/spdk/lib/event/subsystems/vhost/Makefile b/src/spdk/lib/event/subsystems/vhost/Makefile new file mode 100644 index 00000000..2e0d61fe --- /dev/null +++ b/src/spdk/lib/event/subsystems/vhost/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = vhost.c +LIBNAME = event_vhost + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/event/subsystems/vhost/vhost.c b/src/spdk/lib/event/subsystems/vhost/vhost.c new file mode 100644 index 00000000..1fdbc6aa --- /dev/null +++ b/src/spdk/lib/event/subsystems/vhost/vhost.c @@ -0,0 +1,71 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/vhost.h" + +#include "spdk_internal/event.h" + +static void +spdk_vhost_subsystem_init(void) +{ + int rc = 0; + + rc = spdk_vhost_init(); + + spdk_subsystem_init_next(rc); +} + +static void +spdk_vhost_subsystem_fini_done(void) +{ + spdk_subsystem_fini_next(); +} + +static void +spdk_vhost_subsystem_fini(void) +{ + spdk_vhost_fini(spdk_vhost_subsystem_fini_done); +} + +static struct spdk_subsystem g_spdk_subsystem_vhost = { + .name = "vhost", + .init = spdk_vhost_subsystem_init, + .fini = spdk_vhost_subsystem_fini, + .config = NULL, + .write_config_json = spdk_vhost_config_json, +}; + +SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_vhost); +SPDK_SUBSYSTEM_DEPEND(vhost, scsi) diff --git a/src/spdk/lib/ioat/Makefile b/src/spdk/lib/ioat/Makefile new file mode 100644 index 00000000..d59d607f --- /dev/null +++ b/src/spdk/lib/ioat/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
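The nvmf, scsi and vhost files in this patch all follow the same event-subsystem shape: fill in a struct spdk_subsystem, signal completion through spdk_subsystem_init_next()/spdk_subsystem_fini_next(), and register with SPDK_SUBSYSTEM_REGISTER plus an optional SPDK_SUBSYSTEM_DEPEND. A minimal sketch of that pattern for a hypothetical subsystem follows; "example" and the example_lib_* calls are invented names used only for illustration.

#include "spdk/stdinc.h"

#include "spdk_internal/event.h"

/* Hypothetical subsystem mirroring the scsi/vhost/nvmf registration pattern above. */
static void
spdk_example_subsystem_init(void)
{
	int rc = 0;	/* rc = example_lib_init(); in a real subsystem */

	/* Hand control back to the event framework, passing the init status. */
	spdk_subsystem_init_next(rc);
}

static void
spdk_example_subsystem_fini(void)
{
	/* example_lib_fini(); in a real subsystem */
	spdk_subsystem_fini_next();
}

static struct spdk_subsystem g_spdk_subsystem_example = {
	.name = "example",
	.init = spdk_example_subsystem_init,
	.fini = spdk_example_subsystem_fini,
	.config = NULL,
};

SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_example)
/* Ensure bdev comes up first, as the scsi and nvmf subsystems require. */
SPDK_SUBSYSTEM_DEPEND(example, bdev)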
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = ioat.c +LIBNAME = ioat + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/ioat/ioat.c b/src/spdk/lib/ioat/ioat.c new file mode 100644 index 00000000..d8c15bf3 --- /dev/null +++ b/src/spdk/lib/ioat/ioat.c @@ -0,0 +1,733 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "ioat_internal.h" + +#include "spdk/env.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +struct ioat_driver { + pthread_mutex_t lock; + TAILQ_HEAD(, spdk_ioat_chan) attached_chans; +}; + +static struct ioat_driver g_ioat_driver = { + .lock = PTHREAD_MUTEX_INITIALIZER, + .attached_chans = TAILQ_HEAD_INITIALIZER(g_ioat_driver.attached_chans), +}; + +static uint64_t +ioat_get_chansts(struct spdk_ioat_chan *ioat) +{ + return spdk_mmio_read_8(&ioat->regs->chansts); +} + +static void +ioat_write_chancmp(struct spdk_ioat_chan *ioat, uint64_t addr) +{ + spdk_mmio_write_8(&ioat->regs->chancmp, addr); +} + +static void +ioat_write_chainaddr(struct spdk_ioat_chan *ioat, uint64_t addr) +{ + spdk_mmio_write_8(&ioat->regs->chainaddr, addr); +} + +static inline void +ioat_suspend(struct spdk_ioat_chan *ioat) +{ + ioat->regs->chancmd = SPDK_IOAT_CHANCMD_SUSPEND; +} + +static inline void +ioat_reset(struct spdk_ioat_chan *ioat) +{ + ioat->regs->chancmd = SPDK_IOAT_CHANCMD_RESET; +} + +static inline uint32_t +ioat_reset_pending(struct spdk_ioat_chan *ioat) +{ + uint8_t cmd; + + cmd = ioat->regs->chancmd; + return (cmd & SPDK_IOAT_CHANCMD_RESET) == SPDK_IOAT_CHANCMD_RESET; +} + +static int +ioat_map_pci_bar(struct spdk_ioat_chan *ioat) +{ + int regs_bar, rc; + void *addr; + uint64_t phys_addr, size; + + regs_bar = 0; + rc = spdk_pci_device_map_bar(ioat->device, regs_bar, &addr, &phys_addr, &size); + if (rc != 0 || addr == NULL) { + SPDK_ERRLOG("pci_device_map_range failed with error code %d\n", + rc); + return -1; + } + + ioat->regs = (volatile struct spdk_ioat_registers *)addr; + + return 0; +} + +static int +ioat_unmap_pci_bar(struct spdk_ioat_chan *ioat) +{ + int rc = 0; + void *addr = (void *)ioat->regs; + + if (addr) { + rc = spdk_pci_device_unmap_bar(ioat->device, 0, addr); + } + return rc; +} + + +static inline uint32_t +ioat_get_active(struct spdk_ioat_chan *ioat) +{ + return (ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1); +} + +static inline uint32_t +ioat_get_ring_space(struct spdk_ioat_chan *ioat) +{ + return (1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1; +} + +static uint32_t +ioat_get_ring_index(struct spdk_ioat_chan *ioat, uint32_t index) +{ + return index & ((1 << ioat->ring_size_order) - 1); +} + +static void +ioat_get_ring_entry(struct spdk_ioat_chan *ioat, uint32_t index, + struct ioat_descriptor **desc, + union spdk_ioat_hw_desc **hw_desc) +{ + uint32_t i = ioat_get_ring_index(ioat, index); + + *desc = &ioat->ring[i]; + *hw_desc = &ioat->hw_ring[i]; +} + +static void +ioat_submit_single(struct spdk_ioat_chan *ioat) +{ + ioat->head++; +} + +static void +ioat_flush(struct spdk_ioat_chan *ioat) +{ + ioat->regs->dmacount = (uint16_t)ioat->head; +} + +static struct ioat_descriptor * +ioat_prep_null(struct spdk_ioat_chan *ioat) +{ + struct ioat_descriptor *desc; + union spdk_ioat_hw_desc *hw_desc; + + if (ioat_get_ring_space(ioat) < 1) { + return NULL; + } + + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); + + hw_desc->dma.u.control_raw = 0; + hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY; + hw_desc->dma.u.control.null = 1; + hw_desc->dma.u.control.completion_update = 1; + + hw_desc->dma.size = 8; + hw_desc->dma.src_addr = 0; + hw_desc->dma.dest_addr = 0; + + desc->callback_fn = NULL; + desc->callback_arg = NULL; + + ioat_submit_single(ioat); + + return desc; +} + +static struct ioat_descriptor * +ioat_prep_copy(struct spdk_ioat_chan *ioat, uint64_t dst, + uint64_t src, uint32_t len) +{ + struct 
ioat_descriptor *desc; + union spdk_ioat_hw_desc *hw_desc; + + assert(len <= ioat->max_xfer_size); + + if (ioat_get_ring_space(ioat) < 1) { + return NULL; + } + + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); + + hw_desc->dma.u.control_raw = 0; + hw_desc->dma.u.control.op = SPDK_IOAT_OP_COPY; + hw_desc->dma.u.control.completion_update = 1; + + hw_desc->dma.size = len; + hw_desc->dma.src_addr = src; + hw_desc->dma.dest_addr = dst; + + desc->callback_fn = NULL; + desc->callback_arg = NULL; + + ioat_submit_single(ioat); + + return desc; +} + +static struct ioat_descriptor * +ioat_prep_fill(struct spdk_ioat_chan *ioat, uint64_t dst, + uint64_t fill_pattern, uint32_t len) +{ + struct ioat_descriptor *desc; + union spdk_ioat_hw_desc *hw_desc; + + assert(len <= ioat->max_xfer_size); + + if (ioat_get_ring_space(ioat) < 1) { + return NULL; + } + + ioat_get_ring_entry(ioat, ioat->head, &desc, &hw_desc); + + hw_desc->fill.u.control_raw = 0; + hw_desc->fill.u.control.op = SPDK_IOAT_OP_FILL; + hw_desc->fill.u.control.completion_update = 1; + + hw_desc->fill.size = len; + hw_desc->fill.src_data = fill_pattern; + hw_desc->fill.dest_addr = dst; + + desc->callback_fn = NULL; + desc->callback_arg = NULL; + + ioat_submit_single(ioat); + + return desc; +} + +static int ioat_reset_hw(struct spdk_ioat_chan *ioat) +{ + int timeout; + uint64_t status; + uint32_t chanerr; + int rc; + + status = ioat_get_chansts(ioat); + if (is_ioat_active(status) || is_ioat_idle(status)) { + ioat_suspend(ioat); + } + + timeout = 20; /* in milliseconds */ + while (is_ioat_active(status) || is_ioat_idle(status)) { + spdk_delay_us(1000); + timeout--; + if (timeout == 0) { + SPDK_ERRLOG("timed out waiting for suspend\n"); + return -1; + } + status = ioat_get_chansts(ioat); + } + + /* + * Clear any outstanding errors. + * CHANERR is write-1-to-clear, so write the current CHANERR bits back to reset everything. 
+ */ + chanerr = ioat->regs->chanerr; + ioat->regs->chanerr = chanerr; + + if (ioat->regs->cbver < SPDK_IOAT_VER_3_3) { + rc = spdk_pci_device_cfg_read32(ioat->device, &chanerr, + SPDK_IOAT_PCI_CHANERR_INT_OFFSET); + if (rc) { + SPDK_ERRLOG("failed to read the internal channel error register\n"); + return -1; + } + + spdk_pci_device_cfg_write32(ioat->device, chanerr, + SPDK_IOAT_PCI_CHANERR_INT_OFFSET); + } + + ioat_reset(ioat); + + timeout = 20; + while (ioat_reset_pending(ioat)) { + spdk_delay_us(1000); + timeout--; + if (timeout == 0) { + SPDK_ERRLOG("timed out waiting for reset\n"); + return -1; + } + } + + return 0; +} + +static int +ioat_process_channel_events(struct spdk_ioat_chan *ioat) +{ + struct ioat_descriptor *desc; + uint64_t status, completed_descriptor, hw_desc_phys_addr; + uint32_t tail; + + if (ioat->head == ioat->tail) { + return 0; + } + + status = *ioat->comp_update; + completed_descriptor = status & SPDK_IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK; + + if (is_ioat_halted(status)) { + SPDK_ERRLOG("Channel halted (%x)\n", ioat->regs->chanerr); + return -1; + } + + if (completed_descriptor == ioat->last_seen) { + return 0; + } + + do { + tail = ioat_get_ring_index(ioat, ioat->tail); + desc = &ioat->ring[tail]; + + if (desc->callback_fn) { + desc->callback_fn(desc->callback_arg); + } + + hw_desc_phys_addr = desc->phys_addr; + ioat->tail++; + } while (hw_desc_phys_addr != completed_descriptor); + + ioat->last_seen = hw_desc_phys_addr; + return 0; +} + +static void +ioat_channel_destruct(struct spdk_ioat_chan *ioat) +{ + ioat_unmap_pci_bar(ioat); + + if (ioat->ring) { + free(ioat->ring); + } + + if (ioat->hw_ring) { + spdk_dma_free(ioat->hw_ring); + } + + if (ioat->comp_update) { + spdk_dma_free((void *)ioat->comp_update); + ioat->comp_update = NULL; + } +} + +static int +ioat_channel_start(struct spdk_ioat_chan *ioat) +{ + uint8_t xfercap, version; + uint64_t status; + int i, num_descriptors; + uint64_t comp_update_bus_addr = 0; + uint64_t phys_addr; + + if (ioat_map_pci_bar(ioat) != 0) { + SPDK_ERRLOG("ioat_map_pci_bar() failed\n"); + return -1; + } + + version = ioat->regs->cbver; + if (version < SPDK_IOAT_VER_3_0) { + SPDK_ERRLOG(" unsupported IOAT version %u.%u\n", + version >> 4, version & 0xF); + return -1; + } + + /* Always support DMA copy */ + ioat->dma_capabilities = SPDK_IOAT_ENGINE_COPY_SUPPORTED; + if (ioat->regs->dmacapability & SPDK_IOAT_DMACAP_BFILL) { + ioat->dma_capabilities |= SPDK_IOAT_ENGINE_FILL_SUPPORTED; + } + xfercap = ioat->regs->xfercap; + + /* Only bits [4:0] are valid. */ + xfercap &= 0x1f; + if (xfercap == 0) { + /* 0 means 4 GB max transfer size. */ + ioat->max_xfer_size = 1ULL << 32; + } else if (xfercap < 12) { + /* XFERCAP must be at least 12 (4 KB) according to the spec. 
*/ + SPDK_ERRLOG("invalid XFERCAP value %u\n", xfercap); + return -1; + } else { + ioat->max_xfer_size = 1U << xfercap; + } + + ioat->comp_update = spdk_dma_zmalloc(sizeof(*ioat->comp_update), SPDK_IOAT_CHANCMP_ALIGN, + &comp_update_bus_addr); + if (ioat->comp_update == NULL) { + return -1; + } + + ioat->ring_size_order = IOAT_DEFAULT_ORDER; + + num_descriptors = 1 << ioat->ring_size_order; + + ioat->ring = calloc(num_descriptors, sizeof(struct ioat_descriptor)); + if (!ioat->ring) { + return -1; + } + + ioat->hw_ring = spdk_dma_zmalloc(num_descriptors * sizeof(union spdk_ioat_hw_desc), 64, + NULL); + if (!ioat->hw_ring) { + return -1; + } + + for (i = 0; i < num_descriptors; i++) { + phys_addr = spdk_vtophys(&ioat->hw_ring[i]); + if (phys_addr == SPDK_VTOPHYS_ERROR) { + SPDK_ERRLOG("Failed to translate descriptor %u to physical address\n", i); + return -1; + } + + ioat->ring[i].phys_addr = phys_addr; + ioat->hw_ring[ioat_get_ring_index(ioat, i - 1)].generic.next = phys_addr; + } + + ioat->head = 0; + ioat->tail = 0; + ioat->last_seen = 0; + + ioat_reset_hw(ioat); + + ioat->regs->chanctrl = SPDK_IOAT_CHANCTRL_ANY_ERR_ABORT_EN; + ioat_write_chancmp(ioat, comp_update_bus_addr); + ioat_write_chainaddr(ioat, ioat->ring[0].phys_addr); + + ioat_prep_null(ioat); + ioat_flush(ioat); + + i = 100; + while (i-- > 0) { + spdk_delay_us(100); + status = ioat_get_chansts(ioat); + if (is_ioat_idle(status)) { + break; + } + } + + if (is_ioat_idle(status)) { + ioat_process_channel_events(ioat); + } else { + SPDK_ERRLOG("could not start channel: status = %p\n error = %#x\n", + (void *)status, ioat->regs->chanerr); + return -1; + } + + return 0; +} + +/* Caller must hold g_ioat_driver.lock */ +static struct spdk_ioat_chan * +ioat_attach(struct spdk_pci_device *device) +{ + struct spdk_ioat_chan *ioat; + uint32_t cmd_reg; + + ioat = calloc(1, sizeof(struct spdk_ioat_chan)); + if (ioat == NULL) { + return NULL; + } + + /* Enable PCI busmaster. */ + spdk_pci_device_cfg_read32(device, &cmd_reg, 4); + cmd_reg |= 0x4; + spdk_pci_device_cfg_write32(device, cmd_reg, 4); + + ioat->device = device; + + if (ioat_channel_start(ioat) != 0) { + ioat_channel_destruct(ioat); + free(ioat); + return NULL; + } + + return ioat; +} + +struct ioat_enum_ctx { + spdk_ioat_probe_cb probe_cb; + spdk_ioat_attach_cb attach_cb; + void *cb_ctx; +}; + +/* This function must only be called while holding g_ioat_driver.lock */ +static int +ioat_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) +{ + struct ioat_enum_ctx *enum_ctx = ctx; + struct spdk_ioat_chan *ioat; + + /* Verify that this device is not already attached */ + TAILQ_FOREACH(ioat, &g_ioat_driver.attached_chans, tailq) { + /* + * NOTE: This assumes that the PCI abstraction layer will use the same device handle + * across enumerations; we could compare by BDF instead if this is not true. + */ + if (pci_dev == ioat->device) { + return 0; + } + } + + if (enum_ctx->probe_cb(enum_ctx->cb_ctx, pci_dev)) { + /* + * Since I/OAT init is relatively quick, just perform the full init during probing. + * If this turns out to be a bottleneck later, this can be changed to work like + * NVMe with a list of devices to initialize in parallel. 
+ */ + ioat = ioat_attach(pci_dev); + if (ioat == NULL) { + SPDK_ERRLOG("ioat_attach() failed\n"); + return -1; + } + + TAILQ_INSERT_TAIL(&g_ioat_driver.attached_chans, ioat, tailq); + + enum_ctx->attach_cb(enum_ctx->cb_ctx, pci_dev, ioat); + } + + return 0; +} + +int +spdk_ioat_probe(void *cb_ctx, spdk_ioat_probe_cb probe_cb, spdk_ioat_attach_cb attach_cb) +{ + int rc; + struct ioat_enum_ctx enum_ctx; + + pthread_mutex_lock(&g_ioat_driver.lock); + + enum_ctx.probe_cb = probe_cb; + enum_ctx.attach_cb = attach_cb; + enum_ctx.cb_ctx = cb_ctx; + + rc = spdk_pci_ioat_enumerate(ioat_enum_cb, &enum_ctx); + + pthread_mutex_unlock(&g_ioat_driver.lock); + + return rc; +} + +void +spdk_ioat_detach(struct spdk_ioat_chan *ioat) +{ + struct ioat_driver *driver = &g_ioat_driver; + + /* ioat should be in the free list (not registered to a thread) + * when calling ioat_detach(). + */ + pthread_mutex_lock(&driver->lock); + TAILQ_REMOVE(&driver->attached_chans, ioat, tailq); + pthread_mutex_unlock(&driver->lock); + + ioat_channel_destruct(ioat); + free(ioat); +} + +#define _2MB_PAGE(ptr) ((ptr) & ~(0x200000 - 1)) +#define _2MB_OFFSET(ptr) ((ptr) & (0x200000 - 1)) + +int +spdk_ioat_submit_copy(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, const void *src, uint64_t nbytes) +{ + struct ioat_descriptor *last_desc; + uint64_t remaining, op_size; + uint64_t vdst, vsrc; + uint64_t vdst_page, vsrc_page; + uint64_t pdst_page, psrc_page; + uint32_t orig_head; + + if (!ioat) { + return -EINVAL; + } + + orig_head = ioat->head; + + vdst = (uint64_t)dst; + vsrc = (uint64_t)src; + vdst_page = vsrc_page = 0; + pdst_page = psrc_page = SPDK_VTOPHYS_ERROR; + + remaining = nbytes; + while (remaining) { + if (_2MB_PAGE(vsrc) != vsrc_page) { + vsrc_page = _2MB_PAGE(vsrc); + psrc_page = spdk_vtophys((void *)vsrc_page); + } + + if (_2MB_PAGE(vdst) != vdst_page) { + vdst_page = _2MB_PAGE(vdst); + pdst_page = spdk_vtophys((void *)vdst_page); + } + op_size = remaining; + op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vsrc))); + op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst))); + op_size = spdk_min(op_size, ioat->max_xfer_size); + remaining -= op_size; + + last_desc = ioat_prep_copy(ioat, + pdst_page + _2MB_OFFSET(vdst), + psrc_page + _2MB_OFFSET(vsrc), + op_size); + + if (remaining == 0 || last_desc == NULL) { + break; + } + + vsrc += op_size; + vdst += op_size; + + } + /* Issue null descriptor for null transfer */ + if (nbytes == 0) { + last_desc = ioat_prep_null(ioat); + } + + if (last_desc) { + last_desc->callback_fn = cb_fn; + last_desc->callback_arg = cb_arg; + } else { + /* + * Ran out of descriptors in the ring - reset head to leave things as they were + * in case we managed to fill out any descriptors. 
+ */ + ioat->head = orig_head; + return -ENOMEM; + } + + ioat_flush(ioat); + return 0; +} + +int +spdk_ioat_submit_fill(struct spdk_ioat_chan *ioat, void *cb_arg, spdk_ioat_req_cb cb_fn, + void *dst, uint64_t fill_pattern, uint64_t nbytes) +{ + struct ioat_descriptor *last_desc = NULL; + uint64_t remaining, op_size; + uint64_t vdst; + uint32_t orig_head; + + if (!ioat) { + return -EINVAL; + } + + if (!(ioat->dma_capabilities & SPDK_IOAT_ENGINE_FILL_SUPPORTED)) { + SPDK_ERRLOG("Channel does not support memory fill\n"); + return -1; + } + + orig_head = ioat->head; + + vdst = (uint64_t)dst; + remaining = nbytes; + + while (remaining) { + op_size = remaining; + op_size = spdk_min(op_size, (0x200000 - _2MB_OFFSET(vdst))); + op_size = spdk_min(op_size, ioat->max_xfer_size); + remaining -= op_size; + + last_desc = ioat_prep_fill(ioat, + spdk_vtophys((void *)vdst), + fill_pattern, + op_size); + + if (remaining == 0 || last_desc == NULL) { + break; + } + + vdst += op_size; + } + + if (last_desc) { + last_desc->callback_fn = cb_fn; + last_desc->callback_arg = cb_arg; + } else { + /* + * Ran out of descriptors in the ring - reset head to leave things as they were + * in case we managed to fill out any descriptors. + */ + ioat->head = orig_head; + return -ENOMEM; + } + + ioat_flush(ioat); + return 0; +} + +uint32_t +spdk_ioat_get_dma_capabilities(struct spdk_ioat_chan *ioat) +{ + if (!ioat) { + return 0; + } + return ioat->dma_capabilities; +} + +int +spdk_ioat_process_events(struct spdk_ioat_chan *ioat) +{ + return ioat_process_channel_events(ioat); +} + +SPDK_LOG_REGISTER_COMPONENT("ioat", SPDK_LOG_IOAT) diff --git a/src/spdk/lib/ioat/ioat_internal.h b/src/spdk/lib/ioat/ioat_internal.h new file mode 100644 index 00000000..19593bb0 --- /dev/null +++ b/src/spdk/lib/ioat/ioat_internal.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __IOAT_INTERNAL_H__ +#define __IOAT_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/ioat.h" +#include "spdk/ioat_spec.h" +#include "spdk/queue.h" +#include "spdk/mmio.h" + +/* Allocate 1 << 15 (32K) descriptors per channel by default. */ +#define IOAT_DEFAULT_ORDER 15 + +struct ioat_descriptor { + uint64_t phys_addr; + spdk_ioat_req_cb callback_fn; + void *callback_arg; +}; + +/* One of these per allocated PCI device. */ +struct spdk_ioat_chan { + /* Opaque handle to upper layer */ + struct spdk_pci_device *device; + uint64_t max_xfer_size; + volatile struct spdk_ioat_registers *regs; + + volatile uint64_t *comp_update; + + uint32_t head; + uint32_t tail; + + uint32_t ring_size_order; + uint64_t last_seen; + + struct ioat_descriptor *ring; + union spdk_ioat_hw_desc *hw_ring; + uint32_t dma_capabilities; + + /* tailq entry for attached_chans */ + TAILQ_ENTRY(spdk_ioat_chan) tailq; +}; + +static inline uint32_t +is_ioat_active(uint64_t status) +{ + return (status & SPDK_IOAT_CHANSTS_STATUS) == SPDK_IOAT_CHANSTS_ACTIVE; +} + +static inline uint32_t +is_ioat_idle(uint64_t status) +{ + return (status & SPDK_IOAT_CHANSTS_STATUS) == SPDK_IOAT_CHANSTS_IDLE; +} + +static inline uint32_t +is_ioat_halted(uint64_t status) +{ + return (status & SPDK_IOAT_CHANSTS_STATUS) == SPDK_IOAT_CHANSTS_HALTED; +} + +static inline uint32_t +is_ioat_suspended(uint64_t status) +{ + return (status & SPDK_IOAT_CHANSTS_STATUS) == SPDK_IOAT_CHANSTS_SUSPENDED; +} + +#endif /* __IOAT_INTERNAL_H__ */ diff --git a/src/spdk/lib/iscsi/Makefile b/src/spdk/lib/iscsi/Makefile new file mode 100644 index 00000000..624bbf95 --- /dev/null +++ b/src/spdk/lib/iscsi/Makefile @@ -0,0 +1,45 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib +C_SRCS = acceptor.c conn.c \ + init_grp.c iscsi.c md5.c param.c portal_grp.c \ + tgt_node.c iscsi_subsystem.c \ + iscsi_rpc.c task.c +LIBNAME = iscsi +LOCAL_SYS_LIBS = -lcrypto + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/iscsi/acceptor.c b/src/spdk/lib/iscsi/acceptor.c new file mode 100644 index 00000000..9b13de30 --- /dev/null +++ b/src/spdk/lib/iscsi/acceptor.c @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/sock.h" +#include "spdk/string.h" +#include "iscsi/acceptor.h" +#include "iscsi/conn.h" +#include "iscsi/portal_grp.h" + +#define ACCEPT_TIMEOUT_US 1000 /* 1ms */ + +static int +spdk_iscsi_portal_accept(void *arg) +{ + struct spdk_iscsi_portal *portal = arg; + struct spdk_sock *sock; + int rc; + int count = 0; + + if (portal->sock == NULL) { + return -1; + } + + while (1) { + sock = spdk_sock_accept(portal->sock); + if (sock != NULL) { + rc = spdk_iscsi_conn_construct(portal, sock); + if (rc < 0) { + spdk_sock_close(&sock); + SPDK_ERRLOG("spdk_iscsi_connection_construct() failed\n"); + break; + } + count++; + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("accept error(%d): %s\n", errno, spdk_strerror(errno)); + } + break; + } + } + + return count; +} + +void +spdk_iscsi_acceptor_start(struct spdk_iscsi_portal *p) +{ + p->acceptor_poller = spdk_poller_register(spdk_iscsi_portal_accept, p, ACCEPT_TIMEOUT_US); +} + +void +spdk_iscsi_acceptor_stop(struct spdk_iscsi_portal *p) +{ + spdk_poller_unregister(&p->acceptor_poller); +} diff --git a/src/spdk/lib/iscsi/acceptor.h b/src/spdk/lib/iscsi/acceptor.h new file mode 100644 index 00000000..9060ee7d --- /dev/null +++ b/src/spdk/lib/iscsi/acceptor.h @@ -0,0 +1,43 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ACCEPTOR_H_ +#define SPDK_ACCEPTOR_H_ + +struct spdk_iscsi_portal; + +void spdk_iscsi_acceptor_start(struct spdk_iscsi_portal *p); +void spdk_iscsi_acceptor_stop(struct spdk_iscsi_portal *p); + +#endif /* SPDK_ACCEPTOR_H_ */ diff --git a/src/spdk/lib/iscsi/conn.c b/src/spdk/lib/iscsi/conn.c new file mode 100644 index 00000000..d5cd5d1e --- /dev/null +++ b/src/spdk/lib/iscsi/conn.c @@ -0,0 +1,1470 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . 
+ * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/thread.h" +#include "spdk/queue.h" +#include "spdk/trace.h" +#include "spdk/net.h" +#include "spdk/sock.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#include "iscsi/task.h" +#include "iscsi/conn.h" +#include "iscsi/tgt_node.h" +#include "iscsi/portal_grp.h" + +#define SPDK_ISCSI_CONNECTION_MEMSET(conn) \ + memset(&(conn)->portal, 0, sizeof(*(conn)) - \ + offsetof(struct spdk_iscsi_conn, portal)); + +static int g_connections_per_lcore; +static uint32_t *g_num_connections; + +struct spdk_iscsi_conn *g_conns_array = MAP_FAILED; +static int g_conns_array_fd = -1; +static char g_shm_name[64]; + +static pthread_mutex_t g_conns_mutex = PTHREAD_MUTEX_INITIALIZER; + +static struct spdk_poller *g_shutdown_timer = NULL; + +static uint32_t spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask); + +static void spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2); +static void spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn); +static void spdk_iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, + struct spdk_sock *sock); + +static struct spdk_iscsi_conn * +allocate_conn(void) +{ + struct spdk_iscsi_conn *conn; + int i; + + pthread_mutex_lock(&g_conns_mutex); + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + conn = &g_conns_array[i]; + if (!conn->is_valid) { + SPDK_ISCSI_CONNECTION_MEMSET(conn); + conn->is_valid = 1; + pthread_mutex_unlock(&g_conns_mutex); + return conn; + } + } + pthread_mutex_unlock(&g_conns_mutex); + + return NULL; +} + +static void +free_conn(struct spdk_iscsi_conn *conn) +{ + free(conn->portal_host); + free(conn->portal_port); + conn->is_valid = 0; +} + +static struct spdk_iscsi_conn * +spdk_find_iscsi_connection_by_id(int cid) +{ + if (g_conns_array[cid].is_valid == 1) { + return &g_conns_array[cid]; + } else { + return NULL; + } +} + +int 
spdk_initialize_iscsi_conns(void) +{ + size_t conns_size = sizeof(struct spdk_iscsi_conn) * MAX_ISCSI_CONNECTIONS; + uint32_t i, last_core; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init\n"); + + snprintf(g_shm_name, sizeof(g_shm_name), "/spdk_iscsi_conns.%d", spdk_app_get_shm_id()); + g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600); + if (g_conns_array_fd < 0) { + SPDK_ERRLOG("could not shm_open %s\n", g_shm_name); + goto err; + } + + if (ftruncate(g_conns_array_fd, conns_size) != 0) { + SPDK_ERRLOG("could not ftruncate\n"); + goto err; + } + g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED, + g_conns_array_fd, 0); + + if (g_conns_array == MAP_FAILED) { + fprintf(stderr, "could not mmap cons array file %s (%d)\n", g_shm_name, errno); + goto err; + } + + memset(g_conns_array, 0, conns_size); + + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + g_conns_array[i].id = i; + } + + last_core = spdk_env_get_last_core(); + g_num_connections = calloc(last_core + 1, sizeof(uint32_t)); + if (!g_num_connections) { + SPDK_ERRLOG("Could not allocate array size=%u for g_num_connections\n", + last_core + 1); + goto err; + } + + return 0; + +err: + if (g_conns_array != MAP_FAILED) { + munmap(g_conns_array, conns_size); + g_conns_array = MAP_FAILED; + } + + if (g_conns_array_fd >= 0) { + close(g_conns_array_fd); + g_conns_array_fd = -1; + shm_unlink(g_shm_name); + } + + return -1; +} + +static void +spdk_iscsi_poll_group_add_conn_sock(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_poll_group *poll_group; + int rc; + + assert(conn->lcore == spdk_env_get_current_core()); + + poll_group = &g_spdk_iscsi.poll_group[conn->lcore]; + + rc = spdk_sock_group_add_sock(poll_group->sock_group, conn->sock, spdk_iscsi_conn_sock_cb, conn); + if (rc < 0) { + SPDK_ERRLOG("Failed to add sock=%p of conn=%p\n", conn->sock, conn); + } +} + +static void +spdk_iscsi_poll_group_remove_conn_sock(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_poll_group *poll_group; + int rc; + + assert(conn->lcore == spdk_env_get_current_core()); + + poll_group = &g_spdk_iscsi.poll_group[conn->lcore]; + + rc = spdk_sock_group_remove_sock(poll_group->sock_group, conn->sock); + if (rc < 0) { + SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn); + } +} + +static void +spdk_iscsi_poll_group_add_conn(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_poll_group *poll_group; + + assert(conn->lcore == spdk_env_get_current_core()); + + poll_group = &g_spdk_iscsi.poll_group[conn->lcore]; + + conn->is_stopped = false; + STAILQ_INSERT_TAIL(&poll_group->connections, conn, link); + spdk_iscsi_poll_group_add_conn_sock(conn); +} + +static void +spdk_iscsi_poll_group_remove_conn(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_poll_group *poll_group; + + assert(conn->lcore == spdk_env_get_current_core()); + + poll_group = &g_spdk_iscsi.poll_group[conn->lcore]; + + conn->is_stopped = true; + STAILQ_REMOVE(&poll_group->connections, conn, spdk_iscsi_conn, link); +} + +/** + * \brief Create an iSCSI connection from the given parameters and schedule it + * on a reactor. 
+ * + * \code + * + * # identify reactor where the new connections work item will be scheduled + * reactor = spdk_iscsi_conn_allocate_reactor() + * allocate spdk_iscsi_conn object + * initialize spdk_iscsi_conn object + * schedule iSCSI connection work item on reactor + * + * \endcode + */ +int +spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal, + struct spdk_sock *sock) +{ + struct spdk_iscsi_conn *conn; + int bufsize, i, rc; + + conn = allocate_conn(); + if (conn == NULL) { + SPDK_ERRLOG("Could not allocate connection.\n"); + return -1; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + conn->timeout = g_spdk_iscsi.timeout; + conn->nopininterval = g_spdk_iscsi.nopininterval; + conn->nopininterval *= spdk_get_ticks_hz(); /* seconds to TSC */ + conn->nop_outstanding = false; + conn->data_out_cnt = 0; + conn->data_in_cnt = 0; + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + conn->MaxRecvDataSegmentLength = 8192; // RFC3720(12.12) + + conn->portal = portal; + conn->pg_tag = portal->group->tag; + conn->portal_host = strdup(portal->host); + conn->portal_port = strdup(portal->port); + conn->portal_cpumask = portal->cpumask; + conn->sock = sock; + + conn->state = ISCSI_CONN_STATE_INVALID; + conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE; + conn->ttt = 0; + + conn->partial_text_parameter = NULL; + + for (i = 0; i < MAX_CONNECTION_PARAMS; i++) { + conn->conn_param_state_negotiated[i] = false; + } + + for (i = 0; i < MAX_SESSION_PARAMS; i++) { + conn->sess_param_state_negotiated[i] = false; + } + + for (i = 0; i < DEFAULT_MAXR2T; i++) { + conn->outstanding_r2t_tasks[i] = NULL; + } + + TAILQ_INIT(&conn->write_pdu_list); + TAILQ_INIT(&conn->snack_pdu_list); + TAILQ_INIT(&conn->queued_r2t_tasks); + TAILQ_INIT(&conn->active_r2t_tasks); + TAILQ_INIT(&conn->queued_datain_tasks); + memset(&conn->open_lun_descs, 0, sizeof(conn->open_lun_descs)); + + rc = spdk_sock_getaddr(sock, conn->target_addr, sizeof conn->target_addr, NULL, + conn->initiator_addr, sizeof conn->initiator_addr, NULL); + if (rc < 0) { + SPDK_ERRLOG("spdk_sock_getaddr() failed\n"); + goto error_return; + } + + bufsize = 2 * 1024 * 1024; + rc = spdk_sock_set_recvbuf(conn->sock, bufsize); + if (rc != 0) { + SPDK_ERRLOG("spdk_sock_set_recvbuf failed\n"); + } + + bufsize = 32 * 1024 * 1024 / g_spdk_iscsi.MaxConnections; + if (bufsize > 2 * 1024 * 1024) { + bufsize = 2 * 1024 * 1024; + } + rc = spdk_sock_set_sendbuf(conn->sock, bufsize); + if (rc != 0) { + SPDK_ERRLOG("spdk_sock_set_sendbuf failed\n"); + } + + /* set low water mark */ + rc = spdk_sock_set_recvlowat(conn->sock, 1); + if (rc != 0) { + SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n"); + goto error_return; + } + + /* set default params */ + rc = spdk_iscsi_conn_params_init(&conn->params); + if (rc < 0) { + SPDK_ERRLOG("iscsi_conn_params_init() failed\n"); +error_return: + spdk_iscsi_param_free(conn->params); + free_conn(conn); + return -1; + } + conn->logout_timer = NULL; + conn->shutdown_timer = NULL; + SPDK_NOTICELOG("Launching connection on acceptor thread\n"); + conn->pending_task_cnt = 0; + conn->pending_activate_event = false; + + conn->lcore = spdk_env_get_current_core(); + __sync_fetch_and_add(&g_num_connections[conn->lcore], 1); + + spdk_iscsi_poll_group_add_conn(conn); + return 0; +} + +void +spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + if (pdu->task) { + if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) { + if (pdu->task->scsi.offset > 0) { + conn->data_in_cnt--; + if (pdu->bhs.flags & ISCSI_DATAIN_STATUS) { + /* Free 
the primary task after the last subtask done */ + conn->data_in_cnt--; + spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task)); + } + } + } else if (pdu->bhs.opcode == ISCSI_OP_SCSI_RSP && + pdu->task->scsi.status != SPDK_SCSI_STATUS_GOOD) { + if (pdu->task->scsi.offset > 0) { + spdk_iscsi_task_put(spdk_iscsi_task_get_primary(pdu->task)); + } + } + spdk_iscsi_task_put(pdu->task); + } + spdk_put_pdu(pdu); +} + +static int spdk_iscsi_conn_free_tasks(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_pdu *pdu, *tmp_pdu; + struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task; + + TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) { + TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); + spdk_iscsi_conn_free_pdu(conn, pdu); + } + + TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) { + TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq); + if (pdu->task) { + spdk_iscsi_task_put(pdu->task); + } + spdk_put_pdu(pdu); + } + + TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) { + if (!iscsi_task->is_queued) { + TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link); + spdk_iscsi_task_put(iscsi_task); + } + } + + if (conn->pending_task_cnt) { + return -1; + } + + return 0; +} + +static void spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn) +{ + if (conn == NULL) { + return; + } + + spdk_iscsi_param_free(conn->params); + + /* + * Each connection pre-allocates its next PDU - make sure these get + * freed here. + */ + spdk_put_pdu(conn->pdu_in_progress); + + free_conn(conn); +} + +static void spdk_iscsi_remove_conn(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_sess *sess; + int idx; + uint32_t i, j; + + idx = -1; + sess = conn->sess; + conn->sess = NULL; + if (sess == NULL) { + spdk_iscsi_conn_free(conn); + return; + } + + for (i = 0; i < sess->connections; i++) { + if (sess->conns[i] == conn) { + idx = i; + break; + } + } + + if (sess->connections < 1) { + SPDK_ERRLOG("zero connection\n"); + sess->connections = 0; + } else { + if (idx < 0) { + SPDK_ERRLOG("remove conn not found\n"); + } else { + for (j = idx; j < sess->connections - 1; j++) { + sess->conns[j] = sess->conns[j + 1]; + } + sess->conns[sess->connections - 1] = NULL; + } + sess->connections--; + } + + SPDK_NOTICELOG("Terminating connections(tsih %d): %d\n", sess->tsih, sess->connections); + + if (sess->connections == 0) { + /* cleanup last connection */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "cleanup last conn free sess\n"); + spdk_free_sess(sess); + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "cleanup free conn\n"); + spdk_iscsi_conn_free(conn); +} + +static void +spdk_iscsi_conn_cleanup_backend(struct spdk_iscsi_conn *conn) +{ + int rc; + struct spdk_iscsi_tgt_node *target; + + if (conn->sess->connections > 1) { + /* connection specific cleanup */ + } else if (!g_spdk_iscsi.AllowDuplicateIsid) { + /* clean up all tasks to all LUNs for session */ + target = conn->sess->target; + if (target != NULL) { + rc = spdk_iscsi_tgt_node_cleanup_luns(conn, target); + if (rc < 0) { + SPDK_ERRLOG("target abort failed\n"); + } + } + } +} + +static void +_spdk_iscsi_conn_free(struct spdk_iscsi_conn *conn) +{ + pthread_mutex_lock(&g_conns_mutex); + spdk_iscsi_remove_conn(conn); + pthread_mutex_unlock(&g_conns_mutex); +} + +static int +_spdk_iscsi_conn_check_shutdown(void *arg) +{ + struct spdk_iscsi_conn *conn = arg; + int rc; + + rc = spdk_iscsi_conn_free_tasks(conn); + if (rc < 0) { + return -1; + } + + spdk_poller_unregister(&conn->shutdown_timer); + + spdk_iscsi_conn_stop(conn); + 
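+ /* All outstanding tasks are done and the connection was stopped above, so the
+  * connection object can now be returned to the global connection array.
+  */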
_spdk_iscsi_conn_free(conn); + + return -1; +} + +static void +_spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn) +{ + int rc; + + spdk_clear_all_transfer_task(conn, NULL); + spdk_iscsi_poll_group_remove_conn_sock(conn); + spdk_sock_close(&conn->sock); + spdk_poller_unregister(&conn->logout_timer); + spdk_poller_unregister(&conn->flush_poller); + + rc = spdk_iscsi_conn_free_tasks(conn); + if (rc < 0) { + /* The connection cannot be freed yet. Check back later. */ + conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_shutdown, conn, 1000); + } else { + spdk_iscsi_conn_stop(conn); + _spdk_iscsi_conn_free(conn); + } +} + +static int +_spdk_iscsi_conn_check_pending_tasks(void *arg) +{ + struct spdk_iscsi_conn *conn = arg; + + if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) { + return -1; + } + + spdk_poller_unregister(&conn->shutdown_timer); + + _spdk_iscsi_conn_destruct(conn); + + return -1; +} + +void +spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn) +{ + conn->state = ISCSI_CONN_STATE_EXITED; + + if (conn->sess != NULL && conn->pending_task_cnt > 0) { + spdk_iscsi_conn_cleanup_backend(conn); + } + + if (conn->dev != NULL && spdk_scsi_dev_has_pending_tasks(conn->dev)) { + conn->shutdown_timer = spdk_poller_register(_spdk_iscsi_conn_check_pending_tasks, conn, 1000); + } else { + _spdk_iscsi_conn_destruct(conn); + } +} + +static int +spdk_iscsi_get_active_conns(void) +{ + struct spdk_iscsi_conn *conn; + int num = 0; + int i; + + pthread_mutex_lock(&g_conns_mutex); + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + conn = spdk_find_iscsi_connection_by_id(i); + if (conn == NULL) { + continue; + } + num++; + } + pthread_mutex_unlock(&g_conns_mutex); + return num; +} + +static void +spdk_iscsi_conns_cleanup(void) +{ + free(g_num_connections); + munmap(g_conns_array, sizeof(struct spdk_iscsi_conn) * + MAX_ISCSI_CONNECTIONS); + shm_unlink(g_shm_name); + if (g_conns_array_fd >= 0) { + close(g_conns_array_fd); + g_conns_array_fd = -1; + } +} + +static void +spdk_iscsi_conn_check_shutdown_cb(void *arg1, void *arg2) +{ + spdk_iscsi_conns_cleanup(); + spdk_shutdown_iscsi_conns_done(); +} + +static int +spdk_iscsi_conn_check_shutdown(void *arg) +{ + struct spdk_event *event; + + if (spdk_iscsi_get_active_conns() == 0) { + spdk_poller_unregister(&g_shutdown_timer); + event = spdk_event_allocate(spdk_env_get_current_core(), + spdk_iscsi_conn_check_shutdown_cb, NULL, NULL); + spdk_event_call(event); + } + + return -1; +} + +static void +spdk_iscsi_conn_close_lun(struct spdk_iscsi_conn *conn, int lun_id) +{ + struct spdk_scsi_desc *desc; + + desc = conn->open_lun_descs[lun_id]; + if (desc != NULL) { + spdk_scsi_lun_free_io_channel(desc); + spdk_scsi_lun_close(desc); + conn->open_lun_descs[lun_id] = NULL; + } +} + +static void +spdk_iscsi_conn_close_luns(struct spdk_iscsi_conn *conn) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + spdk_iscsi_conn_close_lun(conn, i); + } +} + +static void +_iscsi_conn_remove_lun(void *arg1, void *arg2) +{ + struct spdk_iscsi_conn *conn = arg1; + struct spdk_scsi_lun *lun = arg2; + int lun_id = spdk_scsi_lun_get_id(lun); + struct spdk_iscsi_pdu *pdu, *tmp_pdu; + struct spdk_iscsi_task *iscsi_task, *tmp_iscsi_task; + + /* If a connection is already in stating status, just return */ + if (conn->state >= ISCSI_CONN_STATE_EXITING) { + return; + } + + spdk_clear_all_transfer_task(conn, lun); + TAILQ_FOREACH_SAFE(pdu, &conn->write_pdu_list, tailq, tmp_pdu) { + if (pdu->task && (lun == pdu->task->scsi.lun)) { + 
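+ /* Drop any queued response PDUs that reference the LUN being removed;
+  * spdk_iscsi_conn_free_pdu() also releases the task attached to each one.
+  */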
TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); + spdk_iscsi_conn_free_pdu(conn, pdu); + } + } + + TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, tmp_pdu) { + if (pdu->task && (lun == pdu->task->scsi.lun)) { + TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq); + spdk_iscsi_task_put(pdu->task); + spdk_put_pdu(pdu); + } + } + + TAILQ_FOREACH_SAFE(iscsi_task, &conn->queued_datain_tasks, link, tmp_iscsi_task) { + if ((!iscsi_task->is_queued) && (lun == iscsi_task->scsi.lun)) { + TAILQ_REMOVE(&conn->queued_datain_tasks, iscsi_task, link); + spdk_iscsi_task_put(iscsi_task); + } + } + + spdk_iscsi_conn_close_lun(conn, lun_id); +} + +static void +spdk_iscsi_conn_remove_lun(struct spdk_scsi_lun *lun, void *remove_ctx) +{ + struct spdk_iscsi_conn *conn = remove_ctx; + struct spdk_event *event; + + event = spdk_event_allocate(conn->lcore, _iscsi_conn_remove_lun, + conn, lun); + spdk_event_call(event); +} + +static void +spdk_iscsi_conn_open_luns(struct spdk_iscsi_conn *conn) +{ + int i, rc; + struct spdk_scsi_lun *lun; + struct spdk_scsi_desc *desc; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + lun = spdk_scsi_dev_get_lun(conn->dev, i); + if (lun == NULL) { + continue; + } + + rc = spdk_scsi_lun_open(lun, spdk_iscsi_conn_remove_lun, conn, &desc); + if (rc != 0) { + goto error; + } + + rc = spdk_scsi_lun_allocate_io_channel(desc); + if (rc != 0) { + spdk_scsi_lun_close(desc); + goto error; + } + + conn->open_lun_descs[i] = desc; + } + + return; + +error: + spdk_iscsi_conn_close_luns(conn); +} + +/** + * This function will stop executing the specified connection. + */ +static void +spdk_iscsi_conn_stop(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_tgt_node *target; + + if (conn->state == ISCSI_CONN_STATE_EXITED && conn->sess != NULL && + conn->sess->session_type == SESSION_TYPE_NORMAL && + conn->full_feature) { + target = conn->sess->target; + pthread_mutex_lock(&target->mutex); + target->num_active_conns--; + pthread_mutex_unlock(&target->mutex); + + spdk_iscsi_conn_close_luns(conn); + } + + assert(conn->lcore == spdk_env_get_current_core()); + + __sync_fetch_and_sub(&g_num_connections[conn->lcore], 1); + spdk_iscsi_poll_group_remove_conn(conn); +} + +void spdk_shutdown_iscsi_conns(void) +{ + struct spdk_iscsi_conn *conn; + int i; + + pthread_mutex_lock(&g_conns_mutex); + + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + conn = spdk_find_iscsi_connection_by_id(i); + if (conn == NULL) { + continue; + } + + /* Do not set conn->state if the connection has already started exiting. + * This ensures we do not move a connection from EXITED state back to EXITING. + */ + if (conn->state < ISCSI_CONN_STATE_EXITING) { + conn->state = ISCSI_CONN_STATE_EXITING; + } + } + + pthread_mutex_unlock(&g_conns_mutex); + g_shutdown_timer = spdk_poller_register(spdk_iscsi_conn_check_shutdown, NULL, + 1000); +} + +int +spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, const char *conn_match, + int drop_all) +{ + struct spdk_iscsi_conn *xconn; + const char *xconn_match; + int i, num; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_drop_conns\n"); + + num = 0; + pthread_mutex_lock(&g_conns_mutex); + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + xconn = spdk_find_iscsi_connection_by_id(i); + + if (xconn == NULL) { + continue; + } + + if (xconn == conn) { + continue; + } + + if (!drop_all && xconn->initiator_port == NULL) { + continue; + } + + xconn_match = + drop_all ? 
xconn->initiator_name : spdk_scsi_port_get_name(xconn->initiator_port); + + if (!strcasecmp(conn_match, xconn_match) && + conn->target == xconn->target) { + + if (num == 0) { + /* + * Only print this message before we report the + * first dropped connection. + */ + SPDK_ERRLOG("drop old connections %s by %s\n", + conn->target->name, conn_match); + } + + SPDK_ERRLOG("exiting conn by %s (%s)\n", + xconn_match, xconn->initiator_addr); + if (xconn->sess != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=%u\n", xconn->sess->tsih); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "TSIH=xx\n"); + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CID=%u\n", xconn->cid); + + /* Do not set xconn->state if the connection has already started exiting. + * This ensures we do not move a connection from EXITED state back to EXITING. + */ + if (xconn->state < ISCSI_CONN_STATE_EXITING) { + xconn->state = ISCSI_CONN_STATE_EXITING; + } + num++; + } + } + + pthread_mutex_unlock(&g_conns_mutex); + + if (num != 0) { + SPDK_ERRLOG("exiting %d conns\n", num); + } + + return 0; +} + +/** + * \brief Reads data for the specified iSCSI connection from its TCP socket. + * + * The TCP socket is marked as non-blocking, so this function may not read + * all data requested. + * + * Returns SPDK_ISCSI_CONNECTION_FATAL if the recv() operation indicates a fatal + * error with the TCP connection (including if the TCP connection was closed + * unexpectedly. + * + * Otherwise returns the number of bytes successfully read. + */ +int +spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int bytes, + void *buf) +{ + int ret; + + if (bytes == 0) { + return 0; + } + + ret = spdk_sock_recv(conn->sock, buf, bytes); + + if (ret > 0) { + spdk_trace_record(TRACE_ISCSI_READ_FROM_SOCKET_DONE, conn->id, ret, 0, 0); + return ret; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + /* For connect reset issue, do not output error log */ + if (errno == ECONNRESET) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } else { + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + } + + /* connection closed */ + return SPDK_ISCSI_CONNECTION_FATAL; +} + +void +spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task) +{ + struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task); + + spdk_iscsi_task_mgmt_response(task->conn, task); + spdk_iscsi_task_put(task); +} + +static void +process_completed_read_subtask_list(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *primary) +{ + struct spdk_iscsi_task *subtask, *tmp; + + TAILQ_FOREACH_SAFE(subtask, &primary->subtask_list, subtask_link, tmp) { + if (subtask->scsi.offset == primary->bytes_completed) { + TAILQ_REMOVE(&primary->subtask_list, subtask, subtask_link); + primary->bytes_completed += subtask->scsi.length; + spdk_iscsi_task_response(conn, subtask); + spdk_iscsi_task_put(subtask); + } else { + break; + } + } +} + +static void +process_read_task_completion(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, + struct spdk_iscsi_task *primary) +{ + struct spdk_iscsi_task *tmp; + + if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) { + TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) { + spdk_scsi_task_copy_status(&tmp->scsi, &task->scsi); + } + } + + if ((task != primary) && + (task->scsi.offset != primary->bytes_completed)) { + TAILQ_FOREACH(tmp, &primary->subtask_list, subtask_link) { + if (task->scsi.offset < tmp->scsi.offset) { + 
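+ /* Keep the subtask list sorted by offset so that completed reads can be
+  * returned to the initiator in order once the gap in front of them is filled.
+  */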
TAILQ_INSERT_BEFORE(tmp, task, subtask_link); + return; + } + } + + TAILQ_INSERT_TAIL(&primary->subtask_list, task, subtask_link); + return; + } + + primary->bytes_completed += task->scsi.length; + spdk_iscsi_task_response(conn, task); + + if ((task != primary) || + (task->scsi.transfer_len == task->scsi.length)) { + spdk_iscsi_task_put(task); + } + process_completed_read_subtask_list(conn, primary); + + spdk_iscsi_conn_handle_queued_datain_tasks(conn); +} + +void +spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task) +{ + struct spdk_iscsi_task *primary; + struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task); + struct spdk_iscsi_conn *conn = task->conn; + struct spdk_iscsi_pdu *pdu = task->pdu; + + spdk_trace_record(TRACE_ISCSI_TASK_DONE, conn->id, 0, (uintptr_t)task, 0); + + task->is_queued = false; + primary = spdk_iscsi_task_get_primary(task); + + if (spdk_iscsi_task_is_read(primary)) { + process_read_task_completion(conn, task, primary); + } else { + primary->bytes_completed += task->scsi.length; + if (task != primary) { + if (task->scsi.status == SPDK_SCSI_STATUS_GOOD) { + primary->scsi.data_transferred += task->scsi.data_transferred; + } else { + spdk_scsi_task_copy_status(&primary->scsi, &task->scsi); + } + } + + if (primary->bytes_completed == primary->scsi.transfer_len) { + spdk_del_transfer_task(conn, primary->tag); + spdk_iscsi_task_response(conn, primary); + /* + * Check if this is the last task completed for an iSCSI write + * that required child subtasks. If task != primary, we know + * for sure that it was part of an iSCSI write with child subtasks. + * The trickier case is when the last task completed was the initial + * task - in this case the task will have a smaller length than + * the overall transfer length. + */ + if (task != primary || task->scsi.length != task->scsi.transfer_len) { + TAILQ_REMOVE(&conn->active_r2t_tasks, primary, link); + spdk_iscsi_task_put(primary); + } + } + spdk_iscsi_task_put(task); + } + if (!task->parent) { + spdk_trace_record(TRACE_ISCSI_PDU_COMPLETED, 0, 0, (uintptr_t)pdu, 0); + } +} + +static int +spdk_iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest, + int data_digest) +{ + int data_len, enable_digest, total; + + enable_digest = 1; + if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) { + enable_digest = 0; + } + + total = ISCSI_BHS_LEN; + + total += (4 * pdu->bhs.total_ahs_len); + + if (enable_digest && header_digest) { + total += ISCSI_DIGEST_LEN; + } + + data_len = DGET24(pdu->bhs.data_segment_len); + if (data_len > 0) { + total += ISCSI_ALIGN(data_len); + if (enable_digest && data_digest) { + total += ISCSI_DIGEST_LEN; + } + } + + return total; +} + +void +spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn) +{ + uint64_t tsc; + + /** + * This function will be executed by nop_poller of iSCSI polling group, so + * we need to check the connection state first, then do the nop interval + * expiration check work. 
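+ *
+ * Note that conn->nopininterval was already converted to TSC ticks when the
+ * connection was constructed, while conn->timeout is stored in seconds and is
+ * multiplied by spdk_get_ticks_hz() at the comparison below. For example
+ * (illustrative numbers only): with a 2 GHz tick rate and timeout = 30, a
+ * NOP-Out response is considered overdue 60,000,000,000 ticks after last_nopin.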
+ */ + if ((conn->state == ISCSI_CONN_STATE_EXITED) || + (conn->state == ISCSI_CONN_STATE_EXITING)) { + return; + } + + /* Check for nop interval expiration */ + tsc = spdk_get_ticks(); + if (conn->nop_outstanding) { + if ((tsc - conn->last_nopin) > (conn->timeout * spdk_get_ticks_hz())) { + SPDK_ERRLOG("Timed out waiting for NOP-Out response from initiator\n"); + SPDK_ERRLOG(" tsc=0x%lx, last_nopin=0x%lx\n", tsc, conn->last_nopin); + SPDK_ERRLOG(" initiator=%s, target=%s\n", conn->initiator_name, + conn->target_short_name); + conn->state = ISCSI_CONN_STATE_EXITING; + } + } else if (tsc - conn->last_nopin > conn->nopininterval) { + spdk_iscsi_send_nopin(conn); + } +} + +/** + * \brief Makes one attempt to flush response PDUs back to the initiator. + * + * Builds a list of iovecs for response PDUs that must be sent back to the + * initiator and passes it to writev(). + * + * Since the socket is non-blocking, writev() may not be able to flush all + * of the iovecs, and may even partially flush one of the iovecs. In this + * case, the partially flushed PDU will remain on the write_pdu_list with + * an offset pointing to the next byte to be flushed. + * + * Returns 0 if all PDUs were flushed. + * + * Returns 1 if some PDUs could not be flushed due to lack of send buffer + * space. + * + * Returns -1 if an exception error occurred indicating the TCP connection + * should be closed. + */ +static int +spdk_iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn) +{ + const int array_size = 32; + struct iovec iovec_array[array_size]; + struct iovec *iov = iovec_array; + int iovec_cnt = 0; + int bytes = 0; + int total_length = 0; + uint32_t writev_offset; + struct spdk_iscsi_pdu *pdu; + int pdu_length; + + pdu = TAILQ_FIRST(&conn->write_pdu_list); + + if (pdu == NULL) { + return 0; + } + + /* + * Build up a list of iovecs for the first few PDUs in the + * connection's write_pdu_list. + */ + while (pdu != NULL && ((array_size - iovec_cnt) >= 5)) { + pdu_length = spdk_iscsi_get_pdu_length(pdu, + conn->header_digest, + conn->data_digest); + iovec_cnt += spdk_iscsi_build_iovecs(conn, + &iovec_array[iovec_cnt], + pdu); + total_length += pdu_length; + pdu = TAILQ_NEXT(pdu, tailq); + } + + /* + * Check if the first PDU was partially written out the last time + * this function was called, and if so adjust the iovec array + * accordingly. + */ + writev_offset = TAILQ_FIRST(&conn->write_pdu_list)->writev_offset; + total_length -= writev_offset; + while (writev_offset > 0) { + if (writev_offset >= iov->iov_len) { + writev_offset -= iov->iov_len; + iov++; + iovec_cnt--; + } else { + iov->iov_len -= writev_offset; + iov->iov_base = (char *)iov->iov_base + writev_offset; + writev_offset = 0; + } + } + + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovec_cnt); + + bytes = spdk_sock_writev(conn->sock, iov, iovec_cnt); + if (bytes == -1) { + if (errno == EWOULDBLOCK || errno == EAGAIN) { + return 1; + } else { + SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + return -1; + } + } + + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0); + + pdu = TAILQ_FIRST(&conn->write_pdu_list); + + /* + * Free any PDUs that were fully written. If a PDU was only + * partially written, update its writev_offset so that next + * time only the unwritten portion will be sent to writev(). 
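+ *
+ * For example (illustrative numbers only): if a 512-byte PDU had 100 bytes
+ * sent on a previous call and writev() just reported 300 bytes written,
+ * pdu_length becomes 412 after subtracting writev_offset, so the PDU stays on
+ * the list with writev_offset advanced to 400 and the remaining 112 bytes are
+ * sent on a later call.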
+ */ + while (bytes > 0) { + pdu_length = spdk_iscsi_get_pdu_length(pdu, + conn->header_digest, + conn->data_digest); + pdu_length -= pdu->writev_offset; + + if (bytes >= pdu_length) { + bytes -= pdu_length; + TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); + + if ((conn->full_feature) && + (conn->sess->ErrorRecoveryLevel >= 1) && + spdk_iscsi_is_deferred_free_pdu(pdu)) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n", + from_be32(&pdu->bhs.stat_sn)); + TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu, + tailq); + } else { + spdk_iscsi_conn_free_pdu(conn, pdu); + } + + pdu = TAILQ_FIRST(&conn->write_pdu_list); + } else { + pdu->writev_offset += bytes; + bytes = 0; + } + } + + return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1; +} + +/** + * \brief Flushes response PDUs back to the initiator. + * + * This function may return without all PDUs having flushed to the + * underlying TCP socket buffer - for example, in the case where the + * socket buffer is already full. + * + * During normal RUNNING connection state, if not all PDUs are flushed, + * then subsequent calls to this routine will eventually flush + * remaining PDUs. + * + * During other connection states (EXITING or LOGGED_OUT), this + * function will spin until all PDUs have successfully been flushed. + * + * Returns 0 for success and when all PDUs were able to be flushed. + * + * Returns 1 for success but when some PDUs could not be flushed due + * to lack of TCP buffer space. + * + * Returns -1 for an exceptional error indicating the TCP connection + * should be closed. + */ +static int +spdk_iscsi_conn_flush_pdus(void *_conn) +{ + struct spdk_iscsi_conn *conn = _conn; + int rc; + + if (conn->state == ISCSI_CONN_STATE_RUNNING) { + rc = spdk_iscsi_conn_flush_pdus_internal(conn); + if (rc == 0 && conn->flush_poller != NULL) { + spdk_poller_unregister(&conn->flush_poller); + } else if (rc == 1 && conn->flush_poller == NULL) { + conn->flush_poller = spdk_poller_register(spdk_iscsi_conn_flush_pdus, conn, 50); + } + } else { + /* + * If the connection state is not RUNNING, then + * keep trying to flush PDUs until our list is + * empty - to make sure all data is sent before + * closing the connection. + */ + do { + rc = spdk_iscsi_conn_flush_pdus_internal(conn); + } while (rc == 1); + } + + if (rc < 0 && conn->state < ISCSI_CONN_STATE_EXITING) { + /* + * If the poller has already started destruction of the connection, + * i.e. the socket read failed, then the connection state may already + * be EXITED. We don't want to set it back to EXITING in that case. + */ + conn->state = ISCSI_CONN_STATE_EXITING; + } + + return -1; +} + +void +spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq); + spdk_iscsi_conn_flush_pdus(conn); +} + +#define GET_PDU_LOOP_COUNT 16 + +static int +spdk_iscsi_conn_handle_incoming_pdus(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_pdu *pdu; + int i, rc; + + /* Read new PDUs from network */ + for (i = 0; i < GET_PDU_LOOP_COUNT; i++) { + rc = spdk_iscsi_read_pdu(conn, &pdu); + if (rc == 0) { + break; + } else if (rc == SPDK_ISCSI_CONNECTION_FATAL) { + return rc; + } + + if (conn->state == ISCSI_CONN_STATE_LOGGED_OUT) { + SPDK_ERRLOG("pdu received after logout\n"); + spdk_put_pdu(pdu); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + rc = spdk_iscsi_execute(conn, pdu); + spdk_put_pdu(pdu); + if (rc != 0) { + SPDK_ERRLOG("spdk_iscsi_execute() fatal error on %s(%s)\n", + conn->target_port != NULL ? 
spdk_scsi_port_get_name(conn->target_port) : "NULL", + conn->initiator_port != NULL ? spdk_scsi_port_get_name(conn->initiator_port) : "NULL"); + return rc; + } + + spdk_trace_record(TRACE_ISCSI_TASK_EXECUTED, 0, 0, (uintptr_t)pdu, 0); + if (conn->is_stopped) { + break; + } + } + + return i; +} + +static void +spdk_iscsi_conn_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) +{ + struct spdk_iscsi_conn *conn = arg; + int rc; + + assert(conn != NULL); + + if ((conn->state == ISCSI_CONN_STATE_EXITED) || + (conn->state == ISCSI_CONN_STATE_EXITING)) { + return; + } + + /* Handle incoming PDUs */ + rc = spdk_iscsi_conn_handle_incoming_pdus(conn); + if (rc < 0) { + conn->state = ISCSI_CONN_STATE_EXITING; + spdk_iscsi_conn_flush_pdus(conn); + } +} + +static void +spdk_iscsi_conn_full_feature_migrate(void *arg1, void *arg2) +{ + struct spdk_iscsi_conn *conn = arg1; + + if (conn->sess->session_type == SESSION_TYPE_NORMAL) { + spdk_iscsi_conn_open_luns(conn); + } + + /* The poller has been unregistered, so now we can re-register it on the new core. */ + conn->lcore = spdk_env_get_current_core(); + spdk_iscsi_poll_group_add_conn(conn); +} + +void +spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn) +{ + int lcore; + struct spdk_event *event; + struct spdk_iscsi_tgt_node *target; + + lcore = spdk_iscsi_conn_allocate_reactor(conn->portal->cpumask); + if (conn->sess->session_type == SESSION_TYPE_NORMAL) { + target = conn->sess->target; + pthread_mutex_lock(&target->mutex); + target->num_active_conns++; + if (target->num_active_conns == 1) { + /** + * This is the only active connection for this target node. + * Save the lcore in the target node so it can be used for + * any other connections to this target node. + */ + target->lcore = lcore; + } else { + /** + * There are other active connections for this target node. + * Ignore the lcore specified by the allocator and use the + * the target node's lcore to ensure this connection runs on + * the same lcore as other connections for this target node. + */ + lcore = target->lcore; + } + pthread_mutex_unlock(&target->mutex); + } + + spdk_iscsi_poll_group_remove_conn_sock(conn); + spdk_poller_unregister(&conn->flush_poller); + spdk_iscsi_conn_stop(conn); + + __sync_fetch_and_add(&g_num_connections[lcore], 1); + conn->last_nopin = spdk_get_ticks(); + event = spdk_event_allocate(lcore, spdk_iscsi_conn_full_feature_migrate, + conn, NULL); + spdk_event_call(event); +} + +void +spdk_iscsi_conn_set_min_per_core(int count) +{ + g_connections_per_lcore = count; +} + +int +spdk_iscsi_conn_get_min_per_core(void) +{ + return g_connections_per_lcore; +} + +static uint32_t +spdk_iscsi_conn_allocate_reactor(const struct spdk_cpuset *cpumask) +{ + uint32_t i, selected_core; + int32_t num_pollers, min_pollers; + + min_pollers = INT_MAX; + selected_core = spdk_env_get_first_core(); + + SPDK_ENV_FOREACH_CORE(i) { + if (!spdk_cpuset_get_cpu(cpumask, i)) { + continue; + } + + /* This core is running. Check how many pollers it already has. */ + num_pollers = g_num_connections[i]; + + if ((num_pollers > 0) && (num_pollers < g_connections_per_lcore)) { + /* Fewer than the maximum connections per core, + * but at least 1. Use this core. 
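+ *
+ * For example (illustrative counts only): with g_connections_per_lcore = 4
+ * and candidate cores currently holding 0, 3 and 5 connections, the core
+ * holding 3 is returned as soon as it is reached; if every candidate held 0
+ * or >= 4 connections, the least-loaded candidate (tracked via min_pollers
+ * and selected_core) would be chosen instead.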
+ */ + return i; + } else if (num_pollers < min_pollers) { + /* Track the core that has the minimum number of pollers + * to be used if no cores meet our criteria + */ + selected_core = i; + min_pollers = num_pollers; + } + } + + return selected_core; +} + +static int +logout_timeout(void *arg) +{ + struct spdk_iscsi_conn *conn = arg; + + spdk_iscsi_conn_destruct(conn); + + return -1; +} + +void +spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn) +{ + conn->state = ISCSI_CONN_STATE_LOGGED_OUT; + conn->logout_timer = spdk_poller_register(logout_timeout, conn, ISCSI_LOGOUT_TIMEOUT * 1000000); +} + +SPDK_TRACE_REGISTER_FN(iscsi_conn_trace) +{ + spdk_trace_register_owner(OWNER_ISCSI_CONN, 'c'); + spdk_trace_register_object(OBJECT_ISCSI_PDU, 'p'); + spdk_trace_register_description("ISCSI_READ_FROM_SOCKET_DONE", "", + TRACE_ISCSI_READ_FROM_SOCKET_DONE, + OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_START", "", TRACE_ISCSI_FLUSH_WRITEBUF_START, + OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, "iovec: "); + spdk_trace_register_description("ISCSI_FLUSH_WRITEBUF_DONE", "", TRACE_ISCSI_FLUSH_WRITEBUF_DONE, + OWNER_ISCSI_CONN, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("ISCSI_READ_PDU", "", TRACE_ISCSI_READ_PDU, + OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 1, 0, "opc: "); + spdk_trace_register_description("ISCSI_TASK_DONE", "", TRACE_ISCSI_TASK_DONE, + OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 0, 0, ""); + spdk_trace_register_description("ISCSI_TASK_QUEUE", "", TRACE_ISCSI_TASK_QUEUE, + OWNER_ISCSI_CONN, OBJECT_SCSI_TASK, 1, 1, "pdu: "); + spdk_trace_register_description("ISCSI_TASK_EXECUTED", "", TRACE_ISCSI_TASK_EXECUTED, + OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, ""); + spdk_trace_register_description("ISCSI_PDU_COMPLETED", "", TRACE_ISCSI_PDU_COMPLETED, + OWNER_ISCSI_CONN, OBJECT_ISCSI_PDU, 0, 0, ""); +} diff --git a/src/spdk/lib/iscsi/conn.h b/src/spdk/lib/iscsi/conn.h new file mode 100644 index 00000000..4a91e698 --- /dev/null +++ b/src/spdk/lib/iscsi/conn.h @@ -0,0 +1,193 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ISCSI_CONN_H +#define SPDK_ISCSI_CONN_H + +#include "spdk/stdinc.h" + +#include "iscsi/iscsi.h" +#include "spdk/queue.h" +#include "spdk/cpuset.h" +#include "spdk/scsi.h" + +/* + * MAX_CONNECTION_PARAMS: The numbers of the params in conn_param_table + * MAX_SESSION_PARAMS: The numbers of the params in sess_param_table + */ +#define MAX_CONNECTION_PARAMS 14 +#define MAX_SESSION_PARAMS 19 + +#define MAX_ADDRBUF 64 +#define MAX_INITIATOR_ADDR (MAX_ADDRBUF) +#define MAX_TARGET_ADDR (MAX_ADDRBUF) + +#define OWNER_ISCSI_CONN 0x1 + +#define OBJECT_ISCSI_PDU 0x1 + +#define TRACE_GROUP_ISCSI 0x1 +#define TRACE_ISCSI_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x0) +#define TRACE_ISCSI_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x1) +#define TRACE_ISCSI_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x2) +#define TRACE_ISCSI_READ_PDU SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x3) +#define TRACE_ISCSI_TASK_DONE SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x4) +#define TRACE_ISCSI_TASK_QUEUE SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x5) +#define TRACE_ISCSI_TASK_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x6) +#define TRACE_ISCSI_PDU_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_ISCSI, 0x7) + +struct spdk_poller; + +struct spdk_iscsi_conn { + int id; + int is_valid; + /* + * All fields below this point are reinitialized each time the + * connection object is allocated. Make sure to update the + * SPDK_ISCSI_CONNECTION_MEMSET() macro if changing which fields + * are initialized when allocated. + */ + struct spdk_iscsi_portal *portal; + int pg_tag; + char *portal_host; + char *portal_port; + struct spdk_cpuset *portal_cpumask; + uint32_t lcore; + struct spdk_sock *sock; + struct spdk_iscsi_sess *sess; + + enum iscsi_connection_state state; + int login_phase; + + uint64_t last_flush; + uint64_t last_fill; + uint64_t last_nopin; + + /* Timer used to destroy connection after logout if initiator does + * not close the connection. 
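+ * The timer is armed by spdk_iscsi_conn_logout() in conn.c with a period of
+ * ISCSI_LOGOUT_TIMEOUT * 1000000 microseconds, after which logout_timeout()
+ * destructs the connection.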
+ */ + struct spdk_poller *logout_timer; + + /* Timer used to wait for connection to close + */ + struct spdk_poller *shutdown_timer; + + struct spdk_iscsi_pdu *pdu_in_progress; + + TAILQ_HEAD(, spdk_iscsi_pdu) write_pdu_list; + TAILQ_HEAD(, spdk_iscsi_pdu) snack_pdu_list; + + int pending_r2t; + struct spdk_iscsi_task *outstanding_r2t_tasks[DEFAULT_MAXR2T]; + + uint16_t cid; + + /* IP address */ + char initiator_addr[MAX_INITIATOR_ADDR]; + char target_addr[MAX_TARGET_ADDR]; + + /* Initiator/Target port binds */ + char initiator_name[MAX_INITIATOR_NAME]; + struct spdk_scsi_port *initiator_port; + char target_short_name[MAX_TARGET_NAME]; + struct spdk_scsi_port *target_port; + struct spdk_iscsi_tgt_node *target; + struct spdk_scsi_dev *dev; + + /* for fast access */ + int header_digest; + int data_digest; + int full_feature; + + struct iscsi_param *params; + bool sess_param_state_negotiated[MAX_SESSION_PARAMS]; + bool conn_param_state_negotiated[MAX_CONNECTION_PARAMS]; + struct iscsi_chap_auth auth; + int authenticated; + int req_auth; + int req_mutual; + uint32_t pending_task_cnt; + uint32_t data_out_cnt; + uint32_t data_in_cnt; + bool pending_activate_event; + + int timeout; + uint64_t nopininterval; + bool nop_outstanding; + + /* + * This is the maximum data segment length that iscsi target can send + * to the initiator on this connection. Not to be confused with the + * maximum data segment length that initiators can send to iscsi target, which + * is statically defined as SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH. + */ + int MaxRecvDataSegmentLength; + + uint32_t StatSN; + uint32_t exp_statsn; + uint32_t ttt; /* target transfer tag */ + char *partial_text_parameter; + + STAILQ_ENTRY(spdk_iscsi_conn) link; + struct spdk_poller *flush_poller; + bool is_stopped; /* Set true when connection is stopped for migration */ + TAILQ_HEAD(queued_r2t_tasks, spdk_iscsi_task) queued_r2t_tasks; + TAILQ_HEAD(active_r2t_tasks, spdk_iscsi_task) active_r2t_tasks; + TAILQ_HEAD(queued_datain_tasks, spdk_iscsi_task) queued_datain_tasks; + + struct spdk_scsi_desc *open_lun_descs[SPDK_SCSI_DEV_MAX_LUN]; +}; + +extern struct spdk_iscsi_conn *g_conns_array; + +int spdk_initialize_iscsi_conns(void); +void spdk_shutdown_iscsi_conns(void); + +int spdk_iscsi_conn_construct(struct spdk_iscsi_portal *portal, struct spdk_sock *sock); +void spdk_iscsi_conn_destruct(struct spdk_iscsi_conn *conn); +void spdk_iscsi_conn_handle_nop(struct spdk_iscsi_conn *conn); +void spdk_iscsi_conn_migration(struct spdk_iscsi_conn *conn); +void spdk_iscsi_conn_logout(struct spdk_iscsi_conn *conn); +int spdk_iscsi_drop_conns(struct spdk_iscsi_conn *conn, + const char *conn_match, int drop_all); +void spdk_iscsi_conn_set_min_per_core(int count); +int spdk_iscsi_conn_get_min_per_core(void); + +int spdk_iscsi_conn_read_data(struct spdk_iscsi_conn *conn, int len, + void *buf); +void spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu); + +void spdk_iscsi_conn_free_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu); + +#endif /* SPDK_ISCSI_CONN_H */ diff --git a/src/spdk/lib/iscsi/init_grp.c b/src/spdk/lib/iscsi/init_grp.c new file mode 100644 index 00000000..33b7bfc3 --- /dev/null +++ b/src/spdk/lib/iscsi/init_grp.c @@ -0,0 +1,786 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/conf.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#include "iscsi/iscsi.h" +#include "iscsi/init_grp.h" + +static struct spdk_iscsi_init_grp * +spdk_iscsi_init_grp_create(int tag) +{ + struct spdk_iscsi_init_grp *ig; + + ig = calloc(1, sizeof(*ig)); + if (ig == NULL) { + SPDK_ERRLOG("calloc() failed for initiator group\n"); + return NULL; + } + + ig->tag = tag; + TAILQ_INIT(&ig->initiator_head); + TAILQ_INIT(&ig->netmask_head); + return ig; +} + +static struct spdk_iscsi_initiator_name * +spdk_iscsi_init_grp_find_initiator(struct spdk_iscsi_init_grp *ig, char *name) +{ + struct spdk_iscsi_initiator_name *iname; + + TAILQ_FOREACH(iname, &ig->initiator_head, tailq) { + if (!strcmp(iname->name, name)) { + return iname; + } + } + return NULL; +} + +static int +spdk_iscsi_init_grp_add_initiator(struct spdk_iscsi_init_grp *ig, char *name) +{ + struct spdk_iscsi_initiator_name *iname; + char *p; + + if (ig->ninitiators >= MAX_INITIATOR) { + SPDK_ERRLOG("> MAX_INITIATOR(=%d) is not allowed\n", MAX_INITIATOR); + return -EPERM; + } + + iname = spdk_iscsi_init_grp_find_initiator(ig, name); + if (iname != NULL) { + return -EEXIST; + } + + iname = malloc(sizeof(*iname)); + if (iname == NULL) { + SPDK_ERRLOG("malloc() failed for initiator name str\n"); + return -ENOMEM; + } + + iname->name = strdup(name); + if (iname->name == NULL) { + SPDK_ERRLOG("strdup() failed for initiator name\n"); + free(iname); + return -ENOMEM; + } + + /* Replace "ALL" by "ANY" if set */ + p = strstr(iname->name, "ALL"); + if (p != NULL) { + SPDK_WARNLOG("Please use \"%s\" instead of \"%s\"\n", "ANY", "ALL"); + SPDK_WARNLOG("Converting \"%s\" to \"%s\" automatically\n", "ALL", "ANY"); + memcpy(p, "ANY", 3); + } + + TAILQ_INSERT_TAIL(&ig->initiator_head, iname, tailq); + ig->ninitiators++; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "InitiatorName %s\n", name); + return 0; +} + +static int +spdk_iscsi_init_grp_delete_initiator(struct spdk_iscsi_init_grp *ig, char *name) +{ + 
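+ /*
+  * Look up the entry by name; if found, unlink it from the group's list
+  * and free both the strdup()'d name string and the entry itself.
+  * Returns -ENOENT when no initiator with this name exists.
+  */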
struct spdk_iscsi_initiator_name *iname; + + iname = spdk_iscsi_init_grp_find_initiator(ig, name); + if (iname == NULL) { + return -ENOENT; + } + + TAILQ_REMOVE(&ig->initiator_head, iname, tailq); + ig->ninitiators--; + free(iname->name); + free(iname); + return 0; +} + +static int +spdk_iscsi_init_grp_add_initiators(struct spdk_iscsi_init_grp *ig, int num_inames, char **inames) +{ + int i; + int rc; + + for (i = 0; i < num_inames; i++) { + rc = spdk_iscsi_init_grp_add_initiator(ig, inames[i]); + if (rc < 0) { + goto cleanup; + } + } + return 0; + +cleanup: + for (; i > 0; --i) { + spdk_iscsi_init_grp_delete_initiator(ig, inames[i - 1]); + } + return rc; +} + +static void +spdk_iscsi_init_grp_delete_all_initiators(struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_initiator_name *iname, *tmp; + + TAILQ_FOREACH_SAFE(iname, &ig->initiator_head, tailq, tmp) { + TAILQ_REMOVE(&ig->initiator_head, iname, tailq); + ig->ninitiators--; + free(iname->name); + free(iname); + } +} + +static int +spdk_iscsi_init_grp_delete_initiators(struct spdk_iscsi_init_grp *ig, int num_inames, char **inames) +{ + int i; + int rc; + + for (i = 0; i < num_inames; i++) { + rc = spdk_iscsi_init_grp_delete_initiator(ig, inames[i]); + if (rc < 0) { + goto cleanup; + } + } + return 0; + +cleanup: + for (; i > 0; --i) { + rc = spdk_iscsi_init_grp_add_initiator(ig, inames[i - 1]); + if (rc != 0) { + spdk_iscsi_init_grp_delete_all_initiators(ig); + break; + } + } + return -1; +} + +static struct spdk_iscsi_initiator_netmask * +spdk_iscsi_init_grp_find_netmask(struct spdk_iscsi_init_grp *ig, const char *mask) +{ + struct spdk_iscsi_initiator_netmask *netmask; + + TAILQ_FOREACH(netmask, &ig->netmask_head, tailq) { + if (!strcmp(netmask->mask, mask)) { + return netmask; + } + } + return NULL; +} + +static int +spdk_iscsi_init_grp_add_netmask(struct spdk_iscsi_init_grp *ig, char *mask) +{ + struct spdk_iscsi_initiator_netmask *imask; + char *p; + + if (ig->nnetmasks >= MAX_NETMASK) { + SPDK_ERRLOG("> MAX_NETMASK(=%d) is not allowed\n", MAX_NETMASK); + return -EPERM; + } + + imask = spdk_iscsi_init_grp_find_netmask(ig, mask); + if (imask != NULL) { + return -EEXIST; + } + + imask = malloc(sizeof(*imask)); + if (imask == NULL) { + SPDK_ERRLOG("malloc() failed for inititator mask str\n"); + return -ENOMEM; + } + + imask->mask = strdup(mask); + if (imask->mask == NULL) { + SPDK_ERRLOG("strdup() failed for initiator mask\n"); + free(imask); + return -ENOMEM; + } + + /* Replace "ALL" by "ANY" if set */ + p = strstr(imask->mask, "ALL"); + if (p != NULL) { + SPDK_WARNLOG("Please use \"%s\" instead of \"%s\"\n", "ANY", "ALL"); + SPDK_WARNLOG("Converting \"%s\" to \"%s\" automatically\n", "ALL", "ANY"); + memcpy(p, "ANY", 3); + } + + TAILQ_INSERT_TAIL(&ig->netmask_head, imask, tailq); + ig->nnetmasks++; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Netmask %s\n", mask); + return 0; +} + +static int +spdk_iscsi_init_grp_delete_netmask(struct spdk_iscsi_init_grp *ig, char *mask) +{ + struct spdk_iscsi_initiator_netmask *imask; + + imask = spdk_iscsi_init_grp_find_netmask(ig, mask); + if (imask == NULL) { + return -ENOENT; + } + + TAILQ_REMOVE(&ig->netmask_head, imask, tailq); + ig->nnetmasks--; + free(imask->mask); + free(imask); + return 0; +} + +static int +spdk_iscsi_init_grp_add_netmasks(struct spdk_iscsi_init_grp *ig, int num_imasks, char **imasks) +{ + int i; + int rc; + + for (i = 0; i < num_imasks; i++) { + rc = spdk_iscsi_init_grp_add_netmask(ig, imasks[i]); + if (rc != 0) { + goto cleanup; + } + } + return 0; + +cleanup: + for (; i > 0; 
--i) { + spdk_iscsi_init_grp_delete_netmask(ig, imasks[i - 1]); + } + return rc; +} + +static void +spdk_iscsi_init_grp_delete_all_netmasks(struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_initiator_netmask *imask, *tmp; + + TAILQ_FOREACH_SAFE(imask, &ig->netmask_head, tailq, tmp) { + TAILQ_REMOVE(&ig->netmask_head, imask, tailq); + ig->nnetmasks--; + free(imask->mask); + free(imask); + } +} + +static int +spdk_iscsi_init_grp_delete_netmasks(struct spdk_iscsi_init_grp *ig, int num_imasks, char **imasks) +{ + int i; + int rc; + + for (i = 0; i < num_imasks; i++) { + rc = spdk_iscsi_init_grp_delete_netmask(ig, imasks[i]); + if (rc != 0) { + goto cleanup; + } + } + return 0; + +cleanup: + for (; i > 0; --i) { + rc = spdk_iscsi_init_grp_add_netmask(ig, imasks[i - 1]); + if (rc != 0) { + spdk_iscsi_init_grp_delete_all_netmasks(ig); + break; + } + } + return -1; +} + +/* Read spdk iscsi target's config file and create initiator group */ +static int +spdk_iscsi_parse_init_grp(struct spdk_conf_section *sp) +{ + int i, rc = 0; + const char *val = NULL; + int num_initiator_names; + int num_initiator_masks; + char **initiators = NULL, **netmasks = NULL; + int tag = spdk_conf_section_get_num(sp); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "add initiator group %d\n", tag); + + val = spdk_conf_section_get_val(sp, "Comment"); + if (val != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Comment %s\n", val); + } + + /* counts number of definitions */ + for (i = 0; ; i++) { + val = spdk_conf_section_get_nval(sp, "InitiatorName", i); + if (val == NULL) { + break; + } + } + if (i == 0) { + SPDK_ERRLOG("num_initiator_names = 0\n"); + return -EINVAL; + } + num_initiator_names = i; + if (num_initiator_names > MAX_INITIATOR) { + SPDK_ERRLOG("%d > MAX_INITIATOR\n", num_initiator_names); + return -E2BIG; + } + for (i = 0; ; i++) { + val = spdk_conf_section_get_nval(sp, "Netmask", i); + if (val == NULL) { + break; + } + } + if (i == 0) { + SPDK_ERRLOG("num_initiator_mask = 0\n"); + return -EINVAL; + } + num_initiator_masks = i; + if (num_initiator_masks > MAX_NETMASK) { + SPDK_ERRLOG("%d > MAX_NETMASK\n", num_initiator_masks); + return -E2BIG; + } + + initiators = calloc(num_initiator_names, sizeof(char *)); + if (!initiators) { + SPDK_ERRLOG("calloc() failed for temp initiator name array\n"); + return -ENOMEM; + } + for (i = 0; i < num_initiator_names; i++) { + val = spdk_conf_section_get_nval(sp, "InitiatorName", i); + if (!val) { + SPDK_ERRLOG("InitiatorName %d not found\n", i); + rc = -EINVAL; + goto cleanup; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "InitiatorName %s\n", val); + initiators[i] = strdup(val); + if (!initiators[i]) { + SPDK_ERRLOG("strdup() failed for temp initiator name\n"); + rc = -ENOMEM; + goto cleanup; + } + } + netmasks = calloc(num_initiator_masks, sizeof(char *)); + if (!netmasks) { + SPDK_ERRLOG("malloc() failed for portal group\n"); + rc = -ENOMEM; + goto cleanup; + } + for (i = 0; i < num_initiator_masks; i++) { + val = spdk_conf_section_get_nval(sp, "Netmask", i); + if (!val) { + SPDK_ERRLOG("Netmask %d not found\n", i); + rc = -EINVAL; + goto cleanup; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Netmask %s\n", val); + netmasks[i] = strdup(val); + if (!netmasks[i]) { + SPDK_ERRLOG("strdup() failed for temp initiator mask\n"); + rc = -ENOMEM; + goto cleanup; + } + } + + rc = spdk_iscsi_init_grp_create_from_initiator_list(tag, + num_initiator_names, initiators, num_initiator_masks, netmasks); + +cleanup: + if (initiators) { + for (i = 0; i < num_initiator_names; i++) { + if (initiators[i]) { + 
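+ /* The group keeps its own strdup()'d copies of these strings, so the
+  * temporary config values are released on both the success and error paths. */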
free(initiators[i]); + } + } + free(initiators); + } + if (netmasks) { + for (i = 0; i < num_initiator_masks; i++) { + if (netmasks[i]) { + free(netmasks[i]); + } + } + free(netmasks); + } + return rc; +} + +int +spdk_iscsi_init_grp_register(struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_init_grp *tmp; + int rc = -1; + + assert(ig != NULL); + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + tmp = spdk_iscsi_init_grp_find_by_tag(ig->tag); + if (tmp == NULL) { + TAILQ_INSERT_TAIL(&g_spdk_iscsi.ig_head, ig, tailq); + rc = 0; + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return rc; +} + +/* + * Create initiator group from list of initiator ip/hostnames and netmasks + * The initiator hostname/netmask lists are allocated by the caller on the + * heap. Freed later by common initiator_group_destroy() code + */ +int +spdk_iscsi_init_grp_create_from_initiator_list(int tag, + int num_initiator_names, + char **initiator_names, + int num_initiator_masks, + char **initiator_masks) +{ + int rc = -1; + struct spdk_iscsi_init_grp *ig = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "add initiator group (from initiator list) tag=%d, #initiators=%d, #masks=%d\n", + tag, num_initiator_names, num_initiator_masks); + + ig = spdk_iscsi_init_grp_create(tag); + if (!ig) { + SPDK_ERRLOG("initiator group create error (%d)\n", tag); + return rc; + } + + rc = spdk_iscsi_init_grp_add_initiators(ig, num_initiator_names, + initiator_names); + if (rc < 0) { + SPDK_ERRLOG("add initiator name error\n"); + goto cleanup; + } + + rc = spdk_iscsi_init_grp_add_netmasks(ig, num_initiator_masks, + initiator_masks); + if (rc < 0) { + SPDK_ERRLOG("add initiator netmask error\n"); + goto cleanup; + } + + rc = spdk_iscsi_init_grp_register(ig); + if (rc < 0) { + SPDK_ERRLOG("initiator group register error (%d)\n", tag); + goto cleanup; + } + return 0; + +cleanup: + spdk_iscsi_init_grp_destroy(ig); + return rc; +} + +int +spdk_iscsi_init_grp_add_initiators_from_initiator_list(int tag, + int num_initiator_names, + char **initiator_names, + int num_initiator_masks, + char **initiator_masks) +{ + int rc = -1; + struct spdk_iscsi_init_grp *ig; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "add initiator to initiator group: tag=%d, #initiators=%d, #masks=%d\n", + tag, num_initiator_names, num_initiator_masks); + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + ig = spdk_iscsi_init_grp_find_by_tag(tag); + if (!ig) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + SPDK_ERRLOG("initiator group (%d) is not found\n", tag); + return rc; + } + + rc = spdk_iscsi_init_grp_add_initiators(ig, num_initiator_names, + initiator_names); + if (rc < 0) { + SPDK_ERRLOG("add initiator name error\n"); + goto error; + } + + rc = spdk_iscsi_init_grp_add_netmasks(ig, num_initiator_masks, + initiator_masks); + if (rc < 0) { + SPDK_ERRLOG("add initiator netmask error\n"); + spdk_iscsi_init_grp_delete_initiators(ig, num_initiator_names, + initiator_names); + } + +error: + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return rc; +} + +int +spdk_iscsi_init_grp_delete_initiators_from_initiator_list(int tag, + int num_initiator_names, + char **initiator_names, + int num_initiator_masks, + char **initiator_masks) +{ + int rc = -1; + struct spdk_iscsi_init_grp *ig; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "delete initiator from initiator group: tag=%d, #initiators=%d, #masks=%d\n", + tag, num_initiator_names, num_initiator_masks); + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + ig = spdk_iscsi_init_grp_find_by_tag(tag); + if (!ig) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + 
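+ /* No group is registered under this tag; the mutex has already been
+  * released for this early return. */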
SPDK_ERRLOG("initiator group (%d) is not found\n", tag); + return rc; + } + + rc = spdk_iscsi_init_grp_delete_initiators(ig, num_initiator_names, + initiator_names); + if (rc < 0) { + SPDK_ERRLOG("delete initiator name error\n"); + goto error; + } + + rc = spdk_iscsi_init_grp_delete_netmasks(ig, num_initiator_masks, + initiator_masks); + if (rc < 0) { + SPDK_ERRLOG("delete initiator netmask error\n"); + spdk_iscsi_init_grp_add_initiators(ig, num_initiator_names, + initiator_names); + goto error; + } + +error: + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return rc; +} + +void +spdk_iscsi_init_grp_destroy(struct spdk_iscsi_init_grp *ig) +{ + if (!ig) { + return; + } + + spdk_iscsi_init_grp_delete_all_initiators(ig); + spdk_iscsi_init_grp_delete_all_netmasks(ig); + free(ig); +}; + +struct spdk_iscsi_init_grp * +spdk_iscsi_init_grp_find_by_tag(int tag) +{ + struct spdk_iscsi_init_grp *ig; + + TAILQ_FOREACH(ig, &g_spdk_iscsi.ig_head, tailq) { + if (ig->tag == tag) { + return ig; + } + } + + return NULL; +} + +int +spdk_iscsi_parse_init_grps(void) +{ + struct spdk_conf_section *sp; + int rc; + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "InitiatorGroup")) { + if (spdk_conf_section_get_num(sp) == 0) { + SPDK_ERRLOG("Group 0 is invalid\n"); + return -1; + } + rc = spdk_iscsi_parse_init_grp(sp); + if (rc < 0) { + SPDK_ERRLOG("parse_init_group() failed\n"); + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +void +spdk_iscsi_init_grps_destroy(void) +{ + struct spdk_iscsi_init_grp *ig, *tmp; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_init_grp_array_destroy\n"); + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH_SAFE(ig, &g_spdk_iscsi.ig_head, tailq, tmp) { + TAILQ_REMOVE(&g_spdk_iscsi.ig_head, ig, tailq); + spdk_iscsi_init_grp_destroy(ig); + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); +} + +struct spdk_iscsi_init_grp * +spdk_iscsi_init_grp_unregister(int tag) +{ + struct spdk_iscsi_init_grp *ig; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH(ig, &g_spdk_iscsi.ig_head, tailq) { + if (ig->tag == tag) { + TAILQ_REMOVE(&g_spdk_iscsi.ig_head, ig, tailq); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return ig; + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return NULL; +} + +static const char *initiator_group_section = \ + "\n" + "# Users must change the InitiatorGroup section(s) to match the IP\n" + "# addresses and initiator configuration in their environment.\n" + "# Netmask can be used to specify a single IP address or a range of IP addresses\n" + "# Netmask 192.168.1.20 <== single IP address\n" + "# Netmask 192.168.1.0/24 <== IP range 192.168.1.*\n"; + +#define INITIATOR_GROUP_TMPL \ +"[InitiatorGroup%d]\n" \ +" Comment \"Initiator Group%d\"\n" + +#define INITIATOR_TMPL \ +" InitiatorName " + +#define NETMASK_TMPL \ +" Netmask " + +void +spdk_iscsi_init_grps_config_text(FILE *fp) +{ + struct spdk_iscsi_init_grp *ig; + struct spdk_iscsi_initiator_name *iname; + struct spdk_iscsi_initiator_netmask *imask; + + /* Create initiator group section */ + fprintf(fp, "%s", initiator_group_section); + + /* Dump initiator groups */ + TAILQ_FOREACH(ig, &g_spdk_iscsi.ig_head, tailq) { + if (NULL == ig) { continue; } + fprintf(fp, INITIATOR_GROUP_TMPL, ig->tag, ig->tag); + + /* Dump initiators */ + fprintf(fp, INITIATOR_TMPL); + TAILQ_FOREACH(iname, &ig->initiator_head, tailq) { + fprintf(fp, "%s ", iname->name); + } + fprintf(fp, "\n"); + + /* Dump netmasks */ + fprintf(fp, NETMASK_TMPL); + 
TAILQ_FOREACH(imask, &ig->netmask_head, tailq) { + fprintf(fp, "%s ", imask->mask); + } + fprintf(fp, "\n"); + } +} + +static void +spdk_iscsi_init_grp_info_json(struct spdk_iscsi_init_grp *ig, + struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_initiator_name *iname; + struct spdk_iscsi_initiator_netmask *imask; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_int32(w, "tag", ig->tag); + + spdk_json_write_named_array_begin(w, "initiators"); + TAILQ_FOREACH(iname, &ig->initiator_head, tailq) { + spdk_json_write_string(w, iname->name); + } + spdk_json_write_array_end(w); + + spdk_json_write_named_array_begin(w, "netmasks"); + TAILQ_FOREACH(imask, &ig->netmask_head, tailq) { + spdk_json_write_string(w, imask->mask); + } + spdk_json_write_array_end(w); + + spdk_json_write_object_end(w); +} + +static void +spdk_iscsi_init_grp_config_json(struct spdk_iscsi_init_grp *ig, + struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "add_initiator_group"); + + spdk_json_write_name(w, "params"); + spdk_iscsi_init_grp_info_json(ig, w); + + spdk_json_write_object_end(w); +} + +void +spdk_iscsi_init_grps_info_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_init_grp *ig; + + TAILQ_FOREACH(ig, &g_spdk_iscsi.ig_head, tailq) { + spdk_iscsi_init_grp_info_json(ig, w); + } +} + +void +spdk_iscsi_init_grps_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_init_grp *ig; + + TAILQ_FOREACH(ig, &g_spdk_iscsi.ig_head, tailq) { + spdk_iscsi_init_grp_config_json(ig, w); + } +} diff --git a/src/spdk/lib/iscsi/init_grp.h b/src/spdk/lib/iscsi/init_grp.h new file mode 100644 index 00000000..ff24ee5b --- /dev/null +++ b/src/spdk/lib/iscsi/init_grp.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_INIT_GRP_H +#define SPDK_INIT_GRP_H + +#include "spdk/conf.h" + +struct spdk_iscsi_initiator_name { + char *name; + TAILQ_ENTRY(spdk_iscsi_initiator_name) tailq; +}; + +struct spdk_iscsi_initiator_netmask { + char *mask; + TAILQ_ENTRY(spdk_iscsi_initiator_netmask) tailq; +}; + +struct spdk_iscsi_init_grp { + int ninitiators; + TAILQ_HEAD(, spdk_iscsi_initiator_name) initiator_head; + int nnetmasks; + TAILQ_HEAD(, spdk_iscsi_initiator_netmask) netmask_head; + int ref; + int tag; + TAILQ_ENTRY(spdk_iscsi_init_grp) tailq; +}; + +/* SPDK iSCSI Initiator Group management API */ +int spdk_iscsi_init_grp_create_from_initiator_list(int tag, + int num_initiator_names, char **initiator_names, + int num_initiator_masks, char **initiator_masks); +int spdk_iscsi_init_grp_add_initiators_from_initiator_list(int tag, + int num_initiator_names, char **initiator_names, + int num_initiator_masks, char **initiator_masks); +int spdk_iscsi_init_grp_delete_initiators_from_initiator_list(int tag, + int num_initiator_names, char **initiator_names, + int num_initiator_masks, char **initiator_masks); +int spdk_iscsi_init_grp_register(struct spdk_iscsi_init_grp *ig); +struct spdk_iscsi_init_grp *spdk_iscsi_init_grp_unregister(int tag); +struct spdk_iscsi_init_grp *spdk_iscsi_init_grp_find_by_tag(int tag); +void spdk_iscsi_init_grp_destroy(struct spdk_iscsi_init_grp *ig); +int spdk_iscsi_parse_init_grps(void); +void spdk_iscsi_init_grps_destroy(void); +void spdk_iscsi_init_grps_config_text(FILE *fp); +void spdk_iscsi_init_grps_info_json(struct spdk_json_write_ctx *w); +void spdk_iscsi_init_grps_config_json(struct spdk_json_write_ctx *w); +#endif // SPDK_INIT_GRP_H diff --git a/src/spdk/lib/iscsi/iscsi.c b/src/spdk/lib/iscsi/iscsi.c new file mode 100644 index 00000000..7d96c9cb --- /dev/null +++ b/src/spdk/lib/iscsi/iscsi.c @@ -0,0 +1,4583 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/crc32.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/trace.h" +#include "spdk/string.h" +#include "spdk/queue.h" +#include "spdk/net.h" + +#include "iscsi/md5.h" +#include "iscsi/iscsi.h" +#include "iscsi/param.h" +#include "iscsi/tgt_node.h" +#include "iscsi/task.h" +#include "iscsi/conn.h" +#include "spdk/scsi.h" +#include "spdk/bdev.h" +#include "iscsi/portal_grp.h" +#include "iscsi/acceptor.h" + +#include "spdk_internal/log.h" + +#define MAX_TMPBUF 1024 + +#define SPDK_CRC32C_INITIAL 0xffffffffUL +#define SPDK_CRC32C_XOR 0xffffffffUL + +#ifdef __FreeBSD__ +#define HAVE_SRANDOMDEV 1 +#define HAVE_ARC4RANDOM 1 +#endif + +struct spdk_iscsi_globals g_spdk_iscsi = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .portal_head = TAILQ_HEAD_INITIALIZER(g_spdk_iscsi.portal_head), + .pg_head = TAILQ_HEAD_INITIALIZER(g_spdk_iscsi.pg_head), + .ig_head = TAILQ_HEAD_INITIALIZER(g_spdk_iscsi.ig_head), + .target_head = TAILQ_HEAD_INITIALIZER(g_spdk_iscsi.target_head), + .auth_group_head = TAILQ_HEAD_INITIALIZER(g_spdk_iscsi.auth_group_head), +}; + +/* random value generation */ +static void spdk_gen_random(uint8_t *buf, size_t len); +#ifndef HAVE_SRANDOMDEV +static void srandomdev(void); +#endif /* HAVE_SRANDOMDEV */ +#ifndef HAVE_ARC4RANDOM +//static uint32_t arc4random(void); +#endif /* HAVE_ARC4RANDOM */ + +/* convert from/to bin/hex */ +static int spdk_bin2hex(char *buf, size_t len, const uint8_t *data, size_t data_len); +static int spdk_hex2bin(uint8_t *data, size_t data_len, const char *str); + +static int spdk_add_transfer_task(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task); + +static int spdk_iscsi_send_r2t(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, int offset, + int len, uint32_t transfer_tag, uint32_t *R2TSN); +static int spdk_iscsi_send_r2t_recovery(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *r2t_task, uint32_t r2t_sn, + bool send_new_r2tsn); + +static int spdk_create_iscsi_sess(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target, enum session_type session_type); +static int spdk_append_iscsi_sess(struct spdk_iscsi_conn *conn, + const char *initiator_port_name, uint16_t tsih, uint16_t cid); + +static void spdk_remove_acked_pdu(struct spdk_iscsi_conn *conn, uint32_t ExpStatSN); + +static int spdk_iscsi_reject(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu, + int reason); + +#define DMIN32(A,B) ((uint32_t) ((uint32_t)(A) > (uint32_t)(B) ? (uint32_t)(B) : (uint32_t)(A))) +#define DMIN64(A,B) ((uint64_t) ((A) > (B) ? 
(B) : (A))) + +#define MATCH_DIGEST_WORD(BUF, CRC32C) \ + ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \ + | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \ + | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \ + == (CRC32C)) + +#define MAKE_DIGEST_WORD(BUF, CRC32C) \ + ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \ + ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \ + ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \ + ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24))) + +#if 0 +static int +spdk_match_digest_word(const uint8_t *buf, uint32_t crc32c) +{ + uint32_t l; + + l = (buf[0] & 0xffU) << 0; + l |= (buf[1] & 0xffU) << 8; + l |= (buf[2] & 0xffU) << 16; + l |= (buf[3] & 0xffU) << 24; + return (l == crc32c); +} + +static uint8_t * +spdk_make_digest_word(uint8_t *buf, size_t len, uint32_t crc32c) +{ + if (len < ISCSI_DIGEST_LEN) { + return NULL; + } + + buf[0] = (crc32c >> 0) & 0xffU; + buf[1] = (crc32c >> 8) & 0xffU; + buf[2] = (crc32c >> 16) & 0xffU; + buf[3] = (crc32c >> 24) & 0xffU; + return buf; +} +#endif + +#ifndef HAVE_SRANDOMDEV +static void +srandomdev(void) +{ + unsigned long seed; + time_t now; + pid_t pid; + + pid = getpid(); + now = time(NULL); + seed = pid ^ now; + srandom(seed); +} +#endif /* HAVE_SRANDOMDEV */ + +#ifndef HAVE_ARC4RANDOM +static int spdk_arc4random_initialized = 0; + +static uint32_t +arc4random(void) +{ + uint32_t r; + uint32_t r1, r2; + + if (!spdk_arc4random_initialized) { + srandomdev(); + spdk_arc4random_initialized = 1; + } + r1 = (uint32_t)(random() & 0xffff); + r2 = (uint32_t)(random() & 0xffff); + r = (r1 << 16) | r2; + return r; +} +#endif /* HAVE_ARC4RANDOM */ + +static void +spdk_gen_random(uint8_t *buf, size_t len) +{ +#ifdef USE_RANDOM + long l; + size_t idx; + + srandomdev(); + for (idx = 0; idx < len; idx++) { + l = random(); + buf[idx] = (uint8_t) l; + } +#else + uint32_t r; + size_t idx; + + for (idx = 0; idx < len; idx++) { + r = arc4random(); + buf[idx] = (uint8_t) r; + } +#endif /* USE_RANDOM */ +} + +static uint64_t +spdk_iscsi_get_isid(const uint8_t isid[6]) +{ + return (uint64_t)isid[0] << 40 | + (uint64_t)isid[1] << 32 | + (uint64_t)isid[2] << 24 | + (uint64_t)isid[3] << 16 | + (uint64_t)isid[4] << 8 | + (uint64_t)isid[5]; +} + +static int +spdk_bin2hex(char *buf, size_t len, const uint8_t *data, size_t data_len) +{ + const char *digits = "0123456789ABCDEF"; + size_t total = 0; + size_t idx; + + if (len < 3) { + return -1; + } + buf[total] = '0'; + total++; + buf[total] = 'x'; + total++; + buf[total] = '\0'; + + for (idx = 0; idx < data_len; idx++) { + if (total + 3 > len) { + buf[total] = '\0'; + return - 1; + } + buf[total] = digits[(data[idx] >> 4) & 0x0fU]; + total++; + buf[total] = digits[data[idx] & 0x0fU]; + total++; + } + buf[total] = '\0'; + return total; +} + +static int +spdk_hex2bin(uint8_t *data, size_t data_len, const char *str) +{ + const char *digits = "0123456789ABCDEF"; + const char *dp; + const char *p; + size_t total = 0; + int n0, n1; + + p = str; + if (p[0] != '0' && (p[1] != 'x' && p[1] != 'X')) { + return -1; + } + p += 2; + + while (p[0] != '\0' && p[1] != '\0') { + if (total >= data_len) { + return -1; + } + dp = strchr(digits, toupper((int) p[0])); + if (dp == NULL) { + return -1; + } + n0 = (int)(dp - digits); + dp = strchr(digits, toupper((int) p[1])); + if (dp == NULL) { + return -1; + } + n1 = (int)(dp - digits); + + data[total] = (uint8_t)(((n0 & 0x0fU) << 4) | (n1 & 0x0fU)); + total++; + p 
+= 2; + } + return total; +} + +static int +spdk_islun2lun(uint64_t islun) +{ + uint64_t fmt_lun; + uint64_t method; + int lun_i; + + fmt_lun = islun; + method = (fmt_lun >> 62) & 0x03U; + fmt_lun = fmt_lun >> 48; + if (method == 0x00U) { + lun_i = (int)(fmt_lun & 0x00ffU); + } else if (method == 0x01U) { + lun_i = (int)(fmt_lun & 0x3fffU); + } else { + lun_i = 0xffffU; + } + return lun_i; +} + +static uint32_t +spdk_iscsi_pdu_calc_header_digest(struct spdk_iscsi_pdu *pdu) +{ + uint32_t crc32c; + uint32_t ahs_len_bytes = pdu->bhs.total_ahs_len * 4; + + crc32c = SPDK_CRC32C_INITIAL; + crc32c = spdk_crc32c_update(&pdu->bhs, ISCSI_BHS_LEN, crc32c); + + if (ahs_len_bytes) { + crc32c = spdk_crc32c_update(pdu->ahs, ahs_len_bytes, crc32c); + } + + /* BHS and AHS are always 4-byte multiples in length, so no padding is necessary. */ + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +static uint32_t +spdk_iscsi_pdu_calc_data_digest(struct spdk_iscsi_pdu *pdu) +{ + uint32_t data_len = DGET24(pdu->bhs.data_segment_len); + uint32_t crc32c; + uint32_t mod; + + crc32c = SPDK_CRC32C_INITIAL; + crc32c = spdk_crc32c_update(pdu->data, data_len, crc32c); + + mod = data_len % ISCSI_ALIGNMENT; + if (mod != 0) { + uint32_t pad_length = ISCSI_ALIGNMENT - mod; + uint8_t pad[3] = {0, 0, 0}; + + assert(pad_length > 0); + assert(pad_length <= sizeof(pad)); + crc32c = spdk_crc32c_update(pad, pad_length, crc32c); + } + + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +int +spdk_iscsi_read_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu **_pdu) +{ + struct spdk_iscsi_pdu *pdu; + struct spdk_mempool *pool; + uint32_t crc32c; + int ahs_len; + int data_len; + int max_segment_len; + int rc; + + if (conn->pdu_in_progress == NULL) { + conn->pdu_in_progress = spdk_get_pdu(); + if (conn->pdu_in_progress == NULL) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + pdu = conn->pdu_in_progress; + + if (pdu->bhs_valid_bytes < ISCSI_BHS_LEN) { + rc = spdk_iscsi_conn_read_data(conn, + ISCSI_BHS_LEN - pdu->bhs_valid_bytes, + (uint8_t *)&pdu->bhs + pdu->bhs_valid_bytes); + if (rc < 0) { + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return rc; + } + pdu->bhs_valid_bytes += rc; + if (pdu->bhs_valid_bytes < ISCSI_BHS_LEN) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + } + + data_len = ISCSI_ALIGN(DGET24(pdu->bhs.data_segment_len)); + + /* AHS */ + ahs_len = pdu->bhs.total_ahs_len * 4; + assert(ahs_len <= ISCSI_AHS_LEN); + if (pdu->ahs_valid_bytes < ahs_len) { + rc = spdk_iscsi_conn_read_data(conn, + ahs_len - pdu->ahs_valid_bytes, + pdu->ahs + pdu->ahs_valid_bytes); + if (rc < 0) { + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return rc; + } + + pdu->ahs_valid_bytes += rc; + if (pdu->ahs_valid_bytes < ahs_len) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + } + + /* Header Digest */ + if (conn->header_digest && + pdu->hdigest_valid_bytes < ISCSI_DIGEST_LEN) { + rc = spdk_iscsi_conn_read_data(conn, + ISCSI_DIGEST_LEN - pdu->hdigest_valid_bytes, + pdu->header_digest + pdu->hdigest_valid_bytes); + if (rc < 0) { + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return rc; + } + + pdu->hdigest_valid_bytes += rc; + if (pdu->hdigest_valid_bytes < ISCSI_DIGEST_LEN) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + } + + /* copy the actual data into local buffer */ + if (pdu->data_valid_bytes < data_len) { + if (pdu->data_buf == NULL) { + if (data_len <= spdk_get_immediate_data_buffer_size()) { + pool = g_spdk_iscsi.pdu_immediate_data_pool; + } else if 
(data_len <= spdk_get_data_out_buffer_size()) { + pool = g_spdk_iscsi.pdu_data_out_pool; + } else { + SPDK_ERRLOG("Data(%d) > MaxSegment(%d)\n", + data_len, spdk_get_data_out_buffer_size()); + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return SPDK_ISCSI_CONNECTION_FATAL; + } + pdu->mobj = spdk_mempool_get(pool); + if (pdu->mobj == NULL) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + pdu->data_buf = pdu->mobj->buf; + } + + rc = spdk_iscsi_conn_read_data(conn, + data_len - pdu->data_valid_bytes, + pdu->data_buf + pdu->data_valid_bytes); + if (rc < 0) { + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return rc; + } + + pdu->data_valid_bytes += rc; + if (pdu->data_valid_bytes < data_len) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + } + + /* copy out the data digest */ + if (conn->data_digest && data_len != 0 && + pdu->ddigest_valid_bytes < ISCSI_DIGEST_LEN) { + rc = spdk_iscsi_conn_read_data(conn, + ISCSI_DIGEST_LEN - pdu->ddigest_valid_bytes, + pdu->data_digest + pdu->ddigest_valid_bytes); + if (rc < 0) { + *_pdu = NULL; + spdk_put_pdu(pdu); + conn->pdu_in_progress = NULL; + return rc; + } + + pdu->ddigest_valid_bytes += rc; + if (pdu->ddigest_valid_bytes < ISCSI_DIGEST_LEN) { + *_pdu = NULL; + return SPDK_SUCCESS; + } + } + + /* All data for this PDU has now been read from the socket. */ + conn->pdu_in_progress = NULL; + + spdk_trace_record(TRACE_ISCSI_READ_PDU, conn->id, pdu->data_valid_bytes, + (uintptr_t)pdu, pdu->bhs.opcode); + + /* Data Segment */ + if (data_len != 0) { + /* + * Determine the maximum segment length expected for this PDU. + * This will be used to make sure the initiator did not send + * us too much immediate data. + * + * This value is specified separately by the initiator and target, + * and not negotiated. So we can use the #define safely here, + * since the value is not dependent on the initiator's maximum + * segment lengths (FirstBurstLength/MaxRecvDataSegmentLength), + * and SPDK currently does not allow configuration of these values + * at runtime. + */ + if (conn->sess == NULL) { + /* + * If the connection does not yet have a session, then + * login is not complete and we use the 8KB default + * FirstBurstLength as our maximum data segment length + * value. + */ + max_segment_len = DEFAULT_FIRSTBURSTLENGTH; + } else if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAOUT) { + max_segment_len = spdk_get_data_out_buffer_size(); + } else if (pdu->bhs.opcode == ISCSI_OP_NOPOUT) { + max_segment_len = SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH; + } else { + max_segment_len = spdk_get_immediate_data_buffer_size(); + } + if (data_len > max_segment_len) { + SPDK_ERRLOG("Data(%d) > MaxSegment(%d)\n", data_len, max_segment_len); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + spdk_put_pdu(pdu); + + /* + * If spdk_iscsi_reject() was not able to reject the PDU, + * treat it as a fatal connection error. Otherwise, + * return SUCCESS here so that the caller will continue + * to attempt to read PDUs. + */ + rc = (rc < 0) ? 
SPDK_ISCSI_CONNECTION_FATAL : SPDK_SUCCESS; + return rc; + } + + pdu->data = pdu->data_buf; + pdu->data_from_mempool = true; + pdu->data_segment_len = data_len; + } + + /* check digest */ + if (conn->header_digest) { + crc32c = spdk_iscsi_pdu_calc_header_digest(pdu); + rc = MATCH_DIGEST_WORD(pdu->header_digest, crc32c); + if (rc == 0) { + SPDK_ERRLOG("header digest error (%s)\n", conn->initiator_name); + spdk_put_pdu(pdu); + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + if (conn->data_digest && data_len != 0) { + crc32c = spdk_iscsi_pdu_calc_data_digest(pdu); + rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c); + if (rc == 0) { + SPDK_ERRLOG("data digest error (%s)\n", conn->initiator_name); + spdk_put_pdu(pdu); + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + *_pdu = pdu; + return 1; +} + +int +spdk_iscsi_build_iovecs(struct spdk_iscsi_conn *conn, struct iovec *iovec, + struct spdk_iscsi_pdu *pdu) +{ + int iovec_cnt = 0; + uint32_t crc32c; + int enable_digest; + int total_ahs_len; + int data_len; + + total_ahs_len = pdu->bhs.total_ahs_len; + data_len = DGET24(pdu->bhs.data_segment_len); + + enable_digest = 1; + if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) { + /* this PDU should be sent without digest */ + enable_digest = 0; + } + + /* BHS */ + iovec[iovec_cnt].iov_base = &pdu->bhs; + iovec[iovec_cnt].iov_len = ISCSI_BHS_LEN; + iovec_cnt++; + + /* AHS */ + if (total_ahs_len > 0) { + iovec[iovec_cnt].iov_base = pdu->ahs; + iovec[iovec_cnt].iov_len = 4 * total_ahs_len; + iovec_cnt++; + } + + /* Header Digest */ + if (enable_digest && conn->header_digest) { + crc32c = spdk_iscsi_pdu_calc_header_digest(pdu); + MAKE_DIGEST_WORD(pdu->header_digest, crc32c); + + iovec[iovec_cnt].iov_base = pdu->header_digest; + iovec[iovec_cnt].iov_len = ISCSI_DIGEST_LEN; + iovec_cnt++; + } + + /* Data Segment */ + if (data_len > 0) { + iovec[iovec_cnt].iov_base = pdu->data; + iovec[iovec_cnt].iov_len = ISCSI_ALIGN(data_len); + iovec_cnt++; + } + + /* Data Digest */ + if (enable_digest && conn->data_digest && data_len != 0) { + crc32c = spdk_iscsi_pdu_calc_data_digest(pdu); + MAKE_DIGEST_WORD(pdu->data_digest, crc32c); + + iovec[iovec_cnt].iov_base = pdu->data_digest; + iovec[iovec_cnt].iov_len = ISCSI_DIGEST_LEN; + iovec_cnt++; + } + + return iovec_cnt; +} + +static int +spdk_iscsi_append_text(struct spdk_iscsi_conn *conn __attribute__((__unused__)), + const char *key, const char *val, uint8_t *data, + int alloc_len, int data_len) +{ + int total; + int len; + + total = data_len; + if (alloc_len < 1) { + return 0; + } + if (total > alloc_len) { + total = alloc_len; + data[total - 1] = '\0'; + return total; + } + + if (alloc_len - total < 1) { + SPDK_ERRLOG("data space small %d\n", alloc_len); + return total; + } + len = snprintf((char *) data + total, alloc_len - total, "%s=%s", key, val); + total += len + 1; + + return total; +} + +static int +spdk_iscsi_append_param(struct spdk_iscsi_conn *conn, const char *key, + uint8_t *data, int alloc_len, int data_len) +{ + struct iscsi_param *param; + int rc; + + param = spdk_iscsi_param_find(conn->params, key); + if (param == NULL) { + param = spdk_iscsi_param_find(conn->sess->params, key); + if (param == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "no key %.64s\n", key); + return data_len; + } + } + rc = spdk_iscsi_append_text(conn, param->key, param->val, data, + alloc_len, data_len); + return rc; +} + +static int +spdk_iscsi_get_authinfo(struct spdk_iscsi_conn *conn, const char *authuser) +{ + int ag_tag; + int rc; + + if (conn->sess->target != NULL) { + ag_tag = 
conn->sess->target->chap_group; + } else { + ag_tag = -1; + } + if (ag_tag < 0) { + ag_tag = g_spdk_iscsi.chap_group; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "ag_tag=%d\n", ag_tag); + + rc = spdk_iscsi_chap_get_authinfo(&conn->auth, authuser, ag_tag); + if (rc < 0) { + SPDK_ERRLOG("chap_get_authinfo() failed\n"); + return -1; + } + return 0; +} + +static int +spdk_iscsi_auth_params(struct spdk_iscsi_conn *conn, + struct iscsi_param *params, const char *method, uint8_t *data, + int alloc_len, int data_len) +{ + char *in_val; + char *in_next; + char *new_val; + const char *val; + const char *user; + const char *response; + const char *challenge; + int total; + int rc; + + if (conn == NULL || params == NULL || method == NULL) { + return -1; + } + if (strcasecmp(method, "CHAP") == 0) { + /* method OK */ + } else { + SPDK_ERRLOG("unsupported AuthMethod %.64s\n", method); + return -1; + } + + total = data_len; + if (alloc_len < 1) { + return 0; + } + if (total > alloc_len) { + total = alloc_len; + data[total - 1] = '\0'; + return total; + } + + /* for temporary store */ + in_val = malloc(ISCSI_TEXT_MAX_VAL_LEN + 1); + if (!in_val) { + SPDK_ERRLOG("malloc() failed for temporary store\n"); + return -ENOMEM; + } + + /* CHAP method (RFC1994) */ + if ((val = spdk_iscsi_param_get_val(params, "CHAP_A")) != NULL) { + if (conn->auth.chap_phase != ISCSI_CHAP_PHASE_WAIT_A) { + SPDK_ERRLOG("CHAP sequence error\n"); + goto error_return; + } + + /* CHAP_A is LIST type */ + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", val); + in_next = in_val; + while ((new_val = spdk_strsepq(&in_next, ",")) != NULL) { + if (strcasecmp(new_val, "5") == 0) { + /* CHAP with MD5 */ + break; + } + } + if (new_val == NULL) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", "Reject"); + new_val = in_val; + spdk_iscsi_append_text(conn, "CHAP_A", new_val, + data, alloc_len, total); + goto error_return; + } + /* selected algorithm is 5 (MD5) */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "got CHAP_A=%s\n", new_val); + total = spdk_iscsi_append_text(conn, "CHAP_A", new_val, + data, alloc_len, total); + + /* Identifier is one octet */ + spdk_gen_random(conn->auth.chap_id, 1); + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN, "%d", + (int) conn->auth.chap_id[0]); + total = spdk_iscsi_append_text(conn, "CHAP_I", in_val, + data, alloc_len, total); + + /* Challenge Value is a variable stream of octets */ + /* (binary length MUST not exceed 1024 bytes) */ + conn->auth.chap_challenge_len = ISCSI_CHAP_CHALLENGE_LEN; + spdk_gen_random(conn->auth.chap_challenge, + conn->auth.chap_challenge_len); + spdk_bin2hex(in_val, ISCSI_TEXT_MAX_VAL_LEN, + conn->auth.chap_challenge, + conn->auth.chap_challenge_len); + total = spdk_iscsi_append_text(conn, "CHAP_C", in_val, + data, alloc_len, total); + + conn->auth.chap_phase = ISCSI_CHAP_PHASE_WAIT_NR; + } else if ((val = spdk_iscsi_param_get_val(params, "CHAP_N")) != NULL) { + uint8_t resmd5[SPDK_MD5DIGEST_LEN]; + uint8_t tgtmd5[SPDK_MD5DIGEST_LEN]; + struct spdk_md5ctx md5ctx; + + user = val; + if (conn->auth.chap_phase != ISCSI_CHAP_PHASE_WAIT_NR) { + SPDK_ERRLOG("CHAP sequence error\n"); + goto error_return; + } + + response = spdk_iscsi_param_get_val(params, "CHAP_R"); + if (response == NULL) { + SPDK_ERRLOG("no response\n"); + goto error_return; + } + rc = spdk_hex2bin(resmd5, SPDK_MD5DIGEST_LEN, response); + if (rc < 0 || rc != SPDK_MD5DIGEST_LEN) { + SPDK_ERRLOG("response format error\n"); + goto error_return; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "got CHAP_N/CHAP_R\n"); + + rc = spdk_iscsi_get_authinfo(conn, val); + 
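+ /*
+  * Look up the CHAP secret for this user and group, then verify that the
+  * initiator-supplied CHAP_R equals MD5(id || secret || challenge) as
+  * defined by CHAP (RFC 1994).
+  */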
if (rc < 0) { + //SPDK_ERRLOG("auth user or secret is missing\n"); + SPDK_ERRLOG("iscsi_get_authinfo() failed\n"); + goto error_return; + } + if (conn->auth.user[0] == '\0' || conn->auth.secret[0] == '\0') { + //SPDK_ERRLOG("auth user or secret is missing\n"); + SPDK_ERRLOG("auth failed (user %.64s)\n", user); + goto error_return; + } + + spdk_md5init(&md5ctx); + /* Identifier */ + spdk_md5update(&md5ctx, conn->auth.chap_id, 1); + /* followed by secret */ + spdk_md5update(&md5ctx, conn->auth.secret, + strlen(conn->auth.secret)); + /* followed by Challenge Value */ + spdk_md5update(&md5ctx, conn->auth.chap_challenge, + conn->auth.chap_challenge_len); + /* tgtmd5 is expecting Response Value */ + spdk_md5final(tgtmd5, &md5ctx); + + spdk_bin2hex(in_val, ISCSI_TEXT_MAX_VAL_LEN, + tgtmd5, SPDK_MD5DIGEST_LEN); + +#if 0 + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "tgtmd5=%s, resmd5=%s\n", in_val, response); + spdk_dump("tgtmd5", tgtmd5, SPDK_MD5DIGEST_LEN); + spdk_dump("resmd5", resmd5, SPDK_MD5DIGEST_LEN); +#endif + + /* compare MD5 digest */ + if (memcmp(tgtmd5, resmd5, SPDK_MD5DIGEST_LEN) != 0) { + /* not match */ + //SPDK_ERRLOG("auth user or secret is missing\n"); + SPDK_ERRLOG("auth failed (user %.64s)\n", user); + goto error_return; + } + /* OK initiator's secret */ + conn->authenticated = 1; + + /* mutual CHAP? */ + val = spdk_iscsi_param_get_val(params, "CHAP_I"); + if (val != NULL) { + conn->auth.chap_mid[0] = (uint8_t) strtol(val, NULL, 10); + challenge = spdk_iscsi_param_get_val(params, "CHAP_C"); + if (challenge == NULL) { + SPDK_ERRLOG("CHAP sequence error\n"); + goto error_return; + } + rc = spdk_hex2bin(conn->auth.chap_mchallenge, + ISCSI_CHAP_CHALLENGE_LEN, + challenge); + if (rc < 0) { + SPDK_ERRLOG("challenge format error\n"); + goto error_return; + } + conn->auth.chap_mchallenge_len = rc; +#if 0 + spdk_dump("MChallenge", conn->auth.chap_mchallenge, + conn->auth.chap_mchallenge_len); +#endif + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "got CHAP_I/CHAP_C\n"); + + if (conn->auth.muser[0] == '\0' || conn->auth.msecret[0] == '\0') { + //SPDK_ERRLOG("mutual auth user or secret is missing\n"); + SPDK_ERRLOG("auth failed (user %.64s)\n", user); + goto error_return; + } + + spdk_md5init(&md5ctx); + /* Identifier */ + spdk_md5update(&md5ctx, conn->auth.chap_mid, 1); + /* followed by secret */ + spdk_md5update(&md5ctx, conn->auth.msecret, + strlen(conn->auth.msecret)); + /* followed by Challenge Value */ + spdk_md5update(&md5ctx, conn->auth.chap_mchallenge, + conn->auth.chap_mchallenge_len); + /* tgtmd5 is Response Value */ + spdk_md5final(tgtmd5, &md5ctx); + + spdk_bin2hex(in_val, ISCSI_TEXT_MAX_VAL_LEN, + tgtmd5, SPDK_MD5DIGEST_LEN); + + total = spdk_iscsi_append_text(conn, "CHAP_N", + conn->auth.muser, data, alloc_len, total); + total = spdk_iscsi_append_text(conn, "CHAP_R", + in_val, data, alloc_len, total); + } else { + /* not mutual */ + if (conn->req_mutual) { + SPDK_ERRLOG("required mutual CHAP\n"); + goto error_return; + } + } + + conn->auth.chap_phase = ISCSI_CHAP_PHASE_END; + } else { + /* not found CHAP keys */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "start CHAP\n"); + conn->auth.chap_phase = ISCSI_CHAP_PHASE_WAIT_A; + } + + free(in_val); + return total; + +error_return: + conn->auth.chap_phase = ISCSI_CHAP_PHASE_WAIT_A; + free(in_val); + return -1; +} + +static int +spdk_iscsi_reject(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu, + int reason) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_reject *rsph; + uint8_t *data; + int total_ahs_len; + int data_len; + int alloc_len; + + 
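+ /*
+  * A Reject PDU returns the header of the offending PDU to the initiator:
+  * its BHS, any AHS, and the header digest (when negotiated) are copied
+  * into the reject's data segment below.
+  */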
total_ahs_len = pdu->bhs.total_ahs_len; + data_len = 0; + alloc_len = ISCSI_BHS_LEN + (4 * total_ahs_len); + + if (conn->header_digest) { + alloc_len += ISCSI_DIGEST_LEN; + } + + data = calloc(1, alloc_len); + if (!data) { + SPDK_ERRLOG("calloc() failed for data segment\n"); + return -ENOMEM; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Reject PDU reason=%d\n", reason); + + if (conn->sess != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "StatSN=%u, ExpCmdSN=%u, MaxCmdSN=%u\n", + conn->StatSN, conn->sess->ExpCmdSN, + conn->sess->MaxCmdSN); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN=%u\n", conn->StatSN); + } + + memcpy(data, &pdu->bhs, ISCSI_BHS_LEN); + data_len += ISCSI_BHS_LEN; + + if (total_ahs_len != 0) { + memcpy(data + data_len, pdu->ahs, (4 * total_ahs_len)); + data_len += (4 * total_ahs_len); + } + + if (conn->header_digest) { + memcpy(data + data_len, pdu->header_digest, ISCSI_DIGEST_LEN); + data_len += ISCSI_DIGEST_LEN; + } + + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + free(data); + return -ENOMEM; + } + + rsph = (struct iscsi_bhs_reject *)&rsp_pdu->bhs; + rsp_pdu->data = data; + rsph->opcode = ISCSI_OP_REJECT; + rsph->flags |= 0x80; /* bit 0 is default to 1 */ + rsph->reason = reason; + DSET24(rsph->data_segment_len, data_len); + + rsph->ffffffff = 0xffffffffU; + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (conn->sess != NULL) { + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + } else { + to_be32(&rsph->exp_cmd_sn, 1); + to_be32(&rsph->max_cmd_sn, 1); + } + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "PDU", (void *)&rsp_pdu->bhs, ISCSI_BHS_LEN); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + return 0; +} + +static int +spdk_iscsi_check_values(struct spdk_iscsi_conn *conn) +{ + if (conn->sess->FirstBurstLength > conn->sess->MaxBurstLength) { + SPDK_ERRLOG("FirstBurstLength(%d) > MaxBurstLength(%d)\n", + conn->sess->FirstBurstLength, + conn->sess->MaxBurstLength); + return -1; + } + if (conn->sess->FirstBurstLength > g_spdk_iscsi.FirstBurstLength) { + SPDK_ERRLOG("FirstBurstLength(%d) > iSCSI target restriction(%d)\n", + conn->sess->FirstBurstLength, g_spdk_iscsi.FirstBurstLength); + return -1; + } + if (conn->sess->MaxBurstLength > 0x00ffffff) { + SPDK_ERRLOG("MaxBurstLength(%d) > 0x00ffffff\n", + conn->sess->MaxBurstLength); + return -1; + } + + if (conn->MaxRecvDataSegmentLength < 512) { + SPDK_ERRLOG("MaxRecvDataSegmentLength(%d) < 512\n", + conn->MaxRecvDataSegmentLength); + return -1; + } + if (conn->MaxRecvDataSegmentLength > 0x00ffffff) { + SPDK_ERRLOG("MaxRecvDataSegmentLength(%d) > 0x00ffffff\n", + conn->MaxRecvDataSegmentLength); + return -1; + } + return 0; +} + +/* + * The response function of spdk_iscsi_op_login + * return: + * 0:success; + * -1:error; + */ +static int +spdk_iscsi_op_login_response(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, struct iscsi_param *params) +{ + struct iscsi_bhs_login_rsp *rsph; + int rc; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + rsph->version_max = ISCSI_VERSION; + rsph->version_act = ISCSI_VERSION; + DSET24(rsph->data_segment_len, rsp_pdu->data_segment_len); + + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (conn->sess != NULL) { + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + } else { + to_be32(&rsph->exp_cmd_sn, rsp_pdu->cmd_sn); + to_be32(&rsph->max_cmd_sn, rsp_pdu->cmd_sn); + } + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "PDU", (uint8_t *)rsph, 
ISCSI_BHS_LEN); + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "DATA", rsp_pdu->data, rsp_pdu->data_segment_len); + + /* Set T/CSG/NSG to reserved if login error. */ + if (rsph->status_class != 0) { + rsph->flags &= ~ISCSI_LOGIN_TRANSIT; + rsph->flags &= ~ISCSI_LOGIN_CURRENT_STAGE_MASK; + rsph->flags &= ~ISCSI_LOGIN_NEXT_STAGE_MASK; + } + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + /* after send PDU digest on/off */ + if (conn->full_feature) { + /* update internal variables */ + rc = spdk_iscsi_copy_param2var(conn); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_copy_param2var() failed\n"); + spdk_iscsi_param_free(params); + return -1; + } + /* check value */ + rc = spdk_iscsi_check_values(conn); + if (rc < 0) { + SPDK_ERRLOG("iscsi_check_values() failed\n"); + spdk_iscsi_param_free(params); + return -1; + } + } + + spdk_iscsi_param_free(params); + return 0; +} + +/* + * This function is used to del the original param and update it with new + * value + * return: + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_update_param(struct spdk_iscsi_conn *conn, + const char *key, const char *value, + const char *list) +{ + int rc = 0; + struct iscsi_param *new_param, *orig_param; + int index; + + orig_param = spdk_iscsi_param_find(conn->params, key); + if (orig_param == NULL) { + SPDK_ERRLOG("orig_param %s not found\n", key); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + + index = orig_param->state_index; + rc = spdk_iscsi_param_del(&conn->params, key); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_del(%s) failed\n", key); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + rc = spdk_iscsi_param_add(&conn->params, key, value, list, ISPT_LIST); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_add() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + new_param = spdk_iscsi_param_find(conn->params, key); + if (new_param == NULL) { + SPDK_ERRLOG("spdk_iscsi_param_find() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + new_param->state_index = index; + return rc; +} + +/* + * The function which is used to handle the part of session discovery + * return: + * 0, success; + * otherwise: error; + */ +static int +spdk_iscsi_op_login_session_discovery_chap(struct spdk_iscsi_conn *conn) +{ + int rc = 0; + + if (g_spdk_iscsi.disable_chap) { + conn->req_auth = 0; + rc = spdk_iscsi_op_login_update_param(conn, "AuthMethod", "None", "None"); + if (rc < 0) { + return rc; + } + } else if (g_spdk_iscsi.require_chap) { + conn->req_auth = 1; + rc = spdk_iscsi_op_login_update_param(conn, "AuthMethod", "CHAP", "CHAP"); + if (rc < 0) { + return rc; + } + } + if (g_spdk_iscsi.mutual_chap) { + conn->req_mutual = 1; + } + + return rc; +} + +/* + * This function is used to update the param related with chap + * return: + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_negotiate_chap_param(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + struct spdk_iscsi_tgt_node *target) +{ + int rc; + + if (target->disable_chap) { + conn->req_auth = 0; + rc = spdk_iscsi_op_login_update_param(conn, "AuthMethod", "None", "None"); + if (rc < 0) { + return rc; + } + } else if (target->require_chap) { + conn->req_auth = 1; + rc = spdk_iscsi_op_login_update_param(conn, "AuthMethod", "CHAP", "CHAP"); + if (rc < 0) { + return rc; + } + } + + if (target->mutual_chap) { + conn->req_mutual = 1; + } + + if (target->header_digest) { + /* + * User specified header digests, so update the list of + * HeaderDigest values to remove "None" so that only + * initiators who support CRC32C can connect. 
+ */ + rc = spdk_iscsi_op_login_update_param(conn, "HeaderDigest", "CRC32C", "CRC32C"); + if (rc < 0) { + return rc; + } + } + + if (target->data_digest) { + /* + * User specified data digests, so update the list of + * DataDigest values to remove "None" so that only + * initiators who support CRC32C can connect. + */ + rc = spdk_iscsi_op_login_update_param(conn, "DataDigest", "CRC32C", "CRC32C"); + if (rc < 0) { + return rc; + } + } + + return 0; +} + +/* + * This function use to check the session + * return: + * 0, success + * otherwise: error + */ +static int +spdk_iscsi_op_login_check_session(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + char *initiator_port_name, int cid) + +{ + int rc = 0; + struct iscsi_bhs_login_rsp *rsph; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + /* check existing session */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "isid=%"PRIx64", tsih=%u, cid=%u\n", + spdk_iscsi_get_isid(rsph->isid), from_be16(&rsph->tsih), cid); + if (rsph->tsih != 0) { + /* multiple connections */ + rc = spdk_append_iscsi_sess(conn, initiator_port_name, + from_be16(&rsph->tsih), cid); + if (rc < 0) { + SPDK_ERRLOG("isid=%"PRIx64", tsih=%u, cid=%u:" + "spdk_append_iscsi_sess() failed\n", + spdk_iscsi_get_isid(rsph->isid), from_be16(&rsph->tsih), + cid); + /* Can't include in session */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_CONN_ADD_FAIL; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + } else if (!g_spdk_iscsi.AllowDuplicateIsid) { + /* new session, drop old sess by the initiator */ + spdk_iscsi_drop_conns(conn, initiator_port_name, 0 /* drop old */); + } + + return rc; +} + +/* + * This function is used to check the target info + * return: + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_check_target(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + const char *target_name, + struct spdk_iscsi_tgt_node **target) +{ + bool result; + struct iscsi_bhs_login_rsp *rsph; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + *target = spdk_iscsi_find_tgt_node(target_name); + if (*target == NULL) { + SPDK_WARNLOG("target %s not found\n", target_name); + /* Not found */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_TARGET_NOT_FOUND; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + result = spdk_iscsi_tgt_node_access(conn, *target, + conn->initiator_name, + conn->initiator_addr); + if (!result) { + SPDK_ERRLOG("access denied\n"); + /* Not found */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_TARGET_NOT_FOUND; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + return 0; +} + +/* + * The function which is used to handle the part of normal login session + * return: + * 0, success; + * SPDK_ISCSI_LOGIN_ERROR_PARAMETER, parameter error; + */ +static int +spdk_iscsi_op_login_session_normal(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + char *initiator_port_name, + struct iscsi_param *params, + struct spdk_iscsi_tgt_node **target, + int cid) +{ + const char *target_name; + const char *target_short_name; + struct iscsi_bhs_login_rsp *rsph; + int rc = 0; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + target_name = spdk_iscsi_param_get_val(params, "TargetName"); + + if (target_name == NULL) { + SPDK_ERRLOG("TargetName is empty\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return 
SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + memset(conn->target_short_name, 0, MAX_TARGET_NAME); + target_short_name = strstr(target_name, ":"); + if (target_short_name != NULL) { + target_short_name++; /* Advance past the ':' */ + if (strlen(target_short_name) >= MAX_TARGET_NAME) { + SPDK_ERRLOG("Target Short Name (%s) is more than %u characters\n", + target_short_name, MAX_TARGET_NAME); + return rc; + } + snprintf(conn->target_short_name, MAX_TARGET_NAME, "%s", + target_short_name); + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + rc = spdk_iscsi_op_login_check_target(conn, rsp_pdu, target_name, target); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + if (rc < 0) { + return rc; + } + + conn->target = *target; + conn->dev = (*target)->dev; + conn->target_port = spdk_scsi_dev_find_port_by_id((*target)->dev, + conn->portal->group->tag); + + rc = spdk_iscsi_op_login_check_session(conn, rsp_pdu, + initiator_port_name, cid); + if (rc < 0) { + return rc; + } + + /* force target flags */ + pthread_mutex_lock(&((*target)->mutex)); + rc = spdk_iscsi_op_login_negotiate_chap_param(conn, rsp_pdu, *target); + pthread_mutex_unlock(&((*target)->mutex)); + + return rc; +} + +/* + * This function is used to judge the session type + * return + * 0: success + * otherwise, error + */ +static int +spdk_iscsi_op_login_session_type(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + enum session_type *session_type, + struct iscsi_param *params) +{ + const char *session_type_str; + struct iscsi_bhs_login_rsp *rsph; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + session_type_str = spdk_iscsi_param_get_val(params, "SessionType"); + if (session_type_str == NULL) { + if (rsph->tsih != 0) { + *session_type = SESSION_TYPE_NORMAL; + } else { + SPDK_ERRLOG("SessionType is empty\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + } else { + if (strcasecmp(session_type_str, "Discovery") == 0) { + *session_type = SESSION_TYPE_DISCOVERY; + } else if (strcasecmp(session_type_str, "Normal") == 0) { + *session_type = SESSION_TYPE_NORMAL; + } else { + *session_type = SESSION_TYPE_INVALID; + SPDK_ERRLOG("SessionType is invalid\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Session Type: %s\n", session_type_str); + + return 0; +} +/* + * This function is used to initialize the port info + * return + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_initialize_port(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + char *initiator_port_name, + uint32_t name_length, + struct iscsi_param *params) +{ + const char *val; + struct iscsi_bhs_login_rsp *rsph; + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + + /* Initiator Name and Port */ + val = spdk_iscsi_param_get_val(params, "InitiatorName"); + if (val == NULL) { + SPDK_ERRLOG("InitiatorName is empty\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + snprintf(conn->initiator_name, sizeof(conn->initiator_name), "%s", val); + snprintf(initiator_port_name, name_length, + "%s,i,0x%12.12" PRIx64, val, spdk_iscsi_get_isid(rsph->isid)); + spdk_strlwr(conn->initiator_name); + 
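+ /* The initiator port name is the initiator name plus the ISID, e.g.
+  * "iqn.1994-05.com.example:host1,i,0x0123456789ab" (illustrative name only);
+  * both strings are lower-cased so later comparisons are case-insensitive. */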
spdk_strlwr(initiator_port_name); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Initiator name: %s\n", conn->initiator_name); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Initiator port: %s\n", initiator_port_name); + + return 0; +} + +/* + * This function is used to set the info in the connection data structure + * return + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_set_conn_info(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + char *initiator_port_name, + enum session_type session_type, + struct spdk_iscsi_tgt_node *target, int cid) +{ + int rc = 0; + struct iscsi_bhs_login_rsp *rsph; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + conn->authenticated = 0; + conn->auth.chap_phase = ISCSI_CHAP_PHASE_WAIT_A; + conn->cid = cid; + + if (conn->sess == NULL) { + /* new session */ + rc = spdk_create_iscsi_sess(conn, target, session_type); + if (rc < 0) { + SPDK_ERRLOG("create_sess() failed\n"); + rsph->status_class = ISCSI_CLASS_TARGET_ERROR; + rsph->status_detail = ISCSI_LOGIN_STATUS_NO_RESOURCES; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + /* initialize parameters */ + conn->StatSN = from_be32(&rsph->stat_sn); + conn->sess->initiator_port = spdk_scsi_port_create(spdk_iscsi_get_isid(rsph->isid), + 0, initiator_port_name); + conn->sess->isid = spdk_iscsi_get_isid(rsph->isid); + conn->sess->target = target; + + /* Discovery sessions will not have a target. */ + if (target != NULL) { + conn->sess->queue_depth = target->queue_depth; + } else { + /* + * Assume discovery sessions have an effective command + * windows size of 1. + */ + conn->sess->queue_depth = 1; + } + conn->sess->ExpCmdSN = rsp_pdu->cmd_sn; + conn->sess->MaxCmdSN = rsp_pdu->cmd_sn + conn->sess->queue_depth - 1; + } + + conn->initiator_port = conn->sess->initiator_port; + + return 0; +} + +/* + * This function is used to set the target info + * return + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_set_target_info(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + enum session_type session_type, + int alloc_len, + struct spdk_iscsi_tgt_node *target) +{ + char buf[MAX_TMPBUF]; + const char *val; + int rc = 0; + struct spdk_iscsi_portal *portal = conn->portal; + + /* declarative parameters */ + if (target != NULL) { + pthread_mutex_lock(&target->mutex); + if (target->alias != NULL) { + snprintf(buf, sizeof buf, "%s", target->alias); + } else { + snprintf(buf, sizeof buf, "%s", ""); + } + pthread_mutex_unlock(&target->mutex); + rc = spdk_iscsi_param_set(conn->sess->params, "TargetAlias", buf); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + } + snprintf(buf, sizeof buf, "%s:%s,%d", portal->host, portal->port, + portal->group->tag); + rc = spdk_iscsi_param_set(conn->sess->params, "TargetAddress", buf); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + snprintf(buf, sizeof buf, "%d", portal->group->tag); + rc = spdk_iscsi_param_set(conn->sess->params, "TargetPortalGroupTag", buf); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + + /* write in response */ + if (target != NULL) { + val = spdk_iscsi_param_get_val(conn->sess->params, "TargetAlias"); + if (val != NULL && strlen(val) != 0) { + rsp_pdu->data_segment_len = spdk_iscsi_append_param(conn, + "TargetAlias", + rsp_pdu->data, + alloc_len, + rsp_pdu->data_segment_len); + } + if (session_type == SESSION_TYPE_DISCOVERY) { + 
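+ /* Discovery sessions also report TargetAddress
+  * ("<host>:<port>,<portal group tag>") so the initiator
+  * knows where to open the subsequent normal session. */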
rsp_pdu->data_segment_len = spdk_iscsi_append_param(conn, + "TargetAddress", + rsp_pdu->data, + alloc_len, + rsp_pdu->data_segment_len); + } + rsp_pdu->data_segment_len = spdk_iscsi_append_param(conn, + "TargetPortalGroupTag", + rsp_pdu->data, + alloc_len, + rsp_pdu->data_segment_len); + } + + return rc; +} + +/* + * This function is used to handle the login of iscsi initiator when there is + * no session + * return: + * 0, success; + * SPDK_ISCSI_LOGIN_ERROR_PARAMETER, parameter error; + * SPDK_ISCSI_LOGIN_ERROR_RESPONSE, used to notify the login fail. + */ +static int +spdk_iscsi_op_login_phase_none(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + struct iscsi_param *params, + int alloc_len, int cid) +{ + enum session_type session_type; + char initiator_port_name[MAX_INITIATOR_NAME]; + struct iscsi_bhs_login_rsp *rsph; + struct spdk_iscsi_tgt_node *target = NULL; + int rc = 0; + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + + conn->target = NULL; + conn->dev = NULL; + + rc = spdk_iscsi_op_login_initialize_port(conn, rsp_pdu, + initiator_port_name, MAX_INITIATOR_NAME, params); + if (rc < 0) { + return rc; + } + + rc = spdk_iscsi_op_login_session_type(conn, rsp_pdu, &session_type, + params); + if (rc < 0) { + return rc; + } + + /* Target Name and Port */ + if (session_type == SESSION_TYPE_NORMAL) { + rc = spdk_iscsi_op_login_session_normal(conn, rsp_pdu, + initiator_port_name, + params, &target, cid); + if (rc < 0) { + return rc; + } + + } else if (session_type == SESSION_TYPE_DISCOVERY) { + target = NULL; + rsph->tsih = 0; + + /* force target flags */ + pthread_mutex_lock(&g_spdk_iscsi.mutex); + rc = spdk_iscsi_op_login_session_discovery_chap(conn); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + if (rc < 0) { + return rc; + } + } else { + SPDK_ERRLOG("unknown session type\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + rc = spdk_iscsi_op_login_set_conn_info(conn, rsp_pdu, initiator_port_name, + session_type, target, cid); + if (rc < 0) { + return rc; + } + + /* limit conns on discovery session */ + if (session_type == SESSION_TYPE_DISCOVERY) { + conn->sess->MaxConnections = 1; + rc = spdk_iscsi_param_set_int(conn->sess->params, + "MaxConnections", + conn->sess->MaxConnections); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + } + + rc = spdk_iscsi_op_login_set_target_info(conn, rsp_pdu, session_type, + alloc_len, target); + if (rc < 0) { + return rc; + } + + return rc; +} + +/* + * The function which is used to initialize the internal response data + * structure of iscsi login function. 
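+ * It allocates the response data buffer, mirrors the T/C/CSG/NSG flags,
+ * ISID, TSIH and ITT from the request, and rejects unsupported versions
+ * or invalid stage combinations before the login parameters are parsed.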
+ * return: + * 0, success; + * otherwise, error; + */ +static int +spdk_iscsi_op_login_rsp_init(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *pdu, struct spdk_iscsi_pdu *rsp_pdu, + struct iscsi_param **params, int *alloc_len, int *cid) +{ + + struct iscsi_bhs_login_req *reqh; + struct iscsi_bhs_login_rsp *rsph; + int rc; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + rsph->opcode = ISCSI_OP_LOGIN_RSP; + rsph->status_class = ISCSI_CLASS_SUCCESS; + rsph->status_detail = ISCSI_LOGIN_ACCEPT; + rsp_pdu->data_segment_len = 0; + + /* Default MaxRecvDataSegmentLength - RFC3720(12.12) */ + if (conn->MaxRecvDataSegmentLength < 8192) { + *alloc_len = 8192; + } else { + *alloc_len = conn->MaxRecvDataSegmentLength; + } + + rsp_pdu->data = calloc(1, *alloc_len); + if (!rsp_pdu->data) { + SPDK_ERRLOG("calloc() failed for data segment\n"); + return -ENOMEM; + } + + reqh = (struct iscsi_bhs_login_req *)&pdu->bhs; + rsph->flags |= (reqh->flags & ISCSI_LOGIN_TRANSIT); + rsph->flags |= (reqh->flags & ISCSI_LOGIN_CONTINUE); + rsph->flags |= (reqh->flags & ISCSI_LOGIN_CURRENT_STAGE_MASK); + if (ISCSI_BHS_LOGIN_GET_TBIT(rsph->flags)) { + rsph->flags |= (reqh->flags & ISCSI_LOGIN_NEXT_STAGE_MASK); + } + + /* We don't need to convert from network byte order. Just store it */ + memcpy(&rsph->isid, reqh->isid, 6); + rsph->tsih = reqh->tsih; + rsph->itt = reqh->itt; + rsp_pdu->cmd_sn = from_be32(&reqh->cmd_sn); + *cid = from_be16(&reqh->cid); + + if (rsph->tsih) { + rsph->stat_sn = reqh->exp_stat_sn; + } + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "PDU", (uint8_t *)&pdu->bhs, ISCSI_BHS_LEN); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "T=%d, C=%d, CSG=%d, NSG=%d, Min=%d, Max=%d, ITT=%x\n", + ISCSI_BHS_LOGIN_GET_TBIT(rsph->flags), + ISCSI_BHS_LOGIN_GET_CBIT(rsph->flags), + ISCSI_BHS_LOGIN_GET_CSG(rsph->flags), + ISCSI_BHS_LOGIN_GET_NSG(rsph->flags), + reqh->version_min, reqh->version_max, from_be32(&rsph->itt)); + + if (conn->sess != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "CmdSN=%u, ExpStatSN=%u, StatSN=%u, ExpCmdSN=%u," + "MaxCmdSN=%u\n", rsp_pdu->cmd_sn, + from_be32(&rsph->stat_sn), conn->StatSN, + conn->sess->ExpCmdSN, + conn->sess->MaxCmdSN); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "CmdSN=%u, ExpStatSN=%u, StatSN=%u\n", + rsp_pdu->cmd_sn, from_be32(&rsph->stat_sn), + conn->StatSN); + } + + if (ISCSI_BHS_LOGIN_GET_TBIT(rsph->flags) && + ISCSI_BHS_LOGIN_GET_CBIT(rsph->flags)) { + SPDK_ERRLOG("transit error\n"); + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + /* make sure reqh->version_max < ISCSI_VERSION */ + if (reqh->version_min > ISCSI_VERSION) { + SPDK_ERRLOG("unsupported version %d/%d\n", reqh->version_min, + reqh->version_max); + /* Unsupported version */ + /* set all reserved flag to zero */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_UNSUPPORTED_VERSION; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + if ((ISCSI_BHS_LOGIN_GET_NSG(rsph->flags) == ISCSI_NSG_RESERVED_CODE) && + ISCSI_BHS_LOGIN_GET_TBIT(rsph->flags)) { + /* set NSG to zero */ + rsph->flags &= ~ISCSI_LOGIN_NEXT_STAGE_MASK; + /* also set other bits to zero */ + rsph->flags &= ~ISCSI_LOGIN_TRANSIT; + rsph->flags &= ~ISCSI_LOGIN_CURRENT_STAGE_MASK; + SPDK_ERRLOG("Received reserved NSG code: %d\n", ISCSI_NSG_RESERVED_CODE); + /* Initiator error */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + /* store incoming parameters */ + rc = spdk_iscsi_parse_params(params, pdu->data, + 
pdu->data_segment_len, ISCSI_BHS_LOGIN_GET_CBIT(reqh->flags), + &conn->partial_text_parameter); + if (rc < 0) { + SPDK_ERRLOG("iscsi_parse_params() failed\n"); + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + return 0; +} + +/* + * This function is used to set the csg bit case in rsp + * return: + * 0, success + * otherwise: error + */ +static int +spdk_iscsi_op_login_rsp_handle_csg_bit(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, + struct iscsi_param *params, int alloc_len) +{ + const char *auth_method; + int rc; + struct iscsi_bhs_login_rsp *rsph; + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + + switch (ISCSI_BHS_LOGIN_GET_CSG(rsph->flags)) { + case ISCSI_SECURITY_NEGOTIATION_PHASE: + /* SecurityNegotiation */ + auth_method = spdk_iscsi_param_get_val(conn->params, "AuthMethod"); + if (auth_method == NULL) { + SPDK_ERRLOG("AuthMethod is empty\n"); + /* Missing parameter */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_MISSING_PARMS; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + if (strcasecmp(auth_method, "None") == 0) { + conn->authenticated = 1; + } else { + rc = spdk_iscsi_auth_params(conn, params, auth_method, + rsp_pdu->data, alloc_len, + rsp_pdu->data_segment_len); + if (rc < 0) { + SPDK_ERRLOG("iscsi_auth_params() failed\n"); + /* Authentication failure */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_AUTHENT_FAIL; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + rsp_pdu->data_segment_len = rc; + if (conn->authenticated == 0) { + /* not complete */ + rsph->flags &= ~ISCSI_LOGIN_TRANSIT; + } else { + if (conn->auth.chap_phase != ISCSI_CHAP_PHASE_END) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CHAP phase not complete"); + } + } + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "Negotiated Auth Params", + rsp_pdu->data, rsp_pdu->data_segment_len); + } + break; + + case ISCSI_OPERATIONAL_NEGOTIATION_PHASE: + /* LoginOperationalNegotiation */ + if (conn->state == ISCSI_CONN_STATE_INVALID) { + if (conn->req_auth) { + /* Authentication failure */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_AUTHENT_FAIL; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } else { + /* AuthMethod=None */ + conn->authenticated = 1; + } + } + if (conn->authenticated == 0) { + SPDK_ERRLOG("authentication error\n"); + /* Authentication failure */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_AUTHENT_FAIL; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + break; + + case ISCSI_FULL_FEATURE_PHASE: + /* FullFeaturePhase */ + SPDK_ERRLOG("XXX Login in FullFeaturePhase\n"); + /* Initiator error */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + + default: + SPDK_ERRLOG("unknown stage\n"); + /* Initiator error */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + return 0; +} + +/* This function is used to notify the session info + * return + * 0: success + * otherwise: error + */ +static int +spdk_iscsi_op_login_notify_session_info(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu) +{ + struct spdk_iscsi_portal *portal = conn->portal; + struct iscsi_bhs_login_rsp *rsph; + + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + if 
(conn->sess->session_type == SESSION_TYPE_NORMAL) { + /* normal session */ + SPDK_NOTICELOG("Login from %s (%s) on %s tgt_node%d" + " (%s:%s,%d), ISID=%"PRIx64", TSIH=%u," + " CID=%u, HeaderDigest=%s, DataDigest=%s\n", + conn->initiator_name, conn->initiator_addr, + conn->target->name, conn->target->num, + portal->host, portal->port, portal->group->tag, + conn->sess->isid, conn->sess->tsih, conn->cid, + (spdk_iscsi_param_eq_val(conn->params, "HeaderDigest", "CRC32C") + ? "on" : "off"), + (spdk_iscsi_param_eq_val(conn->params, "DataDigest", "CRC32C") + ? "on" : "off")); + } else if (conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + /* discovery session */ + SPDK_NOTICELOG("Login(discovery) from %s (%s) on" + " (%s:%s,%d), ISID=%"PRIx64", TSIH=%u," + " CID=%u, HeaderDigest=%s, DataDigest=%s\n", + conn->initiator_name, conn->initiator_addr, + portal->host, portal->port, portal->group->tag, + conn->sess->isid, conn->sess->tsih, conn->cid, + (spdk_iscsi_param_eq_val(conn->params, "HeaderDigest", "CRC32C") + ? "on" : "off"), + (spdk_iscsi_param_eq_val(conn->params, "DataDigest", "CRC32C") + ? "on" : "off")); + } else { + SPDK_ERRLOG("unknown session type\n"); + /* Initiator error */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + return 0; +} + +/* + * This function is to handle the tbit cases + * return + * 0: success + * otherwise error + */ +static int +spdk_iscsi_op_login_rsp_handle_t_bit(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu) +{ + int rc; + struct iscsi_bhs_login_rsp *rsph; + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + + switch (ISCSI_BHS_LOGIN_GET_NSG(rsph->flags)) { + case ISCSI_SECURITY_NEGOTIATION_PHASE: + /* SecurityNegotiation */ + conn->login_phase = ISCSI_SECURITY_NEGOTIATION_PHASE; + break; + + case ISCSI_OPERATIONAL_NEGOTIATION_PHASE: + /* LoginOperationalNegotiation */ + conn->login_phase = ISCSI_OPERATIONAL_NEGOTIATION_PHASE; + break; + + case ISCSI_FULL_FEATURE_PHASE: + /* FullFeaturePhase */ + conn->login_phase = ISCSI_FULL_FEATURE_PHASE; + to_be16(&rsph->tsih, conn->sess->tsih); + + rc = spdk_iscsi_op_login_notify_session_info(conn, rsp_pdu); + if (rc < 0) { + return rc; + } + + conn->full_feature = 1; + spdk_iscsi_conn_migration(conn); + break; + + default: + SPDK_ERRLOG("unknown stage\n"); + /* Initiator error */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + return 0; +} + +/* + * This function is used to set the values of the internal data structure used + * by spdk_iscsi_op_login function + * return: + * 0, used to notify the a successful login + * SPDK_ISCSI_LOGIN_ERROR_RESPONSE, used to notify a failure login. + */ +static int +spdk_iscsi_op_login_rsp_handle(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *rsp_pdu, struct iscsi_param **params, + int alloc_len) +{ + int rc; + struct iscsi_bhs_login_rsp *rsph; + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + + /* negotiate parameters */ + rc = spdk_iscsi_negotiate_params(conn, params, rsp_pdu->data, alloc_len, + rsp_pdu->data_segment_len); + if (rc < 0) { + /* + * spdk_iscsi_negotiate_params just returns -1 on failure, + * so translate this into meaningful response codes and + * return values. 
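+ * The initiator only sees the status class/detail set here; the
+ * SPDK_ISCSI_LOGIN_ERROR_RESPONSE return value simply tells the caller
+ * to send the prepared login response and stop processing the login.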
+ */ + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INITIATOR_ERROR; + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } + + rsp_pdu->data_segment_len = rc; + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "Negotiated Params", rsp_pdu->data, rc); + + /* handle the CSG bit case */ + rc = spdk_iscsi_op_login_rsp_handle_csg_bit(conn, rsp_pdu, *params, + alloc_len); + if (rc < 0) { + return rc; + } + + /* handle the T bit case */ + if (ISCSI_BHS_LOGIN_GET_TBIT(rsph->flags)) { + rc = spdk_iscsi_op_login_rsp_handle_t_bit(conn, rsp_pdu); + } + + return rc; +} + +static int +spdk_iscsi_op_login(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + int rc; + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_param *params = NULL; + struct iscsi_param **params_p = ¶ms; + int alloc_len; + int cid; + + if (conn->full_feature && conn->sess != NULL && + conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + rc = spdk_iscsi_op_login_rsp_init(conn, pdu, rsp_pdu, params_p, + &alloc_len, &cid); + if (rc == SPDK_ISCSI_LOGIN_ERROR_RESPONSE || rc == SPDK_ISCSI_LOGIN_ERROR_PARAMETER) { + spdk_iscsi_op_login_response(conn, rsp_pdu, *params_p); + return rc; + } + + /* For other values, we need to directly return */ + if (rc < 0) { + spdk_put_pdu(rsp_pdu); + return rc; + } + + if (conn->state == ISCSI_CONN_STATE_INVALID) { + rc = spdk_iscsi_op_login_phase_none(conn, rsp_pdu, *params_p, + alloc_len, cid); + if (rc == SPDK_ISCSI_LOGIN_ERROR_RESPONSE || rc == SPDK_ISCSI_LOGIN_ERROR_PARAMETER) { + spdk_iscsi_op_login_response(conn, rsp_pdu, *params_p); + return rc; + } + } + + rc = spdk_iscsi_op_login_rsp_handle(conn, rsp_pdu, params_p, alloc_len); + if (rc == SPDK_ISCSI_LOGIN_ERROR_RESPONSE) { + spdk_iscsi_op_login_response(conn, rsp_pdu, *params_p); + return rc; + } + + rc = spdk_iscsi_op_login_response(conn, rsp_pdu, *params_p); + if (rc == 0) { + conn->state = ISCSI_CONN_STATE_RUNNING; + } else { + SPDK_ERRLOG("login error - connection will be destroyed\n"); + } + + return rc; +} + +static int +spdk_iscsi_op_text(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + struct iscsi_param *params = NULL; + struct iscsi_param **params_p = ¶ms; + struct spdk_iscsi_pdu *rsp_pdu; + uint8_t *data; + uint64_t lun; + uint32_t task_tag; + uint32_t CmdSN; + uint32_t ExpStatSN; + const char *val; + int F_bit, C_bit; + int data_len; + int alloc_len; + int rc; + struct iscsi_bhs_text_req *reqh; + struct iscsi_bhs_text_resp *rsph; + + data_len = 0; + alloc_len = conn->MaxRecvDataSegmentLength; + + reqh = (struct iscsi_bhs_text_req *)&pdu->bhs; + + F_bit = !!(reqh->flags & ISCSI_FLAG_FINAL); + C_bit = !!(reqh->flags & ISCSI_TEXT_CONTINUE); + lun = from_be64(&reqh->lun); + task_tag = from_be32(&reqh->itt); + CmdSN = from_be32(&reqh->cmd_sn); + pdu->cmd_sn = CmdSN; + ExpStatSN = from_be32(&reqh->exp_stat_sn); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "I=%d, F=%d, C=%d, ITT=%x, TTT=%x\n", + reqh->immediate, F_bit, C_bit, task_tag, from_be32(&reqh->ttt)); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "CmdSN=%u, ExpStatSN=%u, StatSN=%u, ExpCmdSN=%u, MaxCmdSN=%u\n", + CmdSN, ExpStatSN, conn->StatSN, conn->sess->ExpCmdSN, + conn->sess->MaxCmdSN); + + if (ExpStatSN != conn->StatSN) { +#if 0 + SPDK_ERRLOG("StatSN(%u) error\n", ExpStatSN); + return -1; +#else + /* StarPort have a bug */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN(%u) rewound\n", ExpStatSN); + conn->StatSN = ExpStatSN; +#endif 
+ } + + if (F_bit && C_bit) { + SPDK_ERRLOG("final and continue\n"); + return -1; + } + + /* + * If this is the first text op in a sequence, save the ITT so we can + * compare it against the ITT for subsequent ops in the same sequence. + * If a subsequent text op in same sequence has a different ITT, reject + * that PDU. + */ + if (conn->sess->current_text_itt == 0xffffffffU) { + conn->sess->current_text_itt = task_tag; + } else if (conn->sess->current_text_itt != task_tag) { + SPDK_ERRLOG("The correct itt is %u, and the current itt is %u...\n", + conn->sess->current_text_itt, task_tag); + return spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + } + + /* store incoming parameters */ + rc = spdk_iscsi_parse_params(¶ms, pdu->data, pdu->data_segment_len, + C_bit, &conn->partial_text_parameter); + if (rc < 0) { + SPDK_ERRLOG("iscsi_parse_params() failed\n"); + spdk_iscsi_param_free(params); + return -1; + } + + data = calloc(1, alloc_len); + if (!data) { + SPDK_ERRLOG("calloc() failed for data segment\n"); + spdk_iscsi_param_free(params); + return -ENOMEM; + } + + /* negotiate parameters */ + data_len = spdk_iscsi_negotiate_params(conn, params_p, + data, alloc_len, data_len); + if (data_len < 0) { + SPDK_ERRLOG("spdk_iscsi_negotiate_params() failed\n"); + spdk_iscsi_param_free(*params_p); + free(data); + return -1; + } + + /* sendtargets is special case */ + val = spdk_iscsi_param_get_val(*params_p, "SendTargets"); + if (val != NULL) { + if (spdk_iscsi_param_eq_val(conn->sess->params, + "SessionType", "Discovery")) { + if (strcasecmp(val, "") == 0) { + val = "ALL"; + } + + data_len = spdk_iscsi_send_tgts(conn, + conn->initiator_name, + conn->initiator_addr, + val, data, alloc_len, + data_len); + } else { + if (strcasecmp(val, "") == 0) { + val = conn->target->name; + } + + if (strcasecmp(val, "ALL") == 0) { + /* not in discovery session */ + data_len = spdk_iscsi_append_text(conn, + "SendTargets", + "Reject", data, + alloc_len, + data_len); + } else { + data_len = spdk_iscsi_send_tgts(conn, + conn->initiator_name, + conn->initiator_addr, + val, data, alloc_len, + data_len); + } + } + } else { + if (spdk_iscsi_param_eq_val(conn->sess->params, "SessionType", "Discovery")) { + spdk_iscsi_param_free(*params_p); + free(data); + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "Negotiated Params", data, data_len); + + /* response PDU */ + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + spdk_iscsi_param_free(*params_p); + free(data); + return SPDK_ISCSI_CONNECTION_FATAL; + } + rsph = (struct iscsi_bhs_text_resp *)&rsp_pdu->bhs; + + rsp_pdu->data = data; + rsph->opcode = ISCSI_OP_TEXT_RSP; + + if (F_bit) { + rsph->flags |= ISCSI_FLAG_FINAL; + } + + if (C_bit) { + rsph->flags |= ISCSI_TEXT_CONTINUE; + } + + DSET24(rsph->data_segment_len, data_len); + to_be64(&rsph->lun, lun); + to_be32(&rsph->itt, task_tag); + + if (F_bit) { + rsph->ttt = 0xffffffffU; + conn->sess->current_text_itt = 0xffffffffU; + } else { + to_be32(&rsph->ttt, 1 + conn->id); + } + + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (reqh->immediate == 0) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + /* update internal variables */ + rc = spdk_iscsi_copy_param2var(conn); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_copy_param2var() failed\n"); + spdk_iscsi_param_free(*params_p); + return -1; + } + + /* check value */ + rc = 
spdk_iscsi_check_values(conn); + if (rc < 0) { + SPDK_ERRLOG("iscsi_check_values() failed\n"); + spdk_iscsi_param_free(*params_p); + return -1; + } + + spdk_iscsi_param_free(*params_p); + return 0; +} + +static int +spdk_iscsi_op_logout(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + char buf[MAX_TMPBUF]; + struct spdk_iscsi_pdu *rsp_pdu; + uint32_t task_tag; + uint32_t CmdSN; + uint32_t ExpStatSN; + int response; + struct iscsi_bhs_logout_req *reqh; + struct iscsi_bhs_logout_resp *rsph; + uint16_t cid; + + reqh = (struct iscsi_bhs_logout_req *)&pdu->bhs; + + cid = from_be16(&reqh->cid); + task_tag = from_be32(&reqh->itt); + CmdSN = from_be32(&reqh->cmd_sn); + pdu->cmd_sn = CmdSN; + ExpStatSN = from_be32(&reqh->exp_stat_sn); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "reason=%d, ITT=%x, cid=%d\n", + reqh->reason, task_tag, cid); + + if (reqh->reason != 0 && conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + SPDK_ERRLOG("only logout with close the session reason can be in discovery session"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + if (conn->sess != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "CmdSN=%u, ExpStatSN=%u, StatSN=%u, ExpCmdSN=%u, MaxCmdSN=%u\n", + CmdSN, ExpStatSN, conn->StatSN, + conn->sess->ExpCmdSN, conn->sess->MaxCmdSN); + + if (CmdSN != conn->sess->ExpCmdSN) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CmdSN(%u) might have dropped\n", CmdSN); + /* ignore error */ + } + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CmdSN=%u, ExpStatSN=%u, StatSN=%u\n", + CmdSN, ExpStatSN, conn->StatSN); + } + + if (ExpStatSN != conn->StatSN) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN(%u/%u) might have dropped\n", + ExpStatSN, conn->StatSN); + /* ignore error */ + } + + if (conn->id == cid) { + response = 0; // connection or session closed successfully + spdk_iscsi_conn_logout(conn); + } else { + response = 1; + } + + /* response PDU */ + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + rsph = (struct iscsi_bhs_logout_resp *)&rsp_pdu->bhs; + rsp_pdu->data = NULL; + rsph->opcode = ISCSI_OP_LOGOUT_RSP; + rsph->flags |= 0x80; /* bit 0 must be 1 */ + rsph->response = response; + DSET24(rsph->data_segment_len, 0); + to_be32(&rsph->itt, task_tag); + + if (conn->sess != NULL) { + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (conn->sess->connections == 1) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + } else { + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + to_be32(&rsph->exp_cmd_sn, CmdSN); + to_be32(&rsph->max_cmd_sn, CmdSN); + } + + rsph->time_2_wait = 0; + rsph->time_2_retain = 0; + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + if (conn->sess == NULL) { + /* + * login failed but initiator still sent a logout rather than + * just closing the TCP connection. 
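+ * conn->sess is NULL in that case, so the log message below omits the
+ * session fields (ISID, TSIH, CID) that the other branches report.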
+ */ + snprintf(buf, sizeof buf, "Logout(login failed) from %s (%s) on" + " (%s:%s,%d)\n", + conn->initiator_name, conn->initiator_addr, + conn->portal_host, conn->portal_port, conn->pg_tag); + } else if (spdk_iscsi_param_eq_val(conn->sess->params, "SessionType", "Normal")) { + snprintf(buf, sizeof buf, "Logout from %s (%s) on %s tgt_node%d" + " (%s:%s,%d), ISID=%"PRIx64", TSIH=%u," + " CID=%u, HeaderDigest=%s, DataDigest=%s\n", + conn->initiator_name, conn->initiator_addr, + conn->target->name, conn->target->num, + conn->portal_host, conn->portal_port, conn->pg_tag, + conn->sess->isid, conn->sess->tsih, conn->cid, + (spdk_iscsi_param_eq_val(conn->params, "HeaderDigest", "CRC32C") + ? "on" : "off"), + (spdk_iscsi_param_eq_val(conn->params, "DataDigest", "CRC32C") + ? "on" : "off")); + } else { + /* discovery session */ + snprintf(buf, sizeof buf, "Logout(discovery) from %s (%s) on" + " (%s:%s,%d), ISID=%"PRIx64", TSIH=%u," + " CID=%u, HeaderDigest=%s, DataDigest=%s\n", + conn->initiator_name, conn->initiator_addr, + conn->portal_host, conn->portal_port, conn->pg_tag, + conn->sess->isid, conn->sess->tsih, conn->cid, + (spdk_iscsi_param_eq_val(conn->params, "HeaderDigest", "CRC32C") + ? "on" : "off"), + (spdk_iscsi_param_eq_val(conn->params, "DataDigest", "CRC32C") + ? "on" : "off")); + } + + SPDK_NOTICELOG("%s", buf); + + return 0; +} + +/* This function returns the spdk_scsi_task by searching the snack list via + * task transfertag and the pdu's opcode + */ +static struct spdk_iscsi_task * +spdk_get_scsi_task_from_ttt(struct spdk_iscsi_conn *conn, + uint32_t transfer_tag) +{ + struct spdk_iscsi_pdu *pdu; + struct iscsi_bhs_data_in *datain_bhs; + + TAILQ_FOREACH(pdu, &conn->snack_pdu_list, tailq) { + if (pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) { + datain_bhs = (struct iscsi_bhs_data_in *)&pdu->bhs; + if (from_be32(&datain_bhs->ttt) == transfer_tag) { + return pdu->task; + } + } + } + + return NULL; +} + +/* This function returns the spdk_scsi_task by searching the snack list via + * initiator task tag and the pdu's opcode + */ +static struct spdk_iscsi_task * +spdk_get_scsi_task_from_itt(struct spdk_iscsi_conn *conn, + uint32_t task_tag, enum iscsi_op opcode) +{ + struct spdk_iscsi_pdu *pdu; + + TAILQ_FOREACH(pdu, &conn->snack_pdu_list, tailq) { + if (pdu->bhs.opcode == opcode && + pdu->task != NULL && + pdu->task->tag == task_tag) { + return pdu->task; + } + } + + return NULL; +} + +static int +spdk_iscsi_send_datain(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, int datain_flag, + int residual_len, int offset, int DataSN, int len) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_data_in *rsph; + uint32_t task_tag; + uint32_t transfer_tag; + int F_bit, U_bit, O_bit, S_bit; + struct spdk_iscsi_task *primary; + + primary = spdk_iscsi_task_get_primary(task); + + /* DATA PDU */ + rsp_pdu = spdk_get_pdu(); + rsph = (struct iscsi_bhs_data_in *)&rsp_pdu->bhs; + rsp_pdu->data = task->scsi.iovs[0].iov_base + offset; + rsp_pdu->data_from_mempool = true; + + task_tag = task->tag; + transfer_tag = 0xffffffffU; + + F_bit = datain_flag & ISCSI_FLAG_FINAL; + O_bit = datain_flag & ISCSI_DATAIN_OVERFLOW; + U_bit = datain_flag & ISCSI_DATAIN_UNDERFLOW; + S_bit = datain_flag & ISCSI_DATAIN_STATUS; + + /* + * we need to hold onto this task/cmd because until the + * PDU has been written out + */ + rsp_pdu->task = task; + task->scsi.ref++; + + rsph->opcode = ISCSI_OP_SCSI_DATAIN; + + if (F_bit) { + rsph->flags |= ISCSI_FLAG_FINAL; + } + + /* we leave the A_bit clear */ + + if (F_bit && 
S_bit) { + if (O_bit) { + rsph->flags |= ISCSI_DATAIN_OVERFLOW; + } + + if (U_bit) { + rsph->flags |= ISCSI_DATAIN_UNDERFLOW; + } + } + + if (S_bit) { + rsph->flags |= ISCSI_DATAIN_STATUS; + rsph->status = task->scsi.status; + } + + DSET24(rsph->data_segment_len, len); + + to_be32(&rsph->itt, task_tag); + to_be32(&rsph->ttt, transfer_tag); + + if (S_bit) { + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + } + + if (F_bit && S_bit && !spdk_iscsi_task_is_immediate(primary)) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + to_be32(&rsph->data_sn, DataSN); + + if (conn->sess->ErrorRecoveryLevel >= 1) { + primary->datain_datasn = DataSN; + } + DataSN++; + + if (task->parent) { + offset += primary->scsi.data_transferred; + } + to_be32(&rsph->buffer_offset, (uint32_t)offset); + + if (F_bit && S_bit) { + to_be32(&rsph->res_cnt, residual_len); + } + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + return DataSN; +} + +static int +spdk_iscsi_transfer_in(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + uint32_t DataSN; + int transfer_len; + int data_len; + int segment_len; + int offset; + int residual_len = 0; + int sent_status; + int len; + int datain_flag = 0; + int datain_seq_cnt; + int i; + int sequence_end; + struct spdk_iscsi_task *primary; + + primary = spdk_iscsi_task_get_primary(task); + segment_len = conn->MaxRecvDataSegmentLength; + data_len = task->scsi.data_transferred; + transfer_len = task->scsi.length; + + if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) { + if (task != primary) { + conn->data_in_cnt--; + /* Handle the case when primary task return success but the subtask failed */ + if (primary->bytes_completed == primary->scsi.transfer_len && + primary->scsi.status == SPDK_SCSI_STATUS_GOOD) { + conn->data_in_cnt--; + } + } else { + /* handle the case that it is a primary task which has subtasks */ + if (primary->scsi.transfer_len != primary->scsi.length) { + conn->data_in_cnt--; + } + } + + return 0; + } + + if (data_len < transfer_len) { + /* underflow */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Underflow %u/%u\n", data_len, transfer_len); + residual_len = transfer_len - data_len; + transfer_len = data_len; + datain_flag |= ISCSI_DATAIN_UNDERFLOW; + } else if (data_len > transfer_len) { + /* overflow */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Overflow %u/%u\n", data_len, transfer_len); + residual_len = data_len - transfer_len; + datain_flag |= ISCSI_DATAIN_OVERFLOW; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Transfer %u\n", transfer_len); + residual_len = 0; + } + + DataSN = primary->datain_datasn; + sent_status = 0; + + /* calculate the number of sequences for all data-in pdus */ + datain_seq_cnt = 1 + ((transfer_len - 1) / (int)conn->sess->MaxBurstLength); + for (i = 0; i < datain_seq_cnt; i++) { + offset = i * conn->sess->MaxBurstLength; + sequence_end = DMIN32(((i + 1) * conn->sess->MaxBurstLength), + transfer_len); + + /* send data splitted by segment_len */ + for (; offset < sequence_end; offset += segment_len) { + len = DMIN32(segment_len, (sequence_end - offset)); + + datain_flag &= ~ISCSI_FLAG_FINAL; + datain_flag &= ~ISCSI_DATAIN_STATUS; + + if (offset + len == sequence_end) { + /* last PDU in a sequence */ + datain_flag |= ISCSI_FLAG_FINAL; + if (task->scsi.sense_data_len == 0) { + /* The last pdu in all data-in pdus */ + if ((offset + len) == transfer_len && + (primary->bytes_completed == primary->scsi.transfer_len)) { + datain_flag |= ISCSI_DATAIN_STATUS; + 
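+ /* The SCSI status is piggybacked on this final
+  * Data-In PDU, so no separate SCSI Response PDU
+  * is sent for the command. */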
sent_status = 1; + } + } + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Transfer=%d, Offset=%d, Len=%d\n", + sequence_end, offset, len); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN=%u, DataSN=%u, Offset=%u, Len=%d\n", + conn->StatSN, DataSN, offset, len); + + DataSN = spdk_iscsi_send_datain(conn, task, datain_flag, residual_len, + offset, DataSN, len); + } + } + + if (task != primary) { + primary->scsi.data_transferred += task->scsi.data_transferred; + } + primary->datain_datasn = DataSN; + + return sent_status; +} + +/* + * This function compare the input pdu's bhs with the pdu's bhs associated by + * active_r2t_tasks and queued_r2t_tasks in a connection + */ +static bool +spdk_iscsi_compare_pdu_bhs_within_existed_r2t_tasks(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *pdu) +{ + struct spdk_iscsi_task *task; + + TAILQ_FOREACH(task, &conn->active_r2t_tasks, link) { + if (!memcmp(&pdu->bhs, spdk_iscsi_task_get_bhs(task), ISCSI_BHS_LEN)) { + return true; + } + } + + TAILQ_FOREACH(task, &conn->queued_r2t_tasks, link) { + if (!memcmp(&pdu->bhs, spdk_iscsi_task_get_bhs(task), ISCSI_BHS_LEN)) { + return true; + } + } + + return false; +} + +static void spdk_iscsi_queue_task(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + spdk_trace_record(TRACE_ISCSI_TASK_QUEUE, conn->id, task->scsi.length, + (uintptr_t)task, (uintptr_t)task->pdu); + task->is_queued = true; + spdk_scsi_dev_queue_task(conn->dev, &task->scsi); +} + +static void spdk_iscsi_queue_mgmt_task(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, + enum spdk_scsi_task_func func) +{ + spdk_scsi_dev_queue_mgmt_task(conn->dev, &task->scsi, func); +} + +int spdk_iscsi_conn_handle_queued_datain_tasks(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_task *task; + + while (!TAILQ_EMPTY(&conn->queued_datain_tasks) && + conn->data_in_cnt < MAX_LARGE_DATAIN_PER_CONNECTION) { + task = TAILQ_FIRST(&conn->queued_datain_tasks); + assert(task->current_datain_offset <= task->scsi.transfer_len); + + if (task->current_datain_offset == 0) { + task->scsi.lun = spdk_scsi_dev_get_lun(conn->dev, task->lun_id); + if (task->scsi.lun == NULL) { + TAILQ_REMOVE(&conn->queued_datain_tasks, task, link); + spdk_scsi_task_process_null_lun(&task->scsi); + spdk_iscsi_task_cpl(&task->scsi); + return 0; + } + task->current_datain_offset = task->scsi.length; + conn->data_in_cnt++; + spdk_iscsi_queue_task(conn, task); + continue; + } + if (task->current_datain_offset < task->scsi.transfer_len) { + struct spdk_iscsi_task *subtask; + uint32_t remaining_size = 0; + + remaining_size = task->scsi.transfer_len - task->current_datain_offset; + subtask = spdk_iscsi_task_get(conn, task, spdk_iscsi_task_cpl); + assert(subtask != NULL); + subtask->scsi.offset = task->current_datain_offset; + subtask->scsi.length = DMIN32(SPDK_BDEV_LARGE_BUF_MAX_SIZE, remaining_size); + spdk_scsi_task_set_data(&subtask->scsi, NULL, 0); + task->current_datain_offset += subtask->scsi.length; + conn->data_in_cnt++; + + task->scsi.lun = spdk_scsi_dev_get_lun(conn->dev, task->lun_id); + if (task->scsi.lun == NULL) { + /* Remove the primary task from the list if this is the last subtask */ + if (task->current_datain_offset == task->scsi.transfer_len) { + TAILQ_REMOVE(&conn->queued_datain_tasks, task, link); + } + subtask->scsi.transfer_len = subtask->scsi.length; + spdk_scsi_task_process_null_lun(&subtask->scsi); + spdk_iscsi_task_cpl(&subtask->scsi); + return 0; + } + + spdk_iscsi_queue_task(conn, subtask); + } + if (task->current_datain_offset == task->scsi.transfer_len) { + 
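+ /* All subtasks for this large read have been issued, so the
+  * primary task can be removed from the pending queue. */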
TAILQ_REMOVE(&conn->queued_datain_tasks, task, link); + } + } + return 0; +} + +static int spdk_iscsi_op_scsi_read(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + int32_t remaining_size; + + TAILQ_INIT(&task->subtask_list); + task->scsi.dxfer_dir = SPDK_SCSI_DIR_FROM_DEV; + task->parent = NULL; + task->scsi.offset = 0; + task->scsi.length = DMIN32(SPDK_BDEV_LARGE_BUF_MAX_SIZE, task->scsi.transfer_len); + spdk_scsi_task_set_data(&task->scsi, NULL, 0); + + remaining_size = task->scsi.transfer_len - task->scsi.length; + task->current_datain_offset = 0; + + if (remaining_size == 0) { + spdk_iscsi_queue_task(conn, task); + return 0; + } + + TAILQ_INSERT_TAIL(&conn->queued_datain_tasks, task, link); + + return spdk_iscsi_conn_handle_queued_datain_tasks(conn); +} + +static int +spdk_iscsi_op_scsi(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + struct spdk_iscsi_task *task; + struct spdk_scsi_dev *dev; + uint8_t *cdb; + uint64_t lun; + uint32_t task_tag; + uint32_t transfer_len; + int F_bit, R_bit, W_bit; + int lun_i, rc; + struct iscsi_bhs_scsi_req *reqh; + + if (conn->sess->session_type != SESSION_TYPE_NORMAL) { + SPDK_ERRLOG("ISCSI_OP_SCSI not allowed in discovery and invalid session\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + reqh = (struct iscsi_bhs_scsi_req *)&pdu->bhs; + + F_bit = reqh->final_bit; + R_bit = reqh->read_bit; + W_bit = reqh->write_bit; + lun = from_be64(&reqh->lun); + task_tag = from_be32(&reqh->itt); + transfer_len = from_be32(&reqh->expected_data_xfer_len); + cdb = reqh->cdb; + + SPDK_TRACEDUMP(SPDK_LOG_ISCSI, "CDB", cdb, 16); + + task = spdk_iscsi_task_get(conn, NULL, spdk_iscsi_task_cpl); + if (!task) { + SPDK_ERRLOG("Unable to acquire task\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + spdk_iscsi_task_associate_pdu(task, pdu); + lun_i = spdk_islun2lun(lun); + task->lun_id = lun_i; + dev = conn->dev; + task->scsi.lun = spdk_scsi_dev_get_lun(dev, lun_i); + + if ((R_bit != 0) && (W_bit != 0)) { + SPDK_ERRLOG("Bidirectional CDB is not supported\n"); + spdk_iscsi_task_put(task); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + task->scsi.cdb = cdb; + task->tag = task_tag; + task->scsi.transfer_len = transfer_len; + task->scsi.target_port = conn->target_port; + task->scsi.initiator_port = conn->initiator_port; + task->parent = NULL; + + if (task->scsi.lun == NULL) { + spdk_scsi_task_process_null_lun(&task->scsi); + spdk_iscsi_task_cpl(&task->scsi); + return 0; + } + + /* no bi-directional support */ + if (R_bit) { + return spdk_iscsi_op_scsi_read(conn, task); + } else if (W_bit) { + task->scsi.dxfer_dir = SPDK_SCSI_DIR_TO_DEV; + + if ((conn->sess->ErrorRecoveryLevel >= 1) && + (spdk_iscsi_compare_pdu_bhs_within_existed_r2t_tasks(conn, pdu))) { + spdk_iscsi_task_response(conn, task); + spdk_iscsi_task_put(task); + return 0; + } + + if (pdu->data_segment_len > transfer_len) { + SPDK_ERRLOG("data segment len(=%d) > task transfer len(=%d)\n", + (int)pdu->data_segment_len, transfer_len); + spdk_iscsi_task_put(task); + rc = spdk_iscsi_reject(conn, pdu, + ISCSI_REASON_PROTOCOL_ERROR); + if (rc < 0) { + SPDK_ERRLOG("iscsi_reject() failed\n"); + } + return rc; + } + + /* check the ImmediateData and also pdu->data_segment_len */ + if ((!conn->sess->ImmediateData && (pdu->data_segment_len > 0)) || + (pdu->data_segment_len > conn->sess->FirstBurstLength)) { + spdk_iscsi_task_put(task); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + if (rc < 0) { + SPDK_ERRLOG("iscsi_reject() failed\n"); + } + return rc; + } + + if (F_bit && 
pdu->data_segment_len < transfer_len) { + /* needs R2T */ + rc = spdk_add_transfer_task(conn, task); + if (rc < 0) { + SPDK_ERRLOG("add_transfer_task() failed\n"); + spdk_iscsi_task_put(task); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + /* Non-immediate writes */ + if (pdu->data_segment_len == 0) { + return 0; + } else { + /* we are doing the first partial write task */ + task->scsi.ref++; + spdk_scsi_task_set_data(&task->scsi, pdu->data, pdu->data_segment_len); + task->scsi.length = pdu->data_segment_len; + } + } + + if (pdu->data_segment_len == transfer_len) { + /* we are doing small writes with no R2T */ + spdk_scsi_task_set_data(&task->scsi, pdu->data, transfer_len); + task->scsi.length = transfer_len; + } + } else { + /* neither R nor W bit set */ + task->scsi.dxfer_dir = SPDK_SCSI_DIR_NONE; + if (transfer_len > 0) { + spdk_iscsi_task_put(task); + SPDK_ERRLOG("Reject scsi cmd with EDTL > 0 but (R | W) == 0\n"); + return spdk_iscsi_reject(conn, pdu, ISCSI_REASON_INVALID_PDU_FIELD); + } + } + + spdk_iscsi_queue_task(conn, task); + return 0; +} + +void +spdk_iscsi_task_mgmt_response(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_task_req *reqh; + struct iscsi_bhs_task_resp *rsph; + + if (task->pdu == NULL) { + /* + * This was an internally generated task management command, + * usually from LUN cleanup when a connection closes. + */ + return; + } + + reqh = (struct iscsi_bhs_task_req *)&task->pdu->bhs; + /* response PDU */ + rsp_pdu = spdk_get_pdu(); + rsph = (struct iscsi_bhs_task_resp *)&rsp_pdu->bhs; + rsph->opcode = ISCSI_OP_TASK_RSP; + rsph->flags |= 0x80; /* bit 0 default to 1 */ + switch (task->scsi.response) { + case SPDK_SCSI_TASK_MGMT_RESP_COMPLETE: + rsph->response = ISCSI_TASK_FUNC_RESP_COMPLETE; + break; + case SPDK_SCSI_TASK_MGMT_RESP_SUCCESS: + rsph->response = ISCSI_TASK_FUNC_RESP_COMPLETE; + break; + case SPDK_SCSI_TASK_MGMT_RESP_REJECT: + rsph->response = ISCSI_TASK_FUNC_REJECTED; + break; + case SPDK_SCSI_TASK_MGMT_RESP_INVALID_LUN: + rsph->response = ISCSI_TASK_FUNC_RESP_LUN_NOT_EXIST; + break; + case SPDK_SCSI_TASK_MGMT_RESP_TARGET_FAILURE: + rsph->response = ISCSI_TASK_FUNC_REJECTED; + break; + case SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED: + rsph->response = ISCSI_TASK_FUNC_RESP_FUNC_NOT_SUPPORTED; + break; + } + rsph->itt = reqh->itt; + + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (reqh->immediate == 0) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); +} + +void spdk_iscsi_task_response(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_scsi_resp *rsph; + uint32_t task_tag; + uint32_t transfer_len; + size_t residual_len; + size_t data_len; + int O_bit, U_bit; + int rc; + struct spdk_iscsi_task *primary; + + primary = spdk_iscsi_task_get_primary(task); + + transfer_len = primary->scsi.transfer_len; + task_tag = task->tag; + + /* transfer data from logical unit */ + /* (direction is view of initiator side) */ + if (spdk_iscsi_task_is_read(primary)) { + rc = spdk_iscsi_transfer_in(conn, task); + if (rc > 0) { + /* sent status by last DATAIN PDU */ + return; + } + + if (primary->bytes_completed != primary->scsi.transfer_len) { + return; + } + } + + O_bit = U_bit = 0; + residual_len = 0; + data_len = primary->scsi.data_transferred; + + if ((transfer_len != 0) && + 
(task->scsi.status == SPDK_SCSI_STATUS_GOOD)) { + if (data_len < transfer_len) { + /* underflow */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Underflow %zu/%u\n", data_len, transfer_len); + residual_len = transfer_len - data_len; + U_bit = 1; + } else if (data_len > transfer_len) { + /* overflow */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Overflow %zu/%u\n", data_len, transfer_len); + residual_len = data_len - transfer_len; + O_bit = 1; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Transfer %u\n", transfer_len); + } + } + + /* response PDU */ + rsp_pdu = spdk_get_pdu(); + assert(rsp_pdu != NULL); + rsph = (struct iscsi_bhs_scsi_resp *)&rsp_pdu->bhs; + assert(task->scsi.sense_data_len <= sizeof(rsp_pdu->sense.data)); + memcpy(rsp_pdu->sense.data, task->scsi.sense_data, task->scsi.sense_data_len); + to_be16(&rsp_pdu->sense.length, task->scsi.sense_data_len); + rsp_pdu->data = (uint8_t *)&rsp_pdu->sense; + rsp_pdu->data_from_mempool = true; + + /* + * we need to hold onto this task/cmd because until the + * PDU has been written out + */ + rsp_pdu->task = task; + task->scsi.ref++; + + rsph->opcode = ISCSI_OP_SCSI_RSP; + rsph->flags |= 0x80; /* bit 0 is default to 1 */ + + if (O_bit) { + rsph->flags |= ISCSI_SCSI_OVERFLOW; + } + + if (U_bit) { + rsph->flags |= ISCSI_SCSI_UNDERFLOW; + } + + rsph->status = task->scsi.status; + if (task->scsi.sense_data_len) { + /* SenseLength (2 bytes) + SenseData */ + DSET24(rsph->data_segment_len, 2 + task->scsi.sense_data_len); + } + to_be32(&rsph->itt, task_tag); + + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (!spdk_iscsi_task_is_immediate(primary)) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + to_be32(&rsph->bi_read_res_cnt, 0); + to_be32(&rsph->res_cnt, residual_len); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); +} + +static struct spdk_iscsi_task * +spdk_get_transfer_task(struct spdk_iscsi_conn *conn, uint32_t transfer_tag) +{ + int i; + + for (i = 0; i < conn->pending_r2t; i++) { + if (conn->outstanding_r2t_tasks[i]->ttt == transfer_tag) { + return (conn->outstanding_r2t_tasks[i]); + } + } + + return NULL; +} + +static int +spdk_iscsi_op_task(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + struct iscsi_bhs_task_req *reqh; + uint64_t lun; + uint32_t task_tag; + uint32_t ref_task_tag; + uint8_t function; + int lun_i; + struct spdk_iscsi_task *task; + struct spdk_scsi_dev *dev; + + if (conn->sess->session_type != SESSION_TYPE_NORMAL) { + SPDK_ERRLOG("ISCSI_OP_TASK not allowed in discovery and invalid session\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + reqh = (struct iscsi_bhs_task_req *)&pdu->bhs; + function = reqh->flags & ISCSI_TASK_FUNCTION_MASK; + lun = from_be64(&reqh->lun); + task_tag = from_be32(&reqh->itt); + ref_task_tag = from_be32(&reqh->ref_task_tag); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "I=%d, func=%d, ITT=%x, ref TT=%x, LUN=0x%16.16"PRIx64"\n", + reqh->immediate, function, task_tag, ref_task_tag, lun); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN=%u, ExpCmdSN=%u, MaxCmdSN=%u\n", + conn->StatSN, conn->sess->ExpCmdSN, conn->sess->MaxCmdSN); + + lun_i = spdk_islun2lun(lun); + dev = conn->dev; + + task = spdk_iscsi_task_get(conn, NULL, spdk_iscsi_task_mgmt_cpl); + if (!task) { + SPDK_ERRLOG("Unable to acquire task\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + spdk_iscsi_task_associate_pdu(task, pdu); + task->scsi.target_port = conn->target_port; + task->scsi.initiator_port = conn->initiator_port; + task->tag = task_tag; + 
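+ /* The switch below queues only ABORT TASK, ABORT TASK SET and
+  * LOGICAL UNIT RESET to the SCSI layer; the remaining functions are
+  * answered immediately as unsupported or rejected. */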
task->scsi.lun = spdk_scsi_dev_get_lun(dev, lun_i); + + switch (function) { + /* abort task identified by Referenced Task Tag field */ + case ISCSI_TASK_FUNC_ABORT_TASK: + SPDK_NOTICELOG("ABORT_TASK\n"); + + task->scsi.abort_id = ref_task_tag; + + spdk_iscsi_queue_mgmt_task(conn, task, SPDK_SCSI_TASK_FUNC_ABORT_TASK); + spdk_del_transfer_task(conn, ref_task_tag); + + return SPDK_SUCCESS; + + /* abort all tasks issued via this session on the LUN */ + case ISCSI_TASK_FUNC_ABORT_TASK_SET: + SPDK_NOTICELOG("ABORT_TASK_SET\n"); + + spdk_iscsi_queue_mgmt_task(conn, task, SPDK_SCSI_TASK_FUNC_ABORT_TASK_SET); + spdk_clear_all_transfer_task(conn, task->scsi.lun); + + return SPDK_SUCCESS; + + case ISCSI_TASK_FUNC_CLEAR_TASK_SET: + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + SPDK_NOTICELOG("CLEAR_TASK_SET (Unsupported)\n"); + break; + + case ISCSI_TASK_FUNC_CLEAR_ACA: + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + SPDK_NOTICELOG("CLEAR_ACA (Unsupported)\n"); + break; + + case ISCSI_TASK_FUNC_LOGICAL_UNIT_RESET: + SPDK_NOTICELOG("LOGICAL_UNIT_RESET\n"); + + spdk_iscsi_queue_mgmt_task(conn, task, SPDK_SCSI_TASK_FUNC_LUN_RESET); + spdk_clear_all_transfer_task(conn, task->scsi.lun); + return SPDK_SUCCESS; + + case ISCSI_TASK_FUNC_TARGET_WARM_RESET: + SPDK_NOTICELOG("TARGET_WARM_RESET (Unsupported)\n"); + +#if 0 + spdk_iscsi_drop_conns(conn, conn->initiator_name, 1 /* drop all */); + rc = spdk_iscsi_tgt_node_reset(conn->sess->target, lun); + if (rc < 0) { + SPDK_ERRLOG("tgt_node reset failed\n"); + } +#else + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; +#endif + break; + + case ISCSI_TASK_FUNC_TARGET_COLD_RESET: + SPDK_NOTICELOG("TARGET_COLD_RESET\n"); + +#if 0 + spdk_iscsi_drop_conns(conn, conn->initiator_name, 1 /* drop all */); + + rc = spdk_iscsi_tgt_node_reset(conn->sess->target, lun); + if (rc < 0) { + SPDK_ERRLOG("tgt_node reset failed\n"); + } + + conn->state = ISCSI_CONN_STATE_EXITING; +#else + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; +#endif + break; + + case ISCSI_TASK_FUNC_TASK_REASSIGN: + SPDK_NOTICELOG("TASK_REASSIGN (Unsupported)\n"); + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + break; + + default: + SPDK_ERRLOG("unsupported function %d\n", function); + task->scsi.response = SPDK_SCSI_TASK_MGMT_RESP_REJECT; + break; + } + + spdk_iscsi_task_mgmt_response(conn, task); + spdk_iscsi_task_put(task); + return 0; +} + +static int +spdk_iscsi_op_nopout(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_nop_out *reqh; + struct iscsi_bhs_nop_in *rsph; + uint8_t *data; + uint64_t lun; + uint32_t task_tag; + uint32_t transfer_tag; + uint32_t CmdSN; + int I_bit; + int data_len; + + if (conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + SPDK_ERRLOG("ISCSI_OP_NOPOUT not allowed in discovery session\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + reqh = (struct iscsi_bhs_nop_out *)&pdu->bhs; + I_bit = reqh->immediate; + + data_len = DGET24(reqh->data_segment_len); + if (data_len > conn->MaxRecvDataSegmentLength) { + data_len = conn->MaxRecvDataSegmentLength; + } + + lun = from_be64(&reqh->lun); + task_tag = from_be32(&reqh->itt); + transfer_tag = from_be32(&reqh->ttt); + CmdSN = from_be32(&reqh->cmd_sn); + pdu->cmd_sn = CmdSN; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "I=%d, ITT=%x, TTT=%x\n", + I_bit, task_tag, transfer_tag); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "CmdSN=%u, StatSN=%u, 
ExpCmdSN=%u, MaxCmdSN=%u\n", + CmdSN, conn->StatSN, conn->sess->ExpCmdSN, + conn->sess->MaxCmdSN); + + if (transfer_tag != 0xFFFFFFFF && transfer_tag != (uint32_t)conn->id) { + SPDK_ERRLOG("invalid transfer tag 0x%x\n", transfer_tag); + /* + * Technically we should probably fail the connection here, but for now + * just print the error message and continue. + */ + } + + /* + * We don't actually check to see if this is a response to the NOP-In + * that we sent. Our goal is to just verify that the initiator is + * alive and responding to commands, not to verify that it tags + * NOP-Outs correctly + */ + conn->nop_outstanding = false; + + if (task_tag == 0xffffffffU) { + if (I_bit == 1) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "got NOPOUT ITT=0xffffffff\n"); + return SPDK_SUCCESS; + } else { + SPDK_ERRLOG("got NOPOUT ITT=0xffffffff, I=0\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + data = calloc(1, data_len); + if (!data) { + SPDK_ERRLOG("calloc() failed for ping data\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + /* response of NOPOUT */ + if (data_len > 0) { + /* copy ping data */ + memcpy(data, pdu->data, data_len); + } + + transfer_tag = 0xffffffffU; + + /* response PDU */ + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + free(data); + return SPDK_ISCSI_CONNECTION_FATAL; + } + rsph = (struct iscsi_bhs_nop_in *)&rsp_pdu->bhs; + rsp_pdu->data = data; + rsph->opcode = ISCSI_OP_NOPIN; + rsph->flags |= 0x80; /* bit 0 default to 1 */ + DSET24(rsph->data_segment_len, data_len); + to_be64(&rsph->lun, lun); + to_be32(&rsph->itt, task_tag); + to_be32(&rsph->ttt, transfer_tag); + + to_be32(&rsph->stat_sn, conn->StatSN); + conn->StatSN++; + + if (I_bit == 0) { + conn->sess->MaxCmdSN++; + } + + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + conn->last_nopin = spdk_get_ticks(); + + return SPDK_SUCCESS; +} + +static int +spdk_add_transfer_task(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task) +{ + uint32_t transfer_len; + size_t max_burst_len; + size_t segment_len; + size_t data_len; + int len; + int idx; + int rc; + int data_out_req; + + transfer_len = task->scsi.transfer_len; + data_len = spdk_iscsi_task_get_pdu(task)->data_segment_len; + max_burst_len = conn->sess->MaxBurstLength; + segment_len = SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH; + data_out_req = 1 + (transfer_len - data_len - 1) / segment_len; + task->data_out_cnt = data_out_req; + + /* + * If we already have too many tasks using R2T, then queue this task + * and start sending R2T for it after some of the tasks using R2T/data + * out buffers complete. + */ + if (conn->pending_r2t >= DEFAULT_MAXR2T) { + TAILQ_INSERT_TAIL(&conn->queued_r2t_tasks, task, link); + return SPDK_SUCCESS; + } + + conn->data_out_cnt += data_out_req; + idx = conn->pending_r2t++; + + conn->outstanding_r2t_tasks[idx] = task; + task->next_expected_r2t_offset = data_len; + task->current_r2t_length = 0; + task->R2TSN = 0; + /* According to RFC3720 10.8.5, 0xffffffff is + * reserved for TTT in R2T. 
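+ * The per-connection counter below therefore skips that value,
+ * wrapping from 0xfffffffe back to 0.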
+ */ + if (++conn->ttt == 0xffffffffu) { + conn->ttt = 0; + } + task->ttt = conn->ttt; + + while (data_len != transfer_len) { + len = DMIN32(max_burst_len, (transfer_len - data_len)); + rc = spdk_iscsi_send_r2t(conn, task, data_len, len, + task->ttt, &task->R2TSN); + if (rc < 0) { + SPDK_ERRLOG("iscsi_send_r2t() failed\n"); + return rc; + } + data_len += len; + task->next_r2t_offset = data_len; + task->outstanding_r2t++; + if (conn->sess->MaxOutstandingR2T == task->outstanding_r2t) { + break; + } + } + + TAILQ_INSERT_TAIL(&conn->active_r2t_tasks, task, link); + return SPDK_SUCCESS; +} + +/* If there are additional large writes queued for R2Ts, start them now. + * This is called when a large write is just completed or when multiple LUNs + * are attached and large write tasks for the specific LUN are cleared. + */ +static void +spdk_start_queued_transfer_tasks(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_task *task, *tmp; + + TAILQ_FOREACH_SAFE(task, &conn->queued_r2t_tasks, link, tmp) { + if (conn->pending_r2t < DEFAULT_MAXR2T) { + TAILQ_REMOVE(&conn->queued_r2t_tasks, task, link); + spdk_add_transfer_task(conn, task); + } else { + break; + } + } +} + +void spdk_del_transfer_task(struct spdk_iscsi_conn *conn, uint32_t task_tag) +{ + struct spdk_iscsi_task *task; + int i; + + for (i = 0; i < conn->pending_r2t; i++) { + if (conn->outstanding_r2t_tasks[i]->tag == task_tag) { + task = conn->outstanding_r2t_tasks[i]; + conn->data_out_cnt -= task->data_out_cnt; + + conn->pending_r2t--; + for (; i < conn->pending_r2t; i++) { + conn->outstanding_r2t_tasks[i] = conn->outstanding_r2t_tasks[i + 1]; + } + conn->outstanding_r2t_tasks[conn->pending_r2t] = NULL; + break; + } + } + + spdk_start_queued_transfer_tasks(conn); +} + +static void +spdk_del_connection_queued_task(struct spdk_iscsi_conn *conn, void *tailq, + struct spdk_scsi_lun *lun) +{ + struct spdk_iscsi_task *task, *task_tmp; + /* + * Temporary used to index spdk_scsi_task related + * queues of the connection. 
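+ *
+ * Illustrative note: callers pass either &conn->active_r2t_tasks or
+ * &conn->queued_r2t_tasks through the untyped 'tailq' argument, e.g.
+ *
+ *     spdk_del_connection_queued_task(conn, &conn->active_r2t_tasks, lun);
+ *
+ * so a compatible TAILQ_HEAD type is re-declared locally just so the
+ * TAILQ_FOREACH_SAFE()/TAILQ_REMOVE() macros can walk and edit the list.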
+ */ + TAILQ_HEAD(queued_tasks, spdk_iscsi_task) *head; + head = (struct queued_tasks *)tailq; + + TAILQ_FOREACH_SAFE(task, head, link, task_tmp) { + if (lun == NULL || lun == task->scsi.lun) { + TAILQ_REMOVE(head, task, link); + if (lun != NULL && spdk_scsi_lun_is_removing(lun)) { + spdk_scsi_task_process_null_lun(&task->scsi); + spdk_iscsi_task_response(conn, task); + } + spdk_iscsi_task_put(task); + } + } +} + +void spdk_clear_all_transfer_task(struct spdk_iscsi_conn *conn, + struct spdk_scsi_lun *lun) +{ + int i, j, pending_r2t; + struct spdk_iscsi_task *task; + + pending_r2t = conn->pending_r2t; + for (i = 0; i < pending_r2t; i++) { + task = conn->outstanding_r2t_tasks[i]; + if (lun == NULL || lun == task->scsi.lun) { + conn->outstanding_r2t_tasks[i] = NULL; + task->outstanding_r2t = 0; + task->next_r2t_offset = 0; + task->next_expected_r2t_offset = 0; + conn->data_out_cnt -= task->data_out_cnt; + conn->pending_r2t--; + } + } + + for (i = 0; i < pending_r2t; i++) { + if (conn->outstanding_r2t_tasks[i] != NULL) { + continue; + } + for (j = i + 1; j < pending_r2t; j++) { + if (conn->outstanding_r2t_tasks[j] != NULL) { + conn->outstanding_r2t_tasks[i] = conn->outstanding_r2t_tasks[j]; + conn->outstanding_r2t_tasks[j] = NULL; + break; + } + } + } + + spdk_del_connection_queued_task(conn, &conn->active_r2t_tasks, lun); + spdk_del_connection_queued_task(conn, &conn->queued_r2t_tasks, lun); + + spdk_start_queued_transfer_tasks(conn); +} + +/* This function is used to handle the r2t snack */ +static int +spdk_iscsi_handle_r2t_snack(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, + struct spdk_iscsi_pdu *pdu, uint32_t beg_run, + uint32_t run_length, int32_t task_tag) +{ + int32_t last_r2tsn; + int i; + + if (beg_run < task->acked_r2tsn) { + SPDK_ERRLOG("ITT: 0x%08x, R2T SNACK requests retransmission of" + "R2TSN: from 0x%08x to 0x%08x. 
But it has already" + "ack to R2TSN:0x%08x, protocol error.\n", + task_tag, beg_run, (beg_run + run_length), + (task->acked_r2tsn - 1)); + return spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + } + + if (run_length) { + if ((beg_run + run_length) > task->R2TSN) { + SPDK_ERRLOG("ITT: 0x%08x, received R2T SNACK with" + "BegRun: 0x%08x, RunLength: 0x%08x, exceeds" + "current R2TSN: 0x%08x, protocol error.\n", + task_tag, beg_run, run_length, + task->R2TSN); + + return spdk_iscsi_reject(conn, pdu, + ISCSI_REASON_INVALID_PDU_FIELD); + } + last_r2tsn = (beg_run + run_length); + } else { + last_r2tsn = task->R2TSN; + } + + for (i = beg_run; i < last_r2tsn; i++) { + if (spdk_iscsi_send_r2t_recovery(conn, task, i, false) < 0) { + SPDK_ERRLOG("The r2t_sn=%d of r2t_task=%p is not sent\n", i, task); + } + } + return 0; +} + +/* This function is used to recover the data in packet */ +static int +spdk_iscsi_handle_recovery_datain(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, + struct spdk_iscsi_pdu *pdu, uint32_t beg_run, + uint32_t run_length, uint32_t task_tag) +{ + struct spdk_iscsi_pdu *old_pdu, *pdu_temp; + uint32_t i; + struct iscsi_bhs_data_in *datain_header; + uint32_t last_statsn; + + task = spdk_iscsi_task_get_primary(task); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_handle_recovery_datain\n"); + + if (beg_run < task->acked_data_sn) { + SPDK_ERRLOG("ITT: 0x%08x, DATA IN SNACK requests retransmission of" + "DATASN: from 0x%08x to 0x%08x but already acked to " + "DATASN: 0x%08x protocol error\n", + task_tag, beg_run, + (beg_run + run_length), (task->acked_data_sn - 1)); + + return spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + } + + if (run_length == 0) { + /* as the DataSN begins at 0 */ + run_length = task->datain_datasn + 1; + } + + if ((beg_run + run_length - 1) > task->datain_datasn) { + SPDK_ERRLOG("Initiator requests BegRun: 0x%08x, RunLength:" + "0x%08x greater than maximum DataSN: 0x%08x.\n", + beg_run, run_length, task->datain_datasn); + + return -1; + } else { + last_statsn = beg_run + run_length - 1; + } + + for (i = beg_run; i <= last_statsn; i++) { + TAILQ_FOREACH_SAFE(old_pdu, &conn->snack_pdu_list, tailq, pdu_temp) { + if (old_pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) { + datain_header = (struct iscsi_bhs_data_in *)&old_pdu->bhs; + if (from_be32(&datain_header->itt) == task_tag && + from_be32(&datain_header->data_sn) == i) { + TAILQ_REMOVE(&conn->snack_pdu_list, old_pdu, tailq); + spdk_iscsi_conn_write_pdu(conn, old_pdu); + break; + } + } + } + } + return 0; +} + +/* This function is used to handle the status snack */ +static int +spdk_iscsi_handle_status_snack(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *pdu) +{ + uint32_t beg_run; + uint32_t run_length; + struct iscsi_bhs_snack_req *reqh; + uint32_t i; + uint32_t last_statsn; + bool found_pdu; + struct spdk_iscsi_pdu *old_pdu; + + reqh = (struct iscsi_bhs_snack_req *)&pdu->bhs; + beg_run = from_be32(&reqh->beg_run); + run_length = from_be32(&reqh->run_len); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "beg_run=%d, run_length=%d, conn->StatSN=" + "%d, conn->exp_statsn=%d\n", beg_run, run_length, + conn->StatSN, conn->exp_statsn); + + if (!beg_run) { + beg_run = conn->exp_statsn; + } else if (beg_run < conn->exp_statsn) { + SPDK_ERRLOG("Got Status SNACK Begrun: 0x%08x, RunLength: 0x%08x " + "but already got ExpStatSN: 0x%08x on CID:%hu.\n", + beg_run, run_length, conn->StatSN, conn->cid); + + return spdk_iscsi_reject(conn, pdu, ISCSI_REASON_INVALID_PDU_FIELD); + } + + last_statsn = 
(!run_length) ? conn->StatSN : (beg_run + run_length); + + for (i = beg_run; i < last_statsn; i++) { + found_pdu = false; + TAILQ_FOREACH(old_pdu, &conn->snack_pdu_list, tailq) { + if (from_be32(&old_pdu->bhs.stat_sn) == i) { + found_pdu = true; + break; + } + } + + if (!found_pdu) { + SPDK_ERRLOG("Unable to find StatSN: 0x%08x. For a Status" + "SNACK, assuming this is a proactive SNACK " + "for an untransmitted StatSN, ignoring.\n", + beg_run); + } else { + TAILQ_REMOVE(&conn->snack_pdu_list, old_pdu, tailq); + spdk_iscsi_conn_write_pdu(conn, old_pdu); + } + } + + return 0; +} + +/* This function is used to handle the data ack snack */ +static int +spdk_iscsi_handle_data_ack(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *pdu) +{ + uint32_t transfer_tag; + uint32_t beg_run; + uint32_t run_length; + struct spdk_iscsi_pdu *old_pdu; + uint32_t old_datasn; + int rc; + struct iscsi_bhs_snack_req *reqh; + struct spdk_iscsi_task *task; + struct iscsi_bhs_data_in *datain_header; + struct spdk_iscsi_task *primary; + + reqh = (struct iscsi_bhs_snack_req *)&pdu->bhs; + transfer_tag = from_be32(&reqh->ttt); + beg_run = from_be32(&reqh->beg_run); + run_length = from_be32(&reqh->run_len); + task = NULL; + datain_header = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "beg_run=%d,transfer_tag=%d,run_len=%d\n", + beg_run, transfer_tag, run_length); + + task = spdk_get_scsi_task_from_ttt(conn, transfer_tag); + if (!task) { + SPDK_ERRLOG("Data ACK SNACK for TTT: 0x%08x is invalid.\n", + transfer_tag); + goto reject_return; + } + + primary = spdk_iscsi_task_get_primary(task); + if ((run_length != 0) || (beg_run < primary->acked_data_sn)) { + SPDK_ERRLOG("TTT: 0x%08x Data ACK SNACK BegRUN: %d is less than " + "the next expected acked DataSN: %d\n", + transfer_tag, beg_run, primary->acked_data_sn); + goto reject_return; + } + + primary->acked_data_sn = beg_run; + + /* To free the pdu */ + TAILQ_FOREACH(old_pdu, &conn->snack_pdu_list, tailq) { + if (old_pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) { + datain_header = (struct iscsi_bhs_data_in *) &old_pdu->bhs; + old_datasn = from_be32(&datain_header->data_sn); + if ((from_be32(&datain_header->ttt) == transfer_tag) && + (old_datasn == beg_run - 1)) { + TAILQ_REMOVE(&conn->snack_pdu_list, old_pdu, tailq); + if (old_pdu->task) { + spdk_iscsi_task_put(old_pdu->task); + } + spdk_put_pdu(old_pdu); + break; + } + } + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Received Data ACK SNACK for TTT: 0x%08x," + " updated acked DataSN to 0x%08x.\n", transfer_tag, + (task->acked_data_sn - 1)); + + return 0; + +reject_return: + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_INVALID_SNACK); + if (rc < 0) { + SPDK_ERRLOG("iscsi_reject() failed\n"); + return -1; + } + + return 0; +} + +/* This function is used to remove the r2t pdu from snack_pdu_list by < task, r2t_sn> info */ +static struct spdk_iscsi_pdu * +spdk_iscsi_remove_r2t_pdu_from_snack_list(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, + uint32_t r2t_sn) +{ + struct spdk_iscsi_pdu *pdu; + struct iscsi_bhs_r2t *r2t_header; + + TAILQ_FOREACH(pdu, &conn->snack_pdu_list, tailq) { + if (pdu->bhs.opcode == ISCSI_OP_R2T) { + r2t_header = (struct iscsi_bhs_r2t *)&pdu->bhs; + if (pdu->task == task && + from_be32(&r2t_header->r2t_sn) == r2t_sn) { + TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq); + return pdu; + } + } + } + + return NULL; +} + +/* This function is used re-send the r2t packet */ +static int +spdk_iscsi_send_r2t_recovery(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, uint32_t r2t_sn, + bool 
send_new_r2tsn) +{ + struct spdk_iscsi_pdu *pdu; + struct iscsi_bhs_r2t *rsph; + uint32_t transfer_len; + uint32_t len; + int rc; + + /* remove the r2t pdu from the snack_list */ + pdu = spdk_iscsi_remove_r2t_pdu_from_snack_list(conn, task, r2t_sn); + if (!pdu) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "No pdu is found\n"); + return -1; + } + + /* flag + * false: only need to re-send the old r2t with changing statsn + * true: we send a r2t with new r2tsn + */ + if (!send_new_r2tsn) { + to_be32(&pdu->bhs.stat_sn, conn->StatSN); + spdk_iscsi_conn_write_pdu(conn, pdu); + } else { + rsph = (struct iscsi_bhs_r2t *)&pdu->bhs; + transfer_len = from_be32(&rsph->desired_xfer_len); + + /* still need to increase the acked r2tsn */ + task->acked_r2tsn++; + len = DMIN32(conn->sess->MaxBurstLength, (transfer_len - + task->next_expected_r2t_offset)); + + /* remove the old_r2t_pdu */ + if (pdu->task) { + spdk_iscsi_task_put(pdu->task); + } + spdk_put_pdu(pdu); + + /* re-send a new r2t pdu */ + rc = spdk_iscsi_send_r2t(conn, task, task->next_expected_r2t_offset, + len, task->ttt, &task->R2TSN); + if (rc < 0) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + return 0; +} + +/* This function is used to handle the snack request from the initiator */ +static int +spdk_iscsi_op_snack(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + struct iscsi_bhs_snack_req *reqh; + struct spdk_iscsi_task *task; + int type; + uint32_t task_tag; + uint32_t beg_run; + uint32_t run_length; + int rc; + + if (conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + SPDK_ERRLOG("ISCSI_OP_SNACK not allowed in discovery session\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + reqh = (struct iscsi_bhs_snack_req *)&pdu->bhs; + if (!conn->sess->ErrorRecoveryLevel) { + SPDK_ERRLOG("Got a SNACK request in ErrorRecoveryLevel=0\n"); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + if (rc < 0) { + SPDK_ERRLOG("iscsi_reject() failed\n"); + return -1; + } + return rc; + } + + type = reqh->flags & ISCSI_FLAG_SNACK_TYPE_MASK; + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "The value of type is %d\n", type); + + switch (type) { + case 0: + reqh = (struct iscsi_bhs_snack_req *)&pdu->bhs; + task_tag = from_be32(&reqh->itt); + beg_run = from_be32(&reqh->beg_run); + run_length = from_be32(&reqh->run_len); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "beg_run=%d, run_length=%d, " + "task_tag=%x, transfer_tag=%u\n", beg_run, + run_length, task_tag, from_be32(&reqh->ttt)); + + task = spdk_get_scsi_task_from_itt(conn, task_tag, + ISCSI_OP_SCSI_DATAIN); + if (task) { + return spdk_iscsi_handle_recovery_datain(conn, task, pdu, + beg_run, run_length, task_tag); + } + task = spdk_get_scsi_task_from_itt(conn, task_tag, ISCSI_OP_R2T); + if (task) { + return spdk_iscsi_handle_r2t_snack(conn, task, pdu, beg_run, + run_length, task_tag); + } + SPDK_ERRLOG("It is Neither datain nor r2t recovery request\n"); + rc = -1; + break; + case ISCSI_FLAG_SNACK_TYPE_STATUS: + rc = spdk_iscsi_handle_status_snack(conn, pdu); + break; + case ISCSI_FLAG_SNACK_TYPE_DATA_ACK: + rc = spdk_iscsi_handle_data_ack(conn, pdu); + break; + case ISCSI_FLAG_SNACK_TYPE_RDATA: + SPDK_ERRLOG("R-Data SNACK is Not Supported int spdk\n"); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + break; + default: + SPDK_ERRLOG("Unknown SNACK type %d, protocol error\n", type); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + break; + } + + return rc; +} + +/* This function is used to refree the pdu when it is acknowledged */ +static void +spdk_remove_acked_pdu(struct 
spdk_iscsi_conn *conn, + uint32_t ExpStatSN) +{ + struct spdk_iscsi_pdu *pdu, *pdu_temp; + uint32_t stat_sn; + + conn->exp_statsn = DMIN32(ExpStatSN, conn->StatSN); + TAILQ_FOREACH_SAFE(pdu, &conn->snack_pdu_list, tailq, pdu_temp) { + stat_sn = from_be32(&pdu->bhs.stat_sn); + if (SN32_LT(stat_sn, conn->exp_statsn)) { + TAILQ_REMOVE(&conn->snack_pdu_list, pdu, tailq); + spdk_iscsi_conn_free_pdu(conn, pdu); + } + } +} + +static int spdk_iscsi_op_data(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_pdu *pdu) +{ + struct spdk_iscsi_task *task, *subtask; + struct iscsi_bhs_data_out *reqh; + struct spdk_scsi_lun *lun_dev; + uint32_t transfer_tag; + uint32_t task_tag; + uint32_t transfer_len; + uint32_t DataSN; + uint32_t buffer_offset; + uint32_t len; + int F_bit; + int rc; + int reject_reason = ISCSI_REASON_INVALID_PDU_FIELD; + + if (conn->sess->session_type == SESSION_TYPE_DISCOVERY) { + SPDK_ERRLOG("ISCSI_OP_SCSI_DATAOUT not allowed in discovery session\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + reqh = (struct iscsi_bhs_data_out *)&pdu->bhs; + F_bit = !!(reqh->flags & ISCSI_FLAG_FINAL); + transfer_tag = from_be32(&reqh->ttt); + task_tag = from_be32(&reqh->itt); + DataSN = from_be32(&reqh->data_sn); + buffer_offset = from_be32(&reqh->buffer_offset); + + task = spdk_get_transfer_task(conn, transfer_tag); + if (task == NULL) { + SPDK_ERRLOG("Not found task for transfer_tag=%x\n", transfer_tag); + goto reject_return; + } + + lun_dev = spdk_scsi_dev_get_lun(conn->dev, task->lun_id); + + if (pdu->data_segment_len > task->desired_data_transfer_length) { + SPDK_ERRLOG("the dataout pdu data length is larger than the value sent by R2T PDU\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + if (task->tag != task_tag) { + SPDK_ERRLOG("The r2t task tag is %u, and the dataout task tag is %u\n", + task->tag, task_tag); + goto reject_return; + } + + if (DataSN != task->r2t_datasn) { + SPDK_ERRLOG("DataSN(%u) exp=%d error\n", DataSN, task->r2t_datasn); + if (conn->sess->ErrorRecoveryLevel >= 1) { + goto send_r2t_recovery_return; + } else { + reject_reason = ISCSI_REASON_PROTOCOL_ERROR; + goto reject_return; + } + } + + if (buffer_offset != task->next_expected_r2t_offset) { + SPDK_ERRLOG("offset(%u) error\n", buffer_offset); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + transfer_len = task->scsi.transfer_len; + task->current_r2t_length += pdu->data_segment_len; + task->next_expected_r2t_offset += pdu->data_segment_len; + task->r2t_datasn++; + + if (task->current_r2t_length > conn->sess->MaxBurstLength) { + SPDK_ERRLOG("R2T burst(%u) > MaxBurstLength(%u)\n", + task->current_r2t_length, + conn->sess->MaxBurstLength); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + if (F_bit) { + /* + * This R2T burst is done. Clear the length before we + * receive a PDU for the next R2T burst. 
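+ *
+ * Illustrative sizing (assuming the defaults from iscsi.h rather than a
+ * negotiated value): with MaxBurstLength = SPDK_ISCSI_MAX_BURST_LENGTH
+ * (16 * 64 KiB = 1 MiB) and Data-OUT PDUs capped at 64 KiB, one burst is
+ * at most 16 Data-OUT PDUs; current_r2t_length grows by data_segment_len
+ * for each of them until the final PDU carries the F bit and the counter
+ * is reset below.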
+ */ + task->current_r2t_length = 0; + } + + subtask = spdk_iscsi_task_get(conn, task, spdk_iscsi_task_cpl); + if (subtask == NULL) { + SPDK_ERRLOG("Unable to acquire subtask\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + subtask->scsi.offset = buffer_offset; + subtask->scsi.length = pdu->data_segment_len; + spdk_scsi_task_set_data(&subtask->scsi, pdu->data, pdu->data_segment_len); + spdk_iscsi_task_associate_pdu(subtask, pdu); + + if (task->next_expected_r2t_offset == transfer_len) { + task->acked_r2tsn++; + } else if (F_bit && (task->next_r2t_offset < transfer_len)) { + task->acked_r2tsn++; + len = DMIN32(conn->sess->MaxBurstLength, (transfer_len - + task->next_r2t_offset)); + rc = spdk_iscsi_send_r2t(conn, task, task->next_r2t_offset, len, + task->ttt, &task->R2TSN); + if (rc < 0) { + SPDK_ERRLOG("iscsi_send_r2t() failed\n"); + } + task->next_r2t_offset += len; + } + + if (lun_dev == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "LUN %d is removed, complete the task immediately\n", + task->lun_id); + subtask->scsi.transfer_len = subtask->scsi.length; + spdk_scsi_task_process_null_lun(&subtask->scsi); + spdk_iscsi_task_cpl(&subtask->scsi); + return 0; + } + + spdk_iscsi_queue_task(conn, subtask); + return 0; + +send_r2t_recovery_return: + rc = spdk_iscsi_send_r2t_recovery(conn, task, task->acked_r2tsn, true); + if (rc == 0) { + return 0; + } + +reject_return: + rc = spdk_iscsi_reject(conn, pdu, reject_reason); + if (rc < 0) { + SPDK_ERRLOG("iscsi_reject() failed\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + return SPDK_SUCCESS; +} + +static int +spdk_iscsi_send_r2t(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task, int offset, + int len, uint32_t transfer_tag, uint32_t *R2TSN) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_r2t *rsph; + + /* R2T PDU */ + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + rsph = (struct iscsi_bhs_r2t *)&rsp_pdu->bhs; + rsp_pdu->data = NULL; + rsph->opcode = ISCSI_OP_R2T; + rsph->flags |= 0x80; /* bit 0 is default to 1 */ + to_be64(&rsph->lun, task->lun_id); + to_be32(&rsph->itt, task->tag); + to_be32(&rsph->ttt, transfer_tag); + + to_be32(&rsph->stat_sn, conn->StatSN); + to_be32(&rsph->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsph->max_cmd_sn, conn->sess->MaxCmdSN); + + to_be32(&rsph->r2t_sn, *R2TSN); + *R2TSN += 1; + + task->r2t_datasn = 0; /* next expected datasn to ack */ + + to_be32(&rsph->buffer_offset, (uint32_t)offset); + to_be32(&rsph->desired_xfer_len, (uint32_t)len); + task->desired_data_transfer_length = (size_t)len; + + /* we need to hold onto this task/cmd because until the PDU has been + * written out */ + rsp_pdu->task = task; + task->scsi.ref++; + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + + return SPDK_SUCCESS; +} + +void spdk_iscsi_send_nopin(struct spdk_iscsi_conn *conn) +{ + struct spdk_iscsi_pdu *rsp_pdu; + struct iscsi_bhs_nop_in *rsp; + + /* Only send nopin if we have logged in and are in a normal session. 
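+ *
+ * Illustrative note on the keepalive round trip (the timeout value and the
+ * poller enforcing it live outside this file and are assumed here): after a
+ * NOP-In has been queued, nop_outstanding is true and last_nopin records
+ * the send time, so a caller could detect a dead initiator roughly as
+ *
+ *     if (conn->nop_outstanding &&
+ *         spdk_get_ticks() - conn->last_nopin > nop_timeout_ticks) {
+ *             conn->state = ISCSI_CONN_STATE_EXITING;
+ *     }
+ *
+ * spdk_iscsi_op_nopout() earlier in this file clears nop_outstanding when
+ * the initiator's reply arrives.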
*/ + if (conn->sess == NULL || + !conn->full_feature || + !spdk_iscsi_param_eq_val(conn->sess->params, "SessionType", "Normal")) { + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "send NOPIN isid=%"PRIx64", tsih=%u, cid=%u\n", + conn->sess->isid, conn->sess->tsih, conn->cid); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN=%u, ExpCmdSN=%u, MaxCmdSN=%u\n", + conn->StatSN, conn->sess->ExpCmdSN, + conn->sess->MaxCmdSN); + + rsp_pdu = spdk_get_pdu(); + rsp = (struct iscsi_bhs_nop_in *) &rsp_pdu->bhs; + rsp_pdu->data = NULL; + + /* + * spdk_get_pdu() memset's the PDU for us, so only fill out the needed + * fields. + */ + rsp->opcode = ISCSI_OP_NOPIN; + rsp->flags = 0x80; + /* + * Technically the to_be32() is not needed here, since + * to_be32(0xFFFFFFFU) returns 0xFFFFFFFFU. + */ + to_be32(&rsp->itt, 0xFFFFFFFFU); + to_be32(&rsp->ttt, conn->id); + to_be32(&rsp->stat_sn, conn->StatSN); + to_be32(&rsp->exp_cmd_sn, conn->sess->ExpCmdSN); + to_be32(&rsp->max_cmd_sn, conn->sess->MaxCmdSN); + + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + conn->last_nopin = spdk_get_ticks(); + conn->nop_outstanding = true; +} + +static void +spdk_init_login_reject_response(struct spdk_iscsi_pdu *pdu, struct spdk_iscsi_pdu *rsp_pdu) +{ + struct iscsi_bhs_login_rsp *rsph; + + memset(rsp_pdu, 0, sizeof(struct spdk_iscsi_pdu)); + rsph = (struct iscsi_bhs_login_rsp *)&rsp_pdu->bhs; + rsph->version_max = ISCSI_VERSION; + rsph->version_act = ISCSI_VERSION; + rsph->opcode = ISCSI_OP_LOGIN_RSP; + rsph->status_class = ISCSI_CLASS_INITIATOR_ERROR; + rsph->status_detail = ISCSI_LOGIN_INVALID_LOGIN_REQUEST; + rsph->itt = pdu->bhs.itt; +} + +int +spdk_iscsi_execute(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) +{ + int opcode; + int rc; + struct spdk_iscsi_pdu *rsp_pdu = NULL; + uint32_t ExpStatSN; + uint32_t QCmdSN; + int I_bit; + struct spdk_iscsi_sess *sess; + struct iscsi_bhs_scsi_req *reqh; + + if (pdu == NULL) { + return -1; + } + + opcode = pdu->bhs.opcode; + reqh = (struct iscsi_bhs_scsi_req *)&pdu->bhs; + pdu->cmd_sn = from_be32(&reqh->cmd_sn); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "opcode %x\n", opcode); + + if (opcode == ISCSI_OP_LOGIN) { + rc = spdk_iscsi_op_login(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("iscsi_op_login() failed\n"); + } + return rc; + } + + /* connection in login phase but receive non-login opcode + * return response code 0x020b to initiator. 
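+ *
+ * Illustrative note: "0x020b" is simply the two status bytes that
+ * spdk_init_login_reject_response() above writes into the Login Response:
+ *
+ *     rsph->status_class  = ISCSI_CLASS_INITIATOR_ERROR;        // 0x02
+ *     rsph->status_detail = ISCSI_LOGIN_INVALID_LOGIN_REQUEST;  // 0x0b
+ *
+ * read back-to-back as class 0x02, detail 0x0b.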
+ * */ + if (!conn->full_feature && conn->state == ISCSI_CONN_STATE_RUNNING) { + rsp_pdu = spdk_get_pdu(); + if (rsp_pdu == NULL) { + return SPDK_ISCSI_CONNECTION_FATAL; + } + spdk_init_login_reject_response(pdu, rsp_pdu); + spdk_iscsi_conn_write_pdu(conn, rsp_pdu); + SPDK_ERRLOG("Received opcode %d in login phase\n", opcode); + return SPDK_ISCSI_LOGIN_ERROR_RESPONSE; + } else if (conn->state == ISCSI_CONN_STATE_INVALID) { + SPDK_ERRLOG("before Full Feature\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + + sess = conn->sess; + if (!sess) { + SPDK_ERRLOG("Connection has no associated session!\n"); + return SPDK_ISCSI_CONNECTION_FATAL; + } + I_bit = reqh->immediate; + if (I_bit == 0) { + if (SN32_LT(pdu->cmd_sn, sess->ExpCmdSN) || + SN32_GT(pdu->cmd_sn, sess->MaxCmdSN)) { + if (sess->session_type == SESSION_TYPE_NORMAL && + opcode != ISCSI_OP_SCSI_DATAOUT) { + SPDK_ERRLOG("CmdSN(%u) ignore (ExpCmdSN=%u, MaxCmdSN=%u)\n", + pdu->cmd_sn, sess->ExpCmdSN, sess->MaxCmdSN); + + if (sess->ErrorRecoveryLevel >= 1) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Skip the error in ERL 1 and 2\n"); + } else { + return SPDK_PDU_FATAL; + } + } + } + } else if (pdu->cmd_sn != sess->ExpCmdSN) { + SPDK_ERRLOG("CmdSN(%u) error ExpCmdSN=%u\n", pdu->cmd_sn, sess->ExpCmdSN); + + if (sess->ErrorRecoveryLevel >= 1) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Skip the error in ERL 1 and 2\n"); + } else if (opcode != ISCSI_OP_NOPOUT) { + /* + * The Linux initiator does not send valid CmdSNs for + * nopout under heavy load, so do not close the + * connection in that case. + */ + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + ExpStatSN = from_be32(&reqh->exp_stat_sn); + if (SN32_GT(ExpStatSN, conn->StatSN)) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "StatSN(%u) advanced\n", ExpStatSN); + ExpStatSN = conn->StatSN; + } + + if (sess->ErrorRecoveryLevel >= 1) { + spdk_remove_acked_pdu(conn, ExpStatSN); + } + + if (opcode == ISCSI_OP_NOPOUT || opcode == ISCSI_OP_SCSI) { + QCmdSN = sess->MaxCmdSN - sess->ExpCmdSN + 1; + QCmdSN += sess->queue_depth; + if (SN32_LT(ExpStatSN + QCmdSN, conn->StatSN)) { + SPDK_ERRLOG("StatSN(%u/%u) QCmdSN(%u) error\n", + ExpStatSN, conn->StatSN, QCmdSN); + return SPDK_ISCSI_CONNECTION_FATAL; + } + } + + if (!I_bit && opcode != ISCSI_OP_SCSI_DATAOUT) { + sess->ExpCmdSN++; + } + + switch (opcode) { + case ISCSI_OP_NOPOUT: + rc = spdk_iscsi_op_nopout(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_nopout() failed\n"); + return rc; + } + break; + + case ISCSI_OP_SCSI: + rc = spdk_iscsi_op_scsi(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_scsi() failed\n"); + return rc; + } + break; + case ISCSI_OP_TASK: + rc = spdk_iscsi_op_task(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_task() failed\n"); + return rc; + } + break; + + case ISCSI_OP_TEXT: + rc = spdk_iscsi_op_text(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_text() failed\n"); + return rc; + } + break; + + case ISCSI_OP_LOGOUT: + rc = spdk_iscsi_op_logout(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_logout() failed\n"); + return rc; + } + break; + + case ISCSI_OP_SCSI_DATAOUT: + rc = spdk_iscsi_op_data(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_data() failed\n"); + return rc; + } + break; + + case ISCSI_OP_SNACK: + rc = spdk_iscsi_op_snack(conn, pdu); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_op_snack() failed\n"); + return rc; + } + break; + + default: + SPDK_ERRLOG("unsupported opcode %x\n", opcode); + rc = spdk_iscsi_reject(conn, pdu, ISCSI_REASON_PROTOCOL_ERROR); + if (rc < 0) { + 
SPDK_ERRLOG("spdk_iscsi_reject() failed\n"); + return rc; + } + break; + } + + return 0; +} + +void spdk_free_sess(struct spdk_iscsi_sess *sess) +{ + if (sess == NULL) { + return; + } + + sess->tag = 0; + sess->target = NULL; + sess->session_type = SESSION_TYPE_INVALID; + spdk_iscsi_param_free(sess->params); + free(sess->conns); + spdk_scsi_port_free(&sess->initiator_port); + spdk_mempool_put(g_spdk_iscsi.session_pool, (void *)sess); +} + +static int +spdk_create_iscsi_sess(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target, + enum session_type session_type) +{ + struct spdk_iscsi_sess *sess; + int rc; + + sess = spdk_mempool_get(g_spdk_iscsi.session_pool); + if (!sess) { + SPDK_ERRLOG("Unable to get session object\n"); + SPDK_ERRLOG("MaxSessions set to %d\n", g_spdk_iscsi.MaxSessions); + return -ENOMEM; + } + + /* configuration values */ + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + sess->MaxConnections = g_spdk_iscsi.MaxConnectionsPerSession; + sess->MaxOutstandingR2T = DEFAULT_MAXOUTSTANDINGR2T; + + sess->DefaultTime2Wait = g_spdk_iscsi.DefaultTime2Wait; + sess->DefaultTime2Retain = g_spdk_iscsi.DefaultTime2Retain; + sess->FirstBurstLength = g_spdk_iscsi.FirstBurstLength; + sess->MaxBurstLength = SPDK_ISCSI_MAX_BURST_LENGTH; + sess->InitialR2T = DEFAULT_INITIALR2T; + sess->ImmediateData = g_spdk_iscsi.ImmediateData; + sess->DataPDUInOrder = DEFAULT_DATAPDUINORDER; + sess->DataSequenceInOrder = DEFAULT_DATASEQUENCEINORDER; + sess->ErrorRecoveryLevel = g_spdk_iscsi.ErrorRecoveryLevel; + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + sess->tag = conn->portal->group->tag; + + sess->conns = calloc(sess->MaxConnections, sizeof(*sess->conns)); + if (!sess->conns) { + SPDK_ERRLOG("calloc() failed for connection array\n"); + return -ENOMEM; + } + + sess->connections = 0; + + sess->conns[sess->connections] = conn; + sess->connections++; + + sess->params = NULL; + sess->target = NULL; + sess->isid = 0; + sess->session_type = session_type; + sess->current_text_itt = 0xffffffffU; + + /* set default params */ + rc = spdk_iscsi_sess_params_init(&sess->params); + if (rc < 0) { + SPDK_ERRLOG("iscsi_sess_params_init() failed\n"); + goto error_return; + } + /* replace with config value */ + rc = spdk_iscsi_param_set_int(sess->params, "MaxConnections", + sess->MaxConnections); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "MaxOutstandingR2T", + sess->MaxOutstandingR2T); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "DefaultTime2Wait", + sess->DefaultTime2Wait); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "DefaultTime2Retain", + sess->DefaultTime2Retain); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "FirstBurstLength", + sess->FirstBurstLength); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "MaxBurstLength", + sess->MaxBurstLength); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set(sess->params, "InitialR2T", + sess->InitialR2T ? 
"Yes" : "No"); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set(sess->params, "ImmediateData", + sess->ImmediateData ? "Yes" : "No"); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set(sess->params, "DataPDUInOrder", + sess->DataPDUInOrder ? "Yes" : "No"); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set(sess->params, "DataSequenceInOrder", + sess->DataSequenceInOrder ? "Yes" : "No"); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set() failed\n"); + goto error_return; + } + + rc = spdk_iscsi_param_set_int(sess->params, "ErrorRecoveryLevel", + sess->ErrorRecoveryLevel); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + /* realloc buffer */ + rc = spdk_iscsi_param_set_int(conn->params, "MaxRecvDataSegmentLength", + conn->MaxRecvDataSegmentLength); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_set_int() failed\n"); + goto error_return; + } + + /* sess for first connection of session */ + conn->sess = sess; + return 0; + +error_return: + spdk_free_sess(sess); + conn->sess = NULL; + return -1; +} + +static struct spdk_iscsi_sess * +spdk_get_iscsi_sess_by_tsih(uint16_t tsih) +{ + struct spdk_iscsi_sess *session; + + if (tsih == 0 || tsih > g_spdk_iscsi.MaxSessions) { + return NULL; + } + + session = g_spdk_iscsi.session[tsih - 1]; + assert(tsih == session->tsih); + + return session; +} + +static int +spdk_append_iscsi_sess(struct spdk_iscsi_conn *conn, + const char *initiator_port_name, uint16_t tsih, uint16_t cid) +{ + struct spdk_iscsi_sess *sess; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "append session: init port name=%s, tsih=%u, cid=%u\n", + initiator_port_name, tsih, cid); + + sess = spdk_get_iscsi_sess_by_tsih(tsih); + if (sess == NULL) { + SPDK_ERRLOG("spdk_get_iscsi_sess_by_tsih failed\n"); + return -1; + } + if ((conn->portal->group->tag != sess->tag) || + (strcasecmp(initiator_port_name, spdk_scsi_port_get_name(sess->initiator_port)) != 0) || + (conn->target != sess->target)) { + /* no match */ + SPDK_ERRLOG("no MCS session for init port name=%s, tsih=%d, cid=%d\n", + initiator_port_name, tsih, cid); + return -1; + } + + if (sess->connections >= sess->MaxConnections) { + /* no slot for connection */ + SPDK_ERRLOG("too many connections for init port name=%s, tsih=%d, cid=%d\n", + initiator_port_name, tsih, cid); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Connections (tsih %d): %d\n", sess->tsih, sess->connections); + conn->sess = sess; + + /* + * TODO: need a mutex or other sync mechanism to protect the session's + * connection list. + */ + sess->conns[sess->connections] = conn; + sess->connections++; + + return 0; +} + +bool spdk_iscsi_is_deferred_free_pdu(struct spdk_iscsi_pdu *pdu) +{ + if (pdu == NULL) { + return false; + } + + if (pdu->bhs.opcode == ISCSI_OP_R2T || + pdu->bhs.opcode == ISCSI_OP_SCSI_DATAIN) { + return true; + } + + return false; +} diff --git a/src/spdk/lib/iscsi/iscsi.h b/src/spdk/lib/iscsi/iscsi.h new file mode 100644 index 00000000..3cfb20fc --- /dev/null +++ b/src/spdk/lib/iscsi/iscsi.h @@ -0,0 +1,467 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ISCSI_H +#define SPDK_ISCSI_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/iscsi_spec.h" +#include "spdk/event.h" +#include "spdk/thread.h" + +#include "iscsi/param.h" +#include "iscsi/tgt_node.h" + +#include "spdk/assert.h" +#include "spdk/util.h" + +#define SPDK_ISCSI_DEFAULT_NODEBASE "iqn.2016-06.io.spdk" + +#define DEFAULT_MAXR2T 4 +#define MAX_INITIATOR_NAME 256 +#define MAX_TARGET_NAME 256 + +#define MAX_PORTAL 1024 +#define MAX_INITIATOR 256 +#define MAX_NETMASK 256 +#define MAX_SESSIONS 1024 +#define MAX_ISCSI_CONNECTIONS MAX_SESSIONS +#define MAX_FIRSTBURSTLENGTH 16777215 + +#define DEFAULT_PORT 3260 +#define DEFAULT_MAX_SESSIONS 128 +#define DEFAULT_MAX_CONNECTIONS_PER_SESSION 2 +#define DEFAULT_MAXOUTSTANDINGR2T 1 +#define DEFAULT_DEFAULTTIME2WAIT 2 +#define DEFAULT_DEFAULTTIME2RETAIN 20 +#define DEFAULT_FIRSTBURSTLENGTH 8192 +#define DEFAULT_INITIALR2T true +#define DEFAULT_IMMEDIATEDATA true +#define DEFAULT_DATAPDUINORDER true +#define DEFAULT_DATASEQUENCEINORDER true +#define DEFAULT_ERRORRECOVERYLEVEL 0 +#define DEFAULT_TIMEOUT 60 +#define MAX_NOPININTERVAL 60 +#define DEFAULT_NOPININTERVAL 30 +#define DEFAULT_CONNECTIONS_PER_LCORE 4 + +/* + * SPDK iSCSI target currently only supports 64KB as the maximum data segment length + * it can receive from initiators. Other values may work, but no guarantees. + */ +#define SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH 65536 + +/* + * SPDK iSCSI target will only send a maximum of SPDK_BDEV_LARGE_BUF_MAX_SIZE data segments, even if the + * connection can support more. + */ +#define SPDK_ISCSI_MAX_SEND_DATA_SEGMENT_LENGTH SPDK_BDEV_LARGE_BUF_MAX_SIZE + +/* + * Defines maximum number of data out buffers each connection can have in + * use at any given time. + */ +#define MAX_DATA_OUT_PER_CONNECTION 16 + +/* + * Defines maximum number of data in buffers each connection can have in + * use at any given time. So this limit does not affect I/O smaller than + * SPDK_BDEV_SMALL_BUF_MAX_SIZE. 
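+ *
+ * Illustrative sizing (the 64 KiB large-buffer size is an assumption about
+ * SPDK_BDEV_LARGE_BUF_MAX_SIZE, which is defined elsewhere): with the value
+ * of 64 below, one connection holds at most roughly
+ *
+ *     64 * 64 KiB = 4 MiB
+ *
+ * of large Data-In payload at a time, while reads small enough to use small
+ * buffers are not counted against the limit.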
+ */ +#define MAX_LARGE_DATAIN_PER_CONNECTION 64 + +/* + * Defines default maximum queue depth per connection and this can be + * changed by configuration file. + */ +#define DEFAULT_MAX_QUEUE_DEPTH 64 + +#define SPDK_ISCSI_MAX_BURST_LENGTH \ + (SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH * MAX_DATA_OUT_PER_CONNECTION) + +/* + * Defines default maximum amount in bytes of unsolicited data the iSCSI + * initiator may send to the SPDK iSCSI target during the execution of + * a single SCSI command. And it is smaller than the MaxBurstLength. + */ +#define SPDK_ISCSI_FIRST_BURST_LENGTH 8192 + +/* + * Defines minimum amount in bytes of unsolicited data the iSCSI initiator + * may send to the SPDK iSCSI target during the execution of a single + * SCSI command. + */ +#define SPDK_ISCSI_MIN_FIRST_BURST_LENGTH 512 + +/** Defines how long we should wait for a TCP close after responding to a + * logout request, before terminating the connection ourselves. + */ +#define ISCSI_LOGOUT_TIMEOUT 5 /* in seconds */ + +/* according to RFC1982 */ +#define SN32_CMPMAX (((uint32_t)1U) << (32 - 1)) +#define SN32_LT(S1,S2) \ + (((uint32_t)(S1) != (uint32_t)(S2)) \ + && (((uint32_t)(S1) < (uint32_t)(S2) \ + && ((uint32_t)(S2) - (uint32_t)(S1) < SN32_CMPMAX)) \ + || ((uint32_t)(S1) > (uint32_t)(S2) \ + && ((uint32_t)(S1) - (uint32_t)(S2) > SN32_CMPMAX)))) +#define SN32_GT(S1,S2) \ + (((uint32_t)(S1) != (uint32_t)(S2)) \ + && (((uint32_t)(S1) < (uint32_t)(S2) \ + && ((uint32_t)(S2) - (uint32_t)(S1) > SN32_CMPMAX)) \ + || ((uint32_t)(S1) > (uint32_t)(S2) \ + && ((uint32_t)(S1) - (uint32_t)(S2) < SN32_CMPMAX)))) + +/* For spdk_iscsi_login_in related function use, we need to avoid the conflict + * with other errors + * */ +#define SPDK_ISCSI_LOGIN_ERROR_RESPONSE -1000 +#define SPDK_ISCSI_LOGIN_ERROR_PARAMETER -1001 +#define SPDK_ISCSI_PARAMETER_EXCHANGE_NOT_ONCE -1002 + +#define ISCSI_AHS_LEN 60 + +struct spdk_mobj { + struct spdk_mempool *mp; + void *buf; + size_t len; + uint64_t reserved; /* do not use */ +}; + +struct spdk_iscsi_pdu { + struct iscsi_bhs bhs; + struct spdk_mobj *mobj; + uint8_t *data_buf; + uint8_t *data; + uint8_t header_digest[ISCSI_DIGEST_LEN]; + uint8_t data_digest[ISCSI_DIGEST_LEN]; + size_t data_segment_len; + int bhs_valid_bytes; + int ahs_valid_bytes; + int data_valid_bytes; + int hdigest_valid_bytes; + int ddigest_valid_bytes; + int ref; + bool data_from_mempool; /* indicate whether the data buffer is allocated from mempool */ + struct spdk_iscsi_task *task; /* data tied to a task buffer */ + uint32_t cmd_sn; + uint32_t writev_offset; + TAILQ_ENTRY(spdk_iscsi_pdu) tailq; + + + /* + * 60 bytes of AHS should suffice for now. + * This should always be at the end of PDU data structure. + * we need to not zero this out when doing memory clear. 
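+ *
+ * Illustrative note (the actual reset code lives in iscsi.c and is assumed
+ * here): keeping ahs[] and the sense buffer as the last members lets a
+ * recycled PDU be cleared with a partial memset such as
+ *
+ *     memset(pdu, 0, offsetof(struct spdk_iscsi_pdu, ahs));
+ *
+ * which zeroes all of the bookkeeping fields without paying to wipe these
+ * large, rarely used trailing buffers.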
+ */ + uint8_t ahs[ISCSI_AHS_LEN]; + + struct { + uint16_t length; /* iSCSI SenseLength (big-endian) */ + uint8_t data[32]; + } sense; +}; + +enum iscsi_connection_state { + ISCSI_CONN_STATE_INVALID = 0, + ISCSI_CONN_STATE_RUNNING = 1, + ISCSI_CONN_STATE_LOGGED_OUT = 2, + ISCSI_CONN_STATE_EXITING = 3, + ISCSI_CONN_STATE_EXITED = 4, +}; + +enum iscsi_chap_phase { + ISCSI_CHAP_PHASE_NONE = 0, + ISCSI_CHAP_PHASE_WAIT_A = 1, + ISCSI_CHAP_PHASE_WAIT_NR = 2, + ISCSI_CHAP_PHASE_END = 3, +}; + +enum session_type { + SESSION_TYPE_INVALID = 0, + SESSION_TYPE_NORMAL = 1, + SESSION_TYPE_DISCOVERY = 2, +}; + +#define ISCSI_CHAP_CHALLENGE_LEN 1024 +#define ISCSI_CHAP_MAX_USER_LEN 255 +#define ISCSI_CHAP_MAX_SECRET_LEN 255 + +struct iscsi_chap_auth { + enum iscsi_chap_phase chap_phase; + + char user[ISCSI_CHAP_MAX_USER_LEN + 1]; + char secret[ISCSI_CHAP_MAX_SECRET_LEN + 1]; + char muser[ISCSI_CHAP_MAX_USER_LEN + 1]; + char msecret[ISCSI_CHAP_MAX_SECRET_LEN + 1]; + + uint8_t chap_id[1]; + uint8_t chap_mid[1]; + int chap_challenge_len; + uint8_t chap_challenge[ISCSI_CHAP_CHALLENGE_LEN]; + int chap_mchallenge_len; + uint8_t chap_mchallenge[ISCSI_CHAP_CHALLENGE_LEN]; +}; + +struct spdk_iscsi_auth_secret { + char user[ISCSI_CHAP_MAX_USER_LEN + 1]; + char secret[ISCSI_CHAP_MAX_SECRET_LEN + 1]; + char muser[ISCSI_CHAP_MAX_USER_LEN + 1]; + char msecret[ISCSI_CHAP_MAX_SECRET_LEN + 1]; + TAILQ_ENTRY(spdk_iscsi_auth_secret) tailq; +}; + +struct spdk_iscsi_auth_group { + int32_t tag; + TAILQ_HEAD(, spdk_iscsi_auth_secret) secret_head; + TAILQ_ENTRY(spdk_iscsi_auth_group) tailq; +}; + +struct spdk_iscsi_sess { + uint32_t connections; + struct spdk_iscsi_conn **conns; + + struct spdk_scsi_port *initiator_port; + int tag; + + uint64_t isid; + uint16_t tsih; + struct spdk_iscsi_tgt_node *target; + int queue_depth; + + struct iscsi_param *params; + + enum session_type session_type; + uint32_t MaxConnections; + uint32_t MaxOutstandingR2T; + uint32_t DefaultTime2Wait; + uint32_t DefaultTime2Retain; + uint32_t FirstBurstLength; + uint32_t MaxBurstLength; + bool InitialR2T; + bool ImmediateData; + bool DataPDUInOrder; + bool DataSequenceInOrder; + uint32_t ErrorRecoveryLevel; + + uint32_t ExpCmdSN; + uint32_t MaxCmdSN; + + uint32_t current_text_itt; +}; + +struct spdk_iscsi_poll_group { + uint32_t core; + struct spdk_poller *poller; + struct spdk_poller *nop_poller; + STAILQ_HEAD(connections, spdk_iscsi_conn) connections; + struct spdk_sock_group *sock_group; +}; + +struct spdk_iscsi_opts { + char *authfile; + char *nodebase; + int32_t timeout; + int32_t nopininterval; + bool disable_chap; + bool require_chap; + bool mutual_chap; + int32_t chap_group; + uint32_t MaxSessions; + uint32_t MaxConnectionsPerSession; + uint32_t MaxConnections; + uint32_t MaxQueueDepth; + uint32_t DefaultTime2Wait; + uint32_t DefaultTime2Retain; + uint32_t FirstBurstLength; + bool ImmediateData; + uint32_t ErrorRecoveryLevel; + bool AllowDuplicateIsid; + uint32_t min_connections_per_core; +}; + +struct spdk_iscsi_globals { + char *authfile; + char *nodebase; + pthread_mutex_t mutex; + TAILQ_HEAD(, spdk_iscsi_portal) portal_head; + TAILQ_HEAD(, spdk_iscsi_portal_grp) pg_head; + TAILQ_HEAD(, spdk_iscsi_init_grp) ig_head; + TAILQ_HEAD(, spdk_iscsi_tgt_node) target_head; + TAILQ_HEAD(, spdk_iscsi_auth_group) auth_group_head; + + int32_t timeout; + int32_t nopininterval; + bool disable_chap; + bool require_chap; + bool mutual_chap; + int32_t chap_group; + + uint32_t MaxSessions; + uint32_t MaxConnectionsPerSession; + uint32_t MaxConnections; + uint32_t 
MaxQueueDepth; + uint32_t DefaultTime2Wait; + uint32_t DefaultTime2Retain; + uint32_t FirstBurstLength; + bool ImmediateData; + uint32_t ErrorRecoveryLevel; + bool AllowDuplicateIsid; + + struct spdk_mempool *pdu_pool; + struct spdk_mempool *pdu_immediate_data_pool; + struct spdk_mempool *pdu_data_out_pool; + struct spdk_mempool *session_pool; + struct spdk_mempool *task_pool; + + struct spdk_iscsi_sess **session; + struct spdk_iscsi_poll_group *poll_group; +}; + +#define ISCSI_SECURITY_NEGOTIATION_PHASE 0 +#define ISCSI_OPERATIONAL_NEGOTIATION_PHASE 1 +#define ISCSI_NSG_RESERVED_CODE 2 +#define ISCSI_FULL_FEATURE_PHASE 3 + +enum spdk_error_codes { + SPDK_SUCCESS = 0, + SPDK_ISCSI_CONNECTION_FATAL = -1, + SPDK_PDU_FATAL = -2, +}; + +#define DGET24(B) \ + ((( (uint32_t) *((uint8_t *)(B)+0)) << 16) \ + | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(B)+2)) << 0)) + +#define DSET24(B,D) \ + (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 16)), \ + ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \ + ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 0))) + +#define xstrdup(s) (s ? strdup(s) : (char *)NULL) + +extern struct spdk_iscsi_globals g_spdk_iscsi; +extern struct spdk_iscsi_opts *g_spdk_iscsi_opts; + +struct spdk_iscsi_task; +struct spdk_json_write_ctx; + +typedef void (*spdk_iscsi_init_cb)(void *cb_arg, int rc); + +void spdk_iscsi_init(spdk_iscsi_init_cb cb_fn, void *cb_arg); +typedef void (*spdk_iscsi_fini_cb)(void *arg); +void spdk_iscsi_fini(spdk_iscsi_fini_cb cb_fn, void *cb_arg); +void spdk_shutdown_iscsi_conns_done(void); +void spdk_iscsi_config_text(FILE *fp); +void spdk_iscsi_config_json(struct spdk_json_write_ctx *w); + +struct spdk_iscsi_opts *spdk_iscsi_opts_alloc(void); +void spdk_iscsi_opts_free(struct spdk_iscsi_opts *opts); +struct spdk_iscsi_opts *spdk_iscsi_opts_copy(struct spdk_iscsi_opts *src); +void spdk_iscsi_opts_info_json(struct spdk_json_write_ctx *w); +int spdk_iscsi_set_discovery_auth(bool disable_chap, bool require_chap, + bool mutual_chap, int32_t chap_group); +int spdk_iscsi_chap_get_authinfo(struct iscsi_chap_auth *auth, const char *authuser, + int ag_tag); +int spdk_iscsi_add_auth_group(int32_t tag, struct spdk_iscsi_auth_group **_group); +struct spdk_iscsi_auth_group *spdk_iscsi_find_auth_group_by_tag(int32_t tag); +void spdk_iscsi_delete_auth_group(struct spdk_iscsi_auth_group *group); +int spdk_iscsi_auth_group_add_secret(struct spdk_iscsi_auth_group *group, + const char *user, const char *secret, + const char *muser, const char *msecret); +int spdk_iscsi_auth_group_delete_secret(struct spdk_iscsi_auth_group *group, + const char *user); +void spdk_iscsi_auth_groups_info_json(struct spdk_json_write_ctx *w); + +void spdk_iscsi_send_nopin(struct spdk_iscsi_conn *conn); +void spdk_iscsi_task_response(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task); +int spdk_iscsi_execute(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu); +int spdk_iscsi_build_iovecs(struct spdk_iscsi_conn *conn, + struct iovec *iovec, struct spdk_iscsi_pdu *pdu); +int +spdk_iscsi_read_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu **_pdu); +void spdk_iscsi_task_mgmt_response(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *task); + +int spdk_iscsi_conn_params_init(struct iscsi_param **params); +int spdk_iscsi_sess_params_init(struct iscsi_param **params); + +void spdk_free_sess(struct spdk_iscsi_sess *sess); +void spdk_clear_all_transfer_task(struct spdk_iscsi_conn *conn, + struct spdk_scsi_lun *lun); +void 
spdk_del_transfer_task(struct spdk_iscsi_conn *conn, uint32_t CmdSN); +bool spdk_iscsi_is_deferred_free_pdu(struct spdk_iscsi_pdu *pdu); + +int spdk_iscsi_negotiate_params(struct spdk_iscsi_conn *conn, + struct iscsi_param **params_p, uint8_t *data, + int alloc_len, int data_len); +int spdk_iscsi_copy_param2var(struct spdk_iscsi_conn *conn); + +void spdk_iscsi_task_cpl(struct spdk_scsi_task *scsi_task); +void spdk_iscsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task); + +/* Memory management */ +void spdk_put_pdu(struct spdk_iscsi_pdu *pdu); +struct spdk_iscsi_pdu *spdk_get_pdu(void); +int spdk_iscsi_conn_handle_queued_datain_tasks(struct spdk_iscsi_conn *conn); + +static inline int +spdk_get_immediate_data_buffer_size(void) +{ + /* + * Specify enough extra space in addition to FirstBurstLength to + * account for a header digest, data digest and additional header + * segments (AHS). These are not normally used but they do not + * take up much space and we need to make sure the worst-case scenario + * can be satisified by the size returned here. + */ + return g_spdk_iscsi.FirstBurstLength + + ISCSI_DIGEST_LEN + /* data digest */ + ISCSI_DIGEST_LEN + /* header digest */ + 8 + /* bidirectional AHS */ + 52; /* extended CDB AHS (for a 64-byte CDB) */ +} + +static inline int +spdk_get_data_out_buffer_size(void) +{ + return SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH; +} + +#endif /* SPDK_ISCSI_H */ diff --git a/src/spdk/lib/iscsi/iscsi_rpc.c b/src/spdk/lib/iscsi/iscsi_rpc.c new file mode 100644 index 00000000..dd9777a3 --- /dev/null +++ b/src/spdk/lib/iscsi/iscsi_rpc.c @@ -0,0 +1,1542 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "iscsi/iscsi.h" +#include "iscsi/conn.h" +#include "iscsi/tgt_node.h" +#include "iscsi/portal_grp.h" +#include "iscsi/init_grp.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/event.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + +static void +spdk_rpc_get_initiator_groups(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_initiator_groups requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + spdk_iscsi_init_grps_info_json(w); + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_initiator_groups", spdk_rpc_get_initiator_groups, SPDK_RPC_RUNTIME) + +struct rpc_initiator_list { + size_t num_initiators; + char *initiators[MAX_INITIATOR]; +}; + +static int +decode_rpc_initiator_list(const struct spdk_json_val *val, void *out) +{ + struct rpc_initiator_list *list = out; + + return spdk_json_decode_array(val, spdk_json_decode_string, list->initiators, MAX_INITIATOR, + &list->num_initiators, sizeof(char *)); +} + +static void +free_rpc_initiator_list(struct rpc_initiator_list *list) +{ + size_t i; + + for (i = 0; i < list->num_initiators; i++) { + free(list->initiators[i]); + } +} + +struct rpc_netmask_list { + size_t num_netmasks; + char *netmasks[MAX_NETMASK]; +}; + +static int +decode_rpc_netmask_list(const struct spdk_json_val *val, void *out) +{ + struct rpc_netmask_list *list = out; + + return spdk_json_decode_array(val, spdk_json_decode_string, list->netmasks, MAX_NETMASK, + &list->num_netmasks, sizeof(char *)); +} + +static void +free_rpc_netmask_list(struct rpc_netmask_list *list) +{ + size_t i; + + for (i = 0; i < list->num_netmasks; i++) { + free(list->netmasks[i]); + } +} + +struct rpc_initiator_group { + int32_t tag; + struct rpc_initiator_list initiator_list; + struct rpc_netmask_list netmask_list; +}; + +static void +free_rpc_initiator_group(struct rpc_initiator_group *ig) +{ + free_rpc_initiator_list(&ig->initiator_list); + free_rpc_netmask_list(&ig->netmask_list); +} + +static const struct spdk_json_object_decoder rpc_initiator_group_decoders[] = { + {"tag", offsetof(struct rpc_initiator_group, tag), spdk_json_decode_int32}, + {"initiators", offsetof(struct rpc_initiator_group, initiator_list), decode_rpc_initiator_list}, + {"netmasks", offsetof(struct rpc_initiator_group, netmask_list), decode_rpc_netmask_list}, +}; + +static void +spdk_rpc_add_initiator_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_initiator_group req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_initiator_group_decoders, + SPDK_COUNTOF(rpc_initiator_group_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.initiator_list.num_initiators == 0 || + req.netmask_list.num_netmasks == 0) { + goto invalid; + } + + if (spdk_iscsi_init_grp_create_from_initiator_list(req.tag, + req.initiator_list.num_initiators, + req.initiator_list.initiators, + req.netmask_list.num_netmasks, + req.netmask_list.netmasks)) { + SPDK_ERRLOG("create_from_initiator_list failed\n"); + goto invalid; + } + + free_rpc_initiator_group(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, 
true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_initiator_group(&req); +} +SPDK_RPC_REGISTER("add_initiator_group", spdk_rpc_add_initiator_group, SPDK_RPC_RUNTIME) + +static const struct spdk_json_object_decoder rpc_add_or_delete_initiators_decoders[] = { + {"tag", offsetof(struct rpc_initiator_group, tag), spdk_json_decode_int32}, + {"initiators", offsetof(struct rpc_initiator_group, initiator_list), decode_rpc_initiator_list, true}, + {"netmasks", offsetof(struct rpc_initiator_group, netmask_list), decode_rpc_netmask_list, true}, +}; + +static void +spdk_rpc_add_initiators_to_initiator_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_initiator_group req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_add_or_delete_initiators_decoders, + SPDK_COUNTOF(rpc_add_or_delete_initiators_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (spdk_iscsi_init_grp_add_initiators_from_initiator_list(req.tag, + req.initiator_list.num_initiators, + req.initiator_list.initiators, + req.netmask_list.num_netmasks, + req.netmask_list.netmasks)) { + SPDK_ERRLOG("add_initiators_from_initiator_list failed\n"); + goto invalid; + } + + free_rpc_initiator_group(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_initiator_group(&req); +} +SPDK_RPC_REGISTER("add_initiators_to_initiator_group", + spdk_rpc_add_initiators_to_initiator_group, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_delete_initiators_from_initiator_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_initiator_group req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_add_or_delete_initiators_decoders, + SPDK_COUNTOF(rpc_add_or_delete_initiators_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (spdk_iscsi_init_grp_delete_initiators_from_initiator_list(req.tag, + req.initiator_list.num_initiators, + req.initiator_list.initiators, + req.netmask_list.num_netmasks, + req.netmask_list.netmasks)) { + SPDK_ERRLOG("delete_initiators_from_initiator_list failed\n"); + goto invalid; + } + + free_rpc_initiator_group(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_initiator_group(&req); +} +SPDK_RPC_REGISTER("delete_initiators_from_initiator_group", + spdk_rpc_delete_initiators_from_initiator_group, SPDK_RPC_RUNTIME) + +struct rpc_delete_initiator_group { + int32_t tag; +}; + +static const struct spdk_json_object_decoder rpc_delete_initiator_group_decoders[] = { + {"tag", offsetof(struct rpc_delete_initiator_group, tag), spdk_json_decode_int32}, +}; + +static void +spdk_rpc_delete_initiator_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_initiator_group req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_init_grp *ig; + + if 
(spdk_json_decode_object(params, rpc_delete_initiator_group_decoders, + SPDK_COUNTOF(rpc_delete_initiator_group_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + ig = spdk_iscsi_init_grp_unregister(req.tag); + if (!ig) { + goto invalid; + } + spdk_iscsi_tgt_node_delete_map(NULL, ig); + spdk_iscsi_init_grp_destroy(ig); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} +SPDK_RPC_REGISTER("delete_initiator_group", spdk_rpc_delete_initiator_group, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_target_nodes(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_target_nodes requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + spdk_iscsi_tgt_nodes_info_json(w); + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_target_nodes", spdk_rpc_get_target_nodes, SPDK_RPC_RUNTIME) + +struct rpc_pg_ig_map { + int32_t pg_tag; + int32_t ig_tag; +}; + +static const struct spdk_json_object_decoder rpc_pg_ig_map_decoders[] = { + {"pg_tag", offsetof(struct rpc_pg_ig_map, pg_tag), spdk_json_decode_int32}, + {"ig_tag", offsetof(struct rpc_pg_ig_map, ig_tag), spdk_json_decode_int32}, +}; + +static int +decode_rpc_pg_ig_map(const struct spdk_json_val *val, void *out) +{ + struct rpc_pg_ig_map *pg_ig_map = out; + + return spdk_json_decode_object(val, rpc_pg_ig_map_decoders, + SPDK_COUNTOF(rpc_pg_ig_map_decoders), + pg_ig_map); +} + +struct rpc_pg_ig_maps { + size_t num_maps; + struct rpc_pg_ig_map maps[MAX_TARGET_MAP]; +}; + +static int +decode_rpc_pg_ig_maps(const struct spdk_json_val *val, void *out) +{ + struct rpc_pg_ig_maps *pg_ig_maps = out; + + return spdk_json_decode_array(val, decode_rpc_pg_ig_map, pg_ig_maps->maps, + MAX_TARGET_MAP, &pg_ig_maps->num_maps, + sizeof(struct rpc_pg_ig_map)); +} + +#define RPC_CONSTRUCT_TARGET_NODE_MAX_LUN 64 + +struct rpc_lun { + char *bdev_name; + int32_t lun_id; +}; + +static const struct spdk_json_object_decoder rpc_lun_decoders[] = { + {"bdev_name", offsetof(struct rpc_lun, bdev_name), spdk_json_decode_string}, + {"lun_id", offsetof(struct rpc_lun, lun_id), spdk_json_decode_int32}, +}; + +static int +decode_rpc_lun(const struct spdk_json_val *val, void *out) +{ + struct rpc_lun *lun = out; + + return spdk_json_decode_object(val, rpc_lun_decoders, + SPDK_COUNTOF(rpc_lun_decoders), lun); +} + +struct rpc_luns { + size_t num_luns; + struct rpc_lun luns[RPC_CONSTRUCT_TARGET_NODE_MAX_LUN]; +}; + +static int +decode_rpc_luns(const struct spdk_json_val *val, void *out) +{ + struct rpc_luns *luns = out; + + return spdk_json_decode_array(val, decode_rpc_lun, luns->luns, + RPC_CONSTRUCT_TARGET_NODE_MAX_LUN, + &luns->num_luns, sizeof(struct rpc_lun)); +} + +static void +free_rpc_luns(struct rpc_luns *p) +{ + size_t i; + + for (i = 0; i < p->num_luns; i++) { + free(p->luns[i].bdev_name); + } +} + +struct rpc_target_node { + char *name; + char *alias_name; + + struct rpc_pg_ig_maps pg_ig_maps; + struct rpc_luns luns; + + int32_t queue_depth; + bool disable_chap; + bool require_chap; + bool mutual_chap; + 
int32_t chap_group; + + bool header_digest; + bool data_digest; +}; + +static void +free_rpc_target_node(struct rpc_target_node *req) +{ + free(req->name); + free(req->alias_name); + free_rpc_luns(&req->luns); +} + +static const struct spdk_json_object_decoder rpc_target_node_decoders[] = { + {"name", offsetof(struct rpc_target_node, name), spdk_json_decode_string}, + {"alias_name", offsetof(struct rpc_target_node, alias_name), spdk_json_decode_string}, + {"pg_ig_maps", offsetof(struct rpc_target_node, pg_ig_maps), decode_rpc_pg_ig_maps}, + {"luns", offsetof(struct rpc_target_node, luns), decode_rpc_luns}, + {"queue_depth", offsetof(struct rpc_target_node, queue_depth), spdk_json_decode_int32}, + {"disable_chap", offsetof(struct rpc_target_node, disable_chap), spdk_json_decode_bool, true}, + {"require_chap", offsetof(struct rpc_target_node, require_chap), spdk_json_decode_bool, true}, + {"mutual_chap", offsetof(struct rpc_target_node, mutual_chap), spdk_json_decode_bool, true}, + {"chap_group", offsetof(struct rpc_target_node, chap_group), spdk_json_decode_int32, true}, + {"header_digest", offsetof(struct rpc_target_node, header_digest), spdk_json_decode_bool, true}, + {"data_digest", offsetof(struct rpc_target_node, data_digest), spdk_json_decode_bool, true}, +}; + +static void +spdk_rpc_construct_target_node(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_target_node req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_tgt_node *target; + int32_t pg_tags[MAX_TARGET_MAP] = {0}, ig_tags[MAX_TARGET_MAP] = {0}; + char *bdev_names[RPC_CONSTRUCT_TARGET_NODE_MAX_LUN] = {0}; + int32_t lun_ids[RPC_CONSTRUCT_TARGET_NODE_MAX_LUN] = {0}; + size_t i; + + if (spdk_json_decode_object(params, rpc_target_node_decoders, + SPDK_COUNTOF(rpc_target_node_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + for (i = 0; i < req.pg_ig_maps.num_maps; i++) { + pg_tags[i] = req.pg_ig_maps.maps[i].pg_tag; + ig_tags[i] = req.pg_ig_maps.maps[i].ig_tag; + } + + for (i = 0; i < req.luns.num_luns; i++) { + bdev_names[i] = req.luns.luns[i].bdev_name; + lun_ids[i] = req.luns.luns[i].lun_id; + } + + /* + * Use default parameters in a few places: + * index = -1 : automatically pick an index for the new target node + * alias = NULL + */ + target = spdk_iscsi_tgt_node_construct(-1, req.name, req.alias_name, + pg_tags, + ig_tags, + req.pg_ig_maps.num_maps, + (const char **)bdev_names, + lun_ids, + req.luns.num_luns, + req.queue_depth, + req.disable_chap, + req.require_chap, + req.mutual_chap, + req.chap_group, + req.header_digest, + req.data_digest); + + if (target == NULL) { + goto invalid; + } + + free_rpc_target_node(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_target_node(&req); +} +SPDK_RPC_REGISTER("construct_target_node", spdk_rpc_construct_target_node, SPDK_RPC_RUNTIME) + +struct rpc_tgt_node_pg_ig_maps { + char *name; + struct rpc_pg_ig_maps pg_ig_maps; +}; + +static const struct spdk_json_object_decoder rpc_tgt_node_pg_ig_maps_decoders[] = { + {"name", offsetof(struct rpc_tgt_node_pg_ig_maps, name), spdk_json_decode_string}, + {"pg_ig_maps", offsetof(struct rpc_tgt_node_pg_ig_maps, pg_ig_maps), decode_rpc_pg_ig_maps}, +}; + +static void +spdk_rpc_add_pg_ig_maps(struct 
spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_tgt_node_pg_ig_maps req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_tgt_node *target; + int32_t pg_tags[MAX_TARGET_MAP] = {0}, ig_tags[MAX_TARGET_MAP] = {0}; + size_t i; + int rc; + + if (spdk_json_decode_object(params, rpc_tgt_node_pg_ig_maps_decoders, + SPDK_COUNTOF(rpc_tgt_node_pg_ig_maps_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + target = spdk_iscsi_find_tgt_node(req.name); + if (target == NULL) { + SPDK_ERRLOG("target is not found\n"); + goto invalid; + } + + for (i = 0; i < req.pg_ig_maps.num_maps; i++) { + pg_tags[i] = req.pg_ig_maps.maps[i].pg_tag; + ig_tags[i] = req.pg_ig_maps.maps[i].ig_tag; + } + + rc = spdk_iscsi_tgt_node_add_pg_ig_maps(target, pg_tags, ig_tags, + req.pg_ig_maps.num_maps); + if (rc < 0) { + SPDK_ERRLOG("add pg-ig maps failed\n"); + goto invalid; + } + + free(req.name); + + w = spdk_jsonrpc_begin_result(request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free(req.name); +} +SPDK_RPC_REGISTER("add_pg_ig_maps", spdk_rpc_add_pg_ig_maps, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_delete_pg_ig_maps(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_tgt_node_pg_ig_maps req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_tgt_node *target; + int32_t pg_tags[MAX_TARGET_MAP] = {0}, ig_tags[MAX_TARGET_MAP] = {0}; + size_t i; + int rc; + + if (spdk_json_decode_object(params, rpc_tgt_node_pg_ig_maps_decoders, + SPDK_COUNTOF(rpc_tgt_node_pg_ig_maps_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + target = spdk_iscsi_find_tgt_node(req.name); + if (target == NULL) { + SPDK_ERRLOG("target is not found\n"); + goto invalid; + } + + for (i = 0; i < req.pg_ig_maps.num_maps; i++) { + pg_tags[i] = req.pg_ig_maps.maps[i].pg_tag; + ig_tags[i] = req.pg_ig_maps.maps[i].ig_tag; + } + + rc = spdk_iscsi_tgt_node_delete_pg_ig_maps(target, pg_tags, ig_tags, + req.pg_ig_maps.num_maps); + if (rc < 0) { + SPDK_ERRLOG("remove pg-ig maps failed\n"); + goto invalid; + } + + free(req.name); + + w = spdk_jsonrpc_begin_result(request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free(req.name); +} +SPDK_RPC_REGISTER("delete_pg_ig_maps", spdk_rpc_delete_pg_ig_maps, SPDK_RPC_RUNTIME) + +struct rpc_delete_target_node { + char *name; +}; + +static void +free_rpc_delete_target_node(struct rpc_delete_target_node *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_delete_target_node_decoders[] = { + {"name", offsetof(struct rpc_delete_target_node, name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_delete_target_node(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_target_node req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_delete_target_node_decoders, + SPDK_COUNTOF(rpc_delete_target_node_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.name == NULL) { + SPDK_ERRLOG("missing name param\n"); + goto invalid; + } + + if 
(spdk_iscsi_shutdown_tgt_node_by_name(req.name)) { + SPDK_ERRLOG("shutdown_tgt_node_by_name failed\n"); + goto invalid; + } + + free_rpc_delete_target_node(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_delete_target_node(&req); +} +SPDK_RPC_REGISTER("delete_target_node", spdk_rpc_delete_target_node, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_portal_groups(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_portal_groups requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + spdk_iscsi_portal_grps_info_json(w); + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_portal_groups", spdk_rpc_get_portal_groups, SPDK_RPC_RUNTIME) + +struct rpc_portal { + char *host; + char *port; + char *cpumask; +}; + +struct rpc_portal_list { + size_t num_portals; + struct rpc_portal portals[MAX_PORTAL]; +}; + +struct rpc_portal_group { + int32_t tag; + struct rpc_portal_list portal_list; +}; + +static void +free_rpc_portal(struct rpc_portal *portal) +{ + free(portal->host); + free(portal->port); + free(portal->cpumask); +} + +static void +free_rpc_portal_list(struct rpc_portal_list *pl) +{ + size_t i; + + for (i = 0; i < pl->num_portals; i++) { + free_rpc_portal(&pl->portals[i]); + } + pl->num_portals = 0; +} + +static void +free_rpc_portal_group(struct rpc_portal_group *pg) +{ + free_rpc_portal_list(&pg->portal_list); +} + +static const struct spdk_json_object_decoder rpc_portal_decoders[] = { + {"host", offsetof(struct rpc_portal, host), spdk_json_decode_string}, + {"port", offsetof(struct rpc_portal, port), spdk_json_decode_string}, + {"cpumask", offsetof(struct rpc_portal, cpumask), spdk_json_decode_string, true}, +}; + +static int +decode_rpc_portal(const struct spdk_json_val *val, void *out) +{ + struct rpc_portal *portal = out; + + return spdk_json_decode_object(val, rpc_portal_decoders, + SPDK_COUNTOF(rpc_portal_decoders), + portal); +} + +static int +decode_rpc_portal_list(const struct spdk_json_val *val, void *out) +{ + struct rpc_portal_list *list = out; + + return spdk_json_decode_array(val, decode_rpc_portal, list->portals, MAX_PORTAL, &list->num_portals, + sizeof(struct rpc_portal)); +} + +static const struct spdk_json_object_decoder rpc_portal_group_decoders[] = { + {"tag", offsetof(struct rpc_portal_group, tag), spdk_json_decode_int32}, + {"portals", offsetof(struct rpc_portal_group, portal_list), decode_rpc_portal_list}, +}; + +static void +spdk_rpc_add_portal_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_portal_group req = {}; + struct spdk_iscsi_portal_grp *pg = NULL; + struct spdk_iscsi_portal *portal; + struct spdk_json_write_ctx *w; + size_t i = 0; + int rc = -1; + + if (spdk_json_decode_object(params, rpc_portal_group_decoders, + SPDK_COUNTOF(rpc_portal_group_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto out; + } + + pg = spdk_iscsi_portal_grp_create(req.tag); + if (pg == NULL) { + SPDK_ERRLOG("portal_grp_create failed\n"); + goto out; + } + 
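+	/*
+	 * Illustrative request shape for this method (tag/host/port values are
+	 * examples only, not taken from a real configuration):
+	 *   {"jsonrpc": "2.0", "id": 1, "method": "add_portal_group",
+	 *    "params": {"tag": 1, "portals": [{"host": "10.0.0.1", "port": "3260"}]}}
+	 * Each decoded portal is created and attached to the new group below,
+	 * after which the group is opened and registered.
+	 */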
for (i = 0; i < req.portal_list.num_portals; i++) { + portal = spdk_iscsi_portal_create(req.portal_list.portals[i].host, + req.portal_list.portals[i].port, + req.portal_list.portals[i].cpumask); + if (portal == NULL) { + SPDK_ERRLOG("portal_create failed\n"); + goto out; + } + spdk_iscsi_portal_grp_add_portal(pg, portal); + } + + rc = spdk_iscsi_portal_grp_open(pg); + if (rc != 0) { + SPDK_ERRLOG("portal_grp_open failed\n"); + goto out; + } + + rc = spdk_iscsi_portal_grp_register(pg); + if (rc != 0) { + SPDK_ERRLOG("portal_grp_register failed\n"); + } + +out: + if (rc == 0) { + w = spdk_jsonrpc_begin_result(request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } + } else { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + if (pg != NULL) { + spdk_iscsi_portal_grp_release(pg); + } + } + free_rpc_portal_group(&req); +} +SPDK_RPC_REGISTER("add_portal_group", spdk_rpc_add_portal_group, SPDK_RPC_RUNTIME) + +struct rpc_delete_portal_group { + int32_t tag; +}; + +static const struct spdk_json_object_decoder rpc_delete_portal_group_decoders[] = { + {"tag", offsetof(struct rpc_delete_portal_group, tag), spdk_json_decode_int32}, +}; + +static void +spdk_rpc_delete_portal_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_portal_group req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_portal_grp *pg; + + if (spdk_json_decode_object(params, rpc_delete_portal_group_decoders, + SPDK_COUNTOF(rpc_delete_portal_group_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + pg = spdk_iscsi_portal_grp_unregister(req.tag); + if (!pg) { + goto invalid; + } + + spdk_iscsi_tgt_node_delete_map(pg, NULL); + spdk_iscsi_portal_grp_release(pg); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} +SPDK_RPC_REGISTER("delete_portal_group", spdk_rpc_delete_portal_group, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_iscsi_connections(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_iscsi_conn *conns = g_conns_array; + int i; + uint16_t tsih; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_iscsi_connections requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + for (i = 0; i < MAX_ISCSI_CONNECTIONS; i++) { + struct spdk_iscsi_conn *c = &conns[i]; + + if (!c->is_valid) { + continue; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "id"); + spdk_json_write_int32(w, c->id); + + spdk_json_write_name(w, "cid"); + spdk_json_write_int32(w, c->cid); + + /* + * If we try to return data for a connection that has not + * logged in yet, the session will not be set. So in this + * case, return -1 for the tsih rather than segfaulting + * on the null c->sess. 
+ */ + if (c->sess == NULL) { + tsih = -1; + } else { + tsih = c->sess->tsih; + } + spdk_json_write_name(w, "tsih"); + spdk_json_write_int32(w, tsih); + + spdk_json_write_name(w, "lcore_id"); + spdk_json_write_int32(w, c->lcore); + + spdk_json_write_name(w, "initiator_addr"); + spdk_json_write_string(w, c->initiator_addr); + + spdk_json_write_name(w, "target_addr"); + spdk_json_write_string(w, c->target_addr); + + spdk_json_write_name(w, "target_node_name"); + spdk_json_write_string(w, c->target_short_name); + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_iscsi_connections", spdk_rpc_get_iscsi_connections, SPDK_RPC_RUNTIME) + +struct rpc_target_lun { + char *name; + char *bdev_name; + int32_t lun_id; +}; + +static void +free_rpc_target_lun(struct rpc_target_lun *req) +{ + free(req->name); + free(req->bdev_name); +} + +static const struct spdk_json_object_decoder rpc_target_lun_decoders[] = { + {"name", offsetof(struct rpc_target_lun, name), spdk_json_decode_string}, + {"bdev_name", offsetof(struct rpc_target_lun, bdev_name), spdk_json_decode_string}, + {"lun_id", offsetof(struct rpc_target_lun, lun_id), spdk_json_decode_int32, true}, +}; + +static void +spdk_rpc_target_node_add_lun(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_target_lun req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_tgt_node *target; + int rc; + + req.lun_id = -1; + + if (spdk_json_decode_object(params, rpc_target_lun_decoders, + SPDK_COUNTOF(rpc_target_lun_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + target = spdk_iscsi_find_tgt_node(req.name); + if (target == NULL) { + SPDK_ERRLOG("target is not found\n"); + goto invalid; + } + + rc = spdk_iscsi_tgt_node_add_lun(target, req.bdev_name, req.lun_id); + if (rc < 0) { + SPDK_ERRLOG("add lun failed\n"); + goto invalid; + } + + free_rpc_target_lun(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_target_lun(&req); +} +SPDK_RPC_REGISTER("target_node_add_lun", spdk_rpc_target_node_add_lun, SPDK_RPC_RUNTIME) + +struct rpc_target_auth { + char *name; + bool disable_chap; + bool require_chap; + bool mutual_chap; + int32_t chap_group; +}; + +static void +free_rpc_target_auth(struct rpc_target_auth *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_target_auth_decoders[] = { + {"name", offsetof(struct rpc_target_auth, name), spdk_json_decode_string}, + {"disable_chap", offsetof(struct rpc_target_auth, disable_chap), spdk_json_decode_bool, true}, + {"require_chap", offsetof(struct rpc_target_auth, require_chap), spdk_json_decode_bool, true}, + {"mutual_chap", offsetof(struct rpc_target_auth, mutual_chap), spdk_json_decode_bool, true}, + {"chap_group", offsetof(struct rpc_target_auth, chap_group), spdk_json_decode_int32, true}, +}; + +static void +spdk_rpc_set_iscsi_target_node_auth(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_target_auth req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_tgt_node *target; + int rc; + + if (spdk_json_decode_object(params, rpc_target_auth_decoders, + SPDK_COUNTOF(rpc_target_auth_decoders), &req)) { + 
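+	/* Decode failed: the params object did not match the expected
+	 * {name, disable_chap?, require_chap?, mutual_chap?, chap_group?} shape,
+	 * so reject the request without looking up any target node.
+	 */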
SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + + target = spdk_iscsi_find_tgt_node(req.name); + if (target == NULL) { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not find target %s", req.name); + free_rpc_target_auth(&req); + return; + } + + rc = spdk_iscsi_tgt_node_set_chap_params(target, req.disable_chap, req.require_chap, + req.mutual_chap, req.chap_group); + if (rc < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid combination of auth params"); + free_rpc_target_auth(&req); + return; + } + + free_rpc_target_auth(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_iscsi_target_node_auth", spdk_rpc_set_iscsi_target_node_auth, + SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_iscsi_global_params(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_iscsi_global_params requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_iscsi_opts_info_json(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_iscsi_global_params", spdk_rpc_get_iscsi_global_params, SPDK_RPC_RUNTIME) + +struct rpc_discovery_auth { + bool disable_chap; + bool require_chap; + bool mutual_chap; + int32_t chap_group; +}; + +static const struct spdk_json_object_decoder rpc_discovery_auth_decoders[] = { + {"disable_chap", offsetof(struct rpc_discovery_auth, disable_chap), spdk_json_decode_bool, true}, + {"require_chap", offsetof(struct rpc_discovery_auth, require_chap), spdk_json_decode_bool, true}, + {"mutual_chap", offsetof(struct rpc_discovery_auth, mutual_chap), spdk_json_decode_bool, true}, + {"chap_group", offsetof(struct rpc_discovery_auth, chap_group), spdk_json_decode_int32, true}, +}; + +static void +spdk_rpc_set_iscsi_discovery_auth(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_discovery_auth req = {}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_discovery_auth_decoders, + SPDK_COUNTOF(rpc_discovery_auth_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + + rc = spdk_iscsi_set_discovery_auth(req.disable_chap, req.require_chap, + req.mutual_chap, req.chap_group); + if (rc < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid combination of CHAP params"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("set_iscsi_discovery_auth", spdk_rpc_set_iscsi_discovery_auth, SPDK_RPC_RUNTIME) + + +#define MAX_AUTH_SECRETS 64 + +struct rpc_auth_secret { + char *user; + char *secret; + char *muser; + char *msecret; +}; + +static void +free_rpc_auth_secret(struct rpc_auth_secret *_secret) +{ + free(_secret->user); + free(_secret->secret); + free(_secret->muser); + free(_secret->msecret); +} + +static const struct 
spdk_json_object_decoder rpc_auth_secret_decoders[] = { + {"user", offsetof(struct rpc_auth_secret, user), spdk_json_decode_string}, + {"secret", offsetof(struct rpc_auth_secret, secret), spdk_json_decode_string}, + {"muser", offsetof(struct rpc_auth_secret, muser), spdk_json_decode_string, true}, + {"msecret", offsetof(struct rpc_auth_secret, msecret), spdk_json_decode_string, true}, +}; + +static int +decode_rpc_auth_secret(const struct spdk_json_val *val, void *out) +{ + struct rpc_auth_secret *_secret = out; + + return spdk_json_decode_object(val, rpc_auth_secret_decoders, + SPDK_COUNTOF(rpc_auth_secret_decoders), _secret); +} + +struct rpc_auth_secrets { + size_t num_secret; + struct rpc_auth_secret secrets[MAX_AUTH_SECRETS]; +}; + +static void +free_rpc_auth_secrets(struct rpc_auth_secrets *secrets) +{ + size_t i; + + for (i = 0; i < secrets->num_secret; i++) { + free_rpc_auth_secret(&secrets->secrets[i]); + } +} + +static int +decode_rpc_auth_secrets(const struct spdk_json_val *val, void *out) +{ + struct rpc_auth_secrets *secrets = out; + + return spdk_json_decode_array(val, decode_rpc_auth_secret, secrets->secrets, + MAX_AUTH_SECRETS, &secrets->num_secret, + sizeof(struct rpc_auth_secret)); +} + +struct rpc_auth_group { + int32_t tag; + struct rpc_auth_secrets secrets; +}; + +static void +free_rpc_auth_group(struct rpc_auth_group *group) +{ + free_rpc_auth_secrets(&group->secrets); +} + +static const struct spdk_json_object_decoder rpc_auth_group_decoders[] = { + {"tag", offsetof(struct rpc_auth_group, tag), spdk_json_decode_int32}, + {"secrets", offsetof(struct rpc_auth_group, secrets), decode_rpc_auth_secrets, true}, +}; + +static void +spdk_rpc_add_iscsi_auth_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_auth_group req = {}; + struct rpc_auth_secret *_secret; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_auth_group *group = NULL; + int rc; + size_t i; + + if (spdk_json_decode_object(params, rpc_auth_group_decoders, + SPDK_COUNTOF(rpc_auth_group_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_auth_group(&req); + return; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + rc = spdk_iscsi_add_auth_group(req.tag, &group); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not add auth group (%d), %s", + req.tag, spdk_strerror(-rc)); + free_rpc_auth_group(&req); + return; + } + + for (i = 0; i < req.secrets.num_secret; i++) { + _secret = &req.secrets.secrets[i]; + rc = spdk_iscsi_auth_group_add_secret(group, _secret->user, _secret->secret, + _secret->muser, _secret->msecret); + if (rc != 0) { + spdk_iscsi_delete_auth_group(group); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not add secret to auth group (%d), %s", + req.tag, spdk_strerror(-rc)); + free_rpc_auth_group(&req); + return; + } + } + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + free_rpc_auth_group(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("add_iscsi_auth_group", spdk_rpc_add_iscsi_auth_group, SPDK_RPC_RUNTIME) + +struct rpc_delete_auth_group { + int32_t tag; +}; + +static const struct 
spdk_json_object_decoder rpc_delete_auth_group_decoders[] = { + {"tag", offsetof(struct rpc_delete_auth_group, tag), spdk_json_decode_int32}, +}; + +static void +spdk_rpc_delete_iscsi_auth_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_auth_group req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_auth_group *group; + + if (spdk_json_decode_object(params, rpc_delete_auth_group_decoders, + SPDK_COUNTOF(rpc_delete_auth_group_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + group = spdk_iscsi_find_auth_group_by_tag(req.tag); + if (group == NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not find auth group (%d)", req.tag); + return; + } + + spdk_iscsi_delete_auth_group(group); + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("delete_iscsi_auth_group", spdk_rpc_delete_iscsi_auth_group, SPDK_RPC_RUNTIME) + +struct rpc_add_auth_secret { + int32_t tag; + char *user; + char *secret; + char *muser; + char *msecret; +}; + +static void +free_rpc_add_auth_secret(struct rpc_add_auth_secret *_secret) +{ + free(_secret->user); + free(_secret->secret); + free(_secret->muser); + free(_secret->msecret); +} + +static const struct spdk_json_object_decoder rpc_add_auth_secret_decoders[] = { + {"tag", offsetof(struct rpc_add_auth_secret, tag), spdk_json_decode_int32}, + {"user", offsetof(struct rpc_add_auth_secret, user), spdk_json_decode_string}, + {"secret", offsetof(struct rpc_add_auth_secret, secret), spdk_json_decode_string}, + {"muser", offsetof(struct rpc_add_auth_secret, muser), spdk_json_decode_string, true}, + {"msecret", offsetof(struct rpc_add_auth_secret, msecret), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_add_secret_to_iscsi_auth_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_add_auth_secret req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_auth_group *group; + int rc; + + if (spdk_json_decode_object(params, rpc_add_auth_secret_decoders, + SPDK_COUNTOF(rpc_add_auth_secret_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_add_auth_secret(&req); + return; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + group = spdk_iscsi_find_auth_group_by_tag(req.tag); + if (group == NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not find auth group (%d)", req.tag); + free_rpc_add_auth_secret(&req); + return; + } + + rc = spdk_iscsi_auth_group_add_secret(group, req.user, req.secret, req.muser, req.msecret); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not add secret to auth group (%d), %s", + req.tag, spdk_strerror(-rc)); + free_rpc_add_auth_secret(&req); + return; + } + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + free_rpc_add_auth_secret(&req); + + w = 
spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("add_secret_to_iscsi_auth_group", spdk_rpc_add_secret_to_iscsi_auth_group, + SPDK_RPC_RUNTIME) + +struct rpc_delete_auth_secret { + int32_t tag; + char *user; +}; + +static void +free_rpc_delete_auth_secret(struct rpc_delete_auth_secret *_secret) +{ + free(_secret->user); +} + +static const struct spdk_json_object_decoder rpc_delete_auth_secret_decoders[] = { + {"tag", offsetof(struct rpc_delete_auth_secret, tag), spdk_json_decode_int32}, + {"user", offsetof(struct rpc_delete_auth_secret, user), spdk_json_decode_string}, +}; + +static void +spdk_rpc_delete_secret_from_iscsi_auth_group(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_auth_secret req = {}; + struct spdk_json_write_ctx *w; + struct spdk_iscsi_auth_group *group; + int rc; + + if (spdk_json_decode_object(params, rpc_delete_auth_secret_decoders, + SPDK_COUNTOF(rpc_delete_auth_secret_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + free_rpc_delete_auth_secret(&req); + return; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + group = spdk_iscsi_find_auth_group_by_tag(req.tag); + if (group == NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not find auth group (%d)", req.tag); + free_rpc_delete_auth_secret(&req); + return; + } + + rc = spdk_iscsi_auth_group_delete_secret(group, req.user); + if (rc != 0) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Could not delete secret from CHAP group (%d), %s", + req.tag, spdk_strerror(-rc)); + free_rpc_delete_auth_secret(&req); + return; + } + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + free_rpc_delete_auth_secret(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("delete_secret_from_iscsi_auth_group", + spdk_rpc_delete_secret_from_iscsi_auth_group, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_iscsi_auth_groups(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_iscsi_auth_groups requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + spdk_iscsi_auth_groups_info_json(w); + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_iscsi_auth_groups", spdk_rpc_get_iscsi_auth_groups, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/iscsi/iscsi_subsystem.c b/src/spdk/lib/iscsi/iscsi_subsystem.c new file mode 100644 index 00000000..6cfa4f93 --- /dev/null +++ b/src/spdk/lib/iscsi/iscsi_subsystem.c @@ -0,0 +1,1523 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/sock.h" +#include "spdk/likely.h" + +#include "iscsi/iscsi.h" +#include "iscsi/init_grp.h" +#include "iscsi/portal_grp.h" +#include "iscsi/conn.h" +#include "iscsi/task.h" + +#include "spdk_internal/event.h" +#include "spdk_internal/log.h" + +struct spdk_iscsi_opts *g_spdk_iscsi_opts = NULL; + +static spdk_iscsi_init_cb g_init_cb_fn = NULL; +static void *g_init_cb_arg = NULL; + +static spdk_iscsi_fini_cb g_fini_cb_fn; +static void *g_fini_cb_arg; + +#define ISCSI_CONFIG_TMPL \ +"[iSCSI]\n" \ +" # node name (not include optional part)\n" \ +" # Users can optionally change this to fit their environment.\n" \ +" NodeBase \"%s\"\n" \ +"\n" \ +" # files\n" \ +" %s %s\n" \ +"\n" \ +" # socket I/O timeout sec. (polling is infinity)\n" \ +" Timeout %d\n" \ +"\n" \ +" # authentication information for discovery session\n" \ +" DiscoveryAuthMethod %s\n" \ +" DiscoveryAuthGroup %s\n" \ +"\n" \ +" MaxSessions %d\n" \ +" MaxConnectionsPerSession %d\n" \ +" MaxConnections %d\n" \ +" MaxQueueDepth %d\n" \ +"\n" \ +" # iSCSI initial parameters negotiate with initiators\n" \ +" # NOTE: incorrect values might crash\n" \ +" DefaultTime2Wait %d\n" \ +" DefaultTime2Retain %d\n" \ +"\n" \ +" FirstBurstLength %d\n" \ +" ImmediateData %s\n" \ +" ErrorRecoveryLevel %d\n" \ +"\n" + +static void +spdk_iscsi_globals_config_text(FILE *fp) +{ + const char *authmethod = "None"; + char authgroup[32] = "None"; + + if (NULL == fp) { + return; + } + + if (g_spdk_iscsi.require_chap) { + authmethod = "CHAP"; + } else if (g_spdk_iscsi.mutual_chap) { + authmethod = "CHAP Mutual"; + } else if (!g_spdk_iscsi.disable_chap) { + authmethod = "Auto"; + } + + if (g_spdk_iscsi.chap_group) { + snprintf(authgroup, sizeof(authgroup), "AuthGroup%d", g_spdk_iscsi.chap_group); + } + + fprintf(fp, ISCSI_CONFIG_TMPL, + g_spdk_iscsi.nodebase, + g_spdk_iscsi.authfile ? "AuthFile" : "", + g_spdk_iscsi.authfile ? 
g_spdk_iscsi.authfile : "", + g_spdk_iscsi.timeout, authmethod, authgroup, + g_spdk_iscsi.MaxSessions, g_spdk_iscsi.MaxConnectionsPerSession, + g_spdk_iscsi.MaxConnections, + g_spdk_iscsi.MaxQueueDepth, + g_spdk_iscsi.DefaultTime2Wait, g_spdk_iscsi.DefaultTime2Retain, + g_spdk_iscsi.FirstBurstLength, + (g_spdk_iscsi.ImmediateData) ? "Yes" : "No", + g_spdk_iscsi.ErrorRecoveryLevel); +} + +static void +spdk_mobj_ctor(struct spdk_mempool *mp, __attribute__((unused)) void *arg, + void *_m, __attribute__((unused)) unsigned i) +{ + struct spdk_mobj *m = _m; + uint64_t *phys_addr; + ptrdiff_t off; + + m->mp = mp; + m->buf = (uint8_t *)m + sizeof(struct spdk_mobj); + m->buf = (void *)((unsigned long)((uint8_t *)m->buf + 512) & ~511UL); + off = (uint64_t)(uint8_t *)m->buf - (uint64_t)(uint8_t *)m; + + /* + * we store the physical address in a 64bit unsigned integer + * right before the 512B aligned buffer area. + */ + phys_addr = (uint64_t *)m->buf - 1; + *phys_addr = spdk_vtophys(m) + off; +} + +#define NUM_PDU_PER_CONNECTION(iscsi) (2 * (iscsi->MaxQueueDepth + MAX_LARGE_DATAIN_PER_CONNECTION + 8)) +#define PDU_POOL_SIZE(iscsi) (iscsi->MaxConnections * NUM_PDU_PER_CONNECTION(iscsi)) +#define IMMEDIATE_DATA_POOL_SIZE(iscsi) (iscsi->MaxConnections * 128) +#define DATA_OUT_POOL_SIZE(iscsi) (iscsi->MaxConnections * MAX_DATA_OUT_PER_CONNECTION) + +static int spdk_iscsi_initialize_pdu_pool(void) +{ + struct spdk_iscsi_globals *iscsi = &g_spdk_iscsi; + int imm_mobj_size = spdk_get_immediate_data_buffer_size() + + sizeof(struct spdk_mobj) + 512; + int dout_mobj_size = spdk_get_data_out_buffer_size() + + sizeof(struct spdk_mobj) + 512; + + /* create PDU pool */ + iscsi->pdu_pool = spdk_mempool_create("PDU_Pool", + PDU_POOL_SIZE(iscsi), + sizeof(struct spdk_iscsi_pdu), + 256, SPDK_ENV_SOCKET_ID_ANY); + if (!iscsi->pdu_pool) { + SPDK_ERRLOG("create PDU pool failed\n"); + return -1; + } + + iscsi->pdu_immediate_data_pool = spdk_mempool_create_ctor("PDU_immediate_data_Pool", + IMMEDIATE_DATA_POOL_SIZE(iscsi), + imm_mobj_size, 0, + spdk_env_get_socket_id(spdk_env_get_current_core()), + spdk_mobj_ctor, NULL); + if (!iscsi->pdu_immediate_data_pool) { + SPDK_ERRLOG("create PDU immediate data pool failed\n"); + return -1; + } + + iscsi->pdu_data_out_pool = spdk_mempool_create_ctor("PDU_data_out_Pool", + DATA_OUT_POOL_SIZE(iscsi), + dout_mobj_size, 256, + spdk_env_get_socket_id(spdk_env_get_current_core()), + spdk_mobj_ctor, NULL); + if (!iscsi->pdu_data_out_pool) { + SPDK_ERRLOG("create PDU data out pool failed\n"); + return -1; + } + + return 0; +} + +static void spdk_iscsi_sess_ctor(struct spdk_mempool *pool, void *arg, + void *session_buf, unsigned index) +{ + struct spdk_iscsi_globals *iscsi = arg; + struct spdk_iscsi_sess *sess = session_buf; + + iscsi->session[index] = sess; + + /* tsih 0 is reserved, so start tsih values at 1. 
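+	 * (TSIH is the Target Session Identifying Handle that the target assigns
+	 * to a session at login; index 0 would collide with the reserved value.)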
*/ + sess->tsih = index + 1; +} + +#define DEFAULT_TASK_POOL_SIZE 32768 + +static int +spdk_iscsi_initialize_task_pool(void) +{ + struct spdk_iscsi_globals *iscsi = &g_spdk_iscsi; + + /* create scsi_task pool */ + iscsi->task_pool = spdk_mempool_create("SCSI_TASK_Pool", + DEFAULT_TASK_POOL_SIZE, + sizeof(struct spdk_iscsi_task), + 128, SPDK_ENV_SOCKET_ID_ANY); + if (!iscsi->task_pool) { + SPDK_ERRLOG("create task pool failed\n"); + return -1; + } + + return 0; +} + +#define SESSION_POOL_SIZE(iscsi) (iscsi->MaxSessions) +static int spdk_iscsi_initialize_session_pool(void) +{ + struct spdk_iscsi_globals *iscsi = &g_spdk_iscsi; + + iscsi->session_pool = spdk_mempool_create_ctor("Session_Pool", + SESSION_POOL_SIZE(iscsi), + sizeof(struct spdk_iscsi_sess), 0, + SPDK_ENV_SOCKET_ID_ANY, + spdk_iscsi_sess_ctor, iscsi); + if (!iscsi->session_pool) { + SPDK_ERRLOG("create session pool failed\n"); + return -1; + } + + return 0; +} + +static int +spdk_iscsi_initialize_all_pools(void) +{ + if (spdk_iscsi_initialize_pdu_pool() != 0) { + return -1; + } + + if (spdk_iscsi_initialize_session_pool() != 0) { + return -1; + } + + if (spdk_iscsi_initialize_task_pool() != 0) { + return -1; + } + + return 0; +} + +static void +spdk_iscsi_check_pool(struct spdk_mempool *pool, size_t count) +{ + if (spdk_mempool_count(pool) != count) { + SPDK_ERRLOG("spdk_mempool_count(%s) == %zu, should be %zu\n", + spdk_mempool_get_name(pool), spdk_mempool_count(pool), count); + } +} + +static void +spdk_iscsi_check_pools(void) +{ + struct spdk_iscsi_globals *iscsi = &g_spdk_iscsi; + + spdk_iscsi_check_pool(iscsi->pdu_pool, PDU_POOL_SIZE(iscsi)); + spdk_iscsi_check_pool(iscsi->session_pool, SESSION_POOL_SIZE(iscsi)); + spdk_iscsi_check_pool(iscsi->pdu_immediate_data_pool, IMMEDIATE_DATA_POOL_SIZE(iscsi)); + spdk_iscsi_check_pool(iscsi->pdu_data_out_pool, DATA_OUT_POOL_SIZE(iscsi)); + spdk_iscsi_check_pool(iscsi->task_pool, DEFAULT_TASK_POOL_SIZE); +} + +static void +spdk_iscsi_free_pools(void) +{ + struct spdk_iscsi_globals *iscsi = &g_spdk_iscsi; + + spdk_mempool_free(iscsi->pdu_pool); + spdk_mempool_free(iscsi->session_pool); + spdk_mempool_free(iscsi->pdu_immediate_data_pool); + spdk_mempool_free(iscsi->pdu_data_out_pool); + spdk_mempool_free(iscsi->task_pool); +} + +void spdk_put_pdu(struct spdk_iscsi_pdu *pdu) +{ + if (!pdu) { + return; + } + + pdu->ref--; + + if (pdu->ref < 0) { + SPDK_ERRLOG("Negative PDU refcount: %p\n", pdu); + pdu->ref = 0; + } + + if (pdu->ref == 0) { + if (pdu->mobj) { + spdk_mempool_put(pdu->mobj->mp, (void *)pdu->mobj); + } + + if (pdu->data && !pdu->data_from_mempool) { + free(pdu->data); + } + + spdk_mempool_put(g_spdk_iscsi.pdu_pool, (void *)pdu); + } +} + +struct spdk_iscsi_pdu *spdk_get_pdu(void) +{ + struct spdk_iscsi_pdu *pdu; + + pdu = spdk_mempool_get(g_spdk_iscsi.pdu_pool); + if (!pdu) { + SPDK_ERRLOG("Unable to get PDU\n"); + abort(); + } + + /* we do not want to zero out the last part of the structure reserved for AHS and sense data */ + memset(pdu, 0, offsetof(struct spdk_iscsi_pdu, ahs)); + pdu->ref = 1; + + return pdu; +} + +static void +spdk_iscsi_log_globals(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthFile %s\n", + g_spdk_iscsi.authfile ? 
g_spdk_iscsi.authfile : "(none)"); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "NodeBase %s\n", g_spdk_iscsi.nodebase); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "MaxSessions %d\n", g_spdk_iscsi.MaxSessions); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "MaxConnectionsPerSession %d\n", + g_spdk_iscsi.MaxConnectionsPerSession); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "MaxQueueDepth %d\n", g_spdk_iscsi.MaxQueueDepth); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "DefaultTime2Wait %d\n", + g_spdk_iscsi.DefaultTime2Wait); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "DefaultTime2Retain %d\n", + g_spdk_iscsi.DefaultTime2Retain); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "FirstBurstLength %d\n", + g_spdk_iscsi.FirstBurstLength); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "ImmediateData %s\n", + g_spdk_iscsi.ImmediateData ? "Yes" : "No"); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AllowDuplicateIsid %s\n", + g_spdk_iscsi.AllowDuplicateIsid ? "Yes" : "No"); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "ErrorRecoveryLevel %d\n", + g_spdk_iscsi.ErrorRecoveryLevel); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Timeout %d\n", g_spdk_iscsi.timeout); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "NopInInterval %d\n", + g_spdk_iscsi.nopininterval); + if (g_spdk_iscsi.disable_chap) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "DiscoveryAuthMethod None\n"); + } else if (!g_spdk_iscsi.require_chap) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "DiscoveryAuthMethod Auto\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "DiscoveryAuthMethod %s %s\n", + g_spdk_iscsi.require_chap ? "CHAP" : "", + g_spdk_iscsi.mutual_chap ? "Mutual" : ""); + } + + if (g_spdk_iscsi.chap_group == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "DiscoveryAuthGroup None\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "DiscoveryAuthGroup AuthGroup%d\n", + g_spdk_iscsi.chap_group); + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "MinConnectionsPerCore%d\n", + spdk_iscsi_conn_get_min_per_core()); +} + +static void +spdk_iscsi_opts_init(struct spdk_iscsi_opts *opts) +{ + opts->MaxSessions = DEFAULT_MAX_SESSIONS; + opts->MaxConnectionsPerSession = DEFAULT_MAX_CONNECTIONS_PER_SESSION; + opts->MaxQueueDepth = DEFAULT_MAX_QUEUE_DEPTH; + opts->DefaultTime2Wait = DEFAULT_DEFAULTTIME2WAIT; + opts->DefaultTime2Retain = DEFAULT_DEFAULTTIME2RETAIN; + opts->FirstBurstLength = DEFAULT_FIRSTBURSTLENGTH; + opts->ImmediateData = DEFAULT_IMMEDIATEDATA; + opts->AllowDuplicateIsid = false; + opts->ErrorRecoveryLevel = DEFAULT_ERRORRECOVERYLEVEL; + opts->timeout = DEFAULT_TIMEOUT; + opts->nopininterval = DEFAULT_NOPININTERVAL; + opts->disable_chap = false; + opts->require_chap = false; + opts->mutual_chap = false; + opts->chap_group = 0; + opts->authfile = NULL; + opts->nodebase = NULL; + opts->min_connections_per_core = DEFAULT_CONNECTIONS_PER_LCORE; +} + +struct spdk_iscsi_opts * +spdk_iscsi_opts_alloc(void) +{ + struct spdk_iscsi_opts *opts; + + opts = calloc(1, sizeof(*opts)); + if (!opts) { + SPDK_ERRLOG("calloc() failed for iscsi options\n"); + return NULL; + } + + spdk_iscsi_opts_init(opts); + + return opts; +} + +void +spdk_iscsi_opts_free(struct spdk_iscsi_opts *opts) +{ + free(opts->authfile); + free(opts->nodebase); + free(opts); +} + +/* Deep copy of spdk_iscsi_opts */ +struct spdk_iscsi_opts * +spdk_iscsi_opts_copy(struct spdk_iscsi_opts *src) +{ + struct spdk_iscsi_opts *dst; + + dst = calloc(1, sizeof(*dst)); + if (!dst) { + SPDK_ERRLOG("calloc() failed for iscsi options\n"); + return NULL; + } + + if (src->authfile) { + dst->authfile = strdup(src->authfile); + if (!dst->authfile) { + free(dst); + SPDK_ERRLOG("failed to strdup for auth file %s\n", src->authfile); + return NULL; + } + } + + if (src->nodebase) { + 
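+		/* Deep-copy the nodebase string as well; if the copy fails, release the
+		 * authfile duplicate made above before giving up.
+		 */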
dst->nodebase = strdup(src->nodebase); + if (!dst->nodebase) { + free(dst->authfile); + free(dst); + SPDK_ERRLOG("failed to strdup for nodebase %s\n", src->nodebase); + return NULL; + } + } + + dst->MaxSessions = src->MaxSessions; + dst->MaxConnectionsPerSession = src->MaxConnectionsPerSession; + dst->MaxQueueDepth = src->MaxQueueDepth; + dst->DefaultTime2Wait = src->DefaultTime2Wait; + dst->DefaultTime2Retain = src->DefaultTime2Retain; + dst->FirstBurstLength = src->FirstBurstLength; + dst->ImmediateData = src->ImmediateData; + dst->AllowDuplicateIsid = src->AllowDuplicateIsid; + dst->ErrorRecoveryLevel = src->ErrorRecoveryLevel; + dst->timeout = src->timeout; + dst->nopininterval = src->nopininterval; + dst->disable_chap = src->disable_chap; + dst->require_chap = src->require_chap; + dst->mutual_chap = src->mutual_chap; + dst->chap_group = src->chap_group; + dst->min_connections_per_core = src->min_connections_per_core; + + return dst; +} + +static int +spdk_iscsi_read_config_file_params(struct spdk_conf_section *sp, + struct spdk_iscsi_opts *opts) +{ + const char *val; + int MaxSessions; + int MaxConnectionsPerSession; + int MaxQueueDepth; + int DefaultTime2Wait; + int DefaultTime2Retain; + int FirstBurstLength; + int ErrorRecoveryLevel; + int timeout; + int nopininterval; + int min_conn_per_core = 0; + const char *ag_tag; + int ag_tag_i; + int i; + + val = spdk_conf_section_get_val(sp, "Comment"); + if (val != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Comment %s\n", val); + } + + val = spdk_conf_section_get_val(sp, "AuthFile"); + if (val != NULL) { + opts->authfile = strdup(val); + if (!opts->authfile) { + SPDK_ERRLOG("strdup() failed for AuthFile\n"); + return -ENOMEM; + } + } + + val = spdk_conf_section_get_val(sp, "NodeBase"); + if (val != NULL) { + opts->nodebase = strdup(val); + if (!opts->nodebase) { + free(opts->authfile); + SPDK_ERRLOG("strdup() failed for NodeBase\n"); + return -ENOMEM; + } + } + + MaxSessions = spdk_conf_section_get_intval(sp, "MaxSessions"); + if (MaxSessions >= 0) { + opts->MaxSessions = MaxSessions; + } + + MaxConnectionsPerSession = spdk_conf_section_get_intval(sp, "MaxConnectionsPerSession"); + if (MaxConnectionsPerSession >= 0) { + opts->MaxConnectionsPerSession = MaxConnectionsPerSession; + } + + MaxQueueDepth = spdk_conf_section_get_intval(sp, "MaxQueueDepth"); + if (MaxQueueDepth >= 0) { + opts->MaxQueueDepth = MaxQueueDepth; + } + + DefaultTime2Wait = spdk_conf_section_get_intval(sp, "DefaultTime2Wait"); + if (DefaultTime2Wait >= 0) { + opts->DefaultTime2Wait = DefaultTime2Wait; + } + + DefaultTime2Retain = spdk_conf_section_get_intval(sp, "DefaultTime2Retain"); + if (DefaultTime2Retain >= 0) { + opts->DefaultTime2Retain = DefaultTime2Retain; + } + + FirstBurstLength = spdk_conf_section_get_intval(sp, "FirstBurstLength"); + if (FirstBurstLength >= 0) { + opts->FirstBurstLength = FirstBurstLength; + } + + opts->ImmediateData = spdk_conf_section_get_boolval(sp, "ImmediateData", + opts->ImmediateData); + + /* This option is only for test. + * If AllowDuplicateIsid is enabled, it allows different connections carrying + * TSIH=0 login the target within the same session. 
+ */ + opts->AllowDuplicateIsid = spdk_conf_section_get_boolval(sp, "AllowDuplicateIsid", + opts->AllowDuplicateIsid); + + ErrorRecoveryLevel = spdk_conf_section_get_intval(sp, "ErrorRecoveryLevel"); + if (ErrorRecoveryLevel >= 0) { + opts->ErrorRecoveryLevel = ErrorRecoveryLevel; + } + timeout = spdk_conf_section_get_intval(sp, "Timeout"); + if (timeout >= 0) { + opts->timeout = timeout; + } + nopininterval = spdk_conf_section_get_intval(sp, "NopInInterval"); + if (nopininterval >= 0) { + opts->nopininterval = nopininterval; + } + val = spdk_conf_section_get_val(sp, "DiscoveryAuthMethod"); + if (val != NULL) { + for (i = 0; ; i++) { + val = spdk_conf_section_get_nmval(sp, "DiscoveryAuthMethod", 0, i); + if (val == NULL) { + break; + } + if (strcasecmp(val, "CHAP") == 0) { + opts->require_chap = true; + } else if (strcasecmp(val, "Mutual") == 0) { + opts->require_chap = true; + opts->mutual_chap = true; + } else if (strcasecmp(val, "Auto") == 0) { + opts->disable_chap = false; + opts->require_chap = false; + opts->mutual_chap = false; + } else if (strcasecmp(val, "None") == 0) { + opts->disable_chap = true; + opts->require_chap = false; + opts->mutual_chap = false; + } else { + SPDK_ERRLOG("unknown CHAP mode %s\n", val); + } + } + if (opts->mutual_chap && !opts->require_chap) { + SPDK_ERRLOG("CHAP must set to be required when using mutual CHAP.\n"); + return -EINVAL; + } + } + val = spdk_conf_section_get_val(sp, "DiscoveryAuthGroup"); + if (val != NULL) { + ag_tag = val; + if (strcasecmp(ag_tag, "None") == 0) { + opts->chap_group = 0; + } else { + if (strncasecmp(ag_tag, "AuthGroup", + strlen("AuthGroup")) != 0 + || sscanf(ag_tag, "%*[^0-9]%d", &ag_tag_i) != 1 + || ag_tag_i == 0) { + SPDK_ERRLOG("invalid auth group %s, ignoring\n", ag_tag); + } else { + opts->chap_group = ag_tag_i; + } + } + } + min_conn_per_core = spdk_conf_section_get_intval(sp, "MinConnectionsPerCore"); + if (min_conn_per_core >= 0) { + opts->min_connections_per_core = min_conn_per_core; + } + + return 0; +} + +static int +spdk_iscsi_opts_verify(struct spdk_iscsi_opts *opts) +{ + if (!opts->nodebase) { + opts->nodebase = strdup(SPDK_ISCSI_DEFAULT_NODEBASE); + if (opts->nodebase == NULL) { + SPDK_ERRLOG("strdup() failed for default nodebase\n"); + return -ENOMEM; + } + } + + if (opts->MaxSessions == 0 || opts->MaxSessions > 65535) { + SPDK_ERRLOG("%d is invalid. MaxSessions must be more than 0 and no more than 65535\n", + opts->MaxSessions); + return -EINVAL; + } + + if (opts->MaxConnectionsPerSession == 0 || opts->MaxConnectionsPerSession > 65535) { + SPDK_ERRLOG("%d is invalid. MaxConnectionsPerSession must be more than 0 and no more than 65535\n", + opts->MaxConnectionsPerSession); + return -EINVAL; + } + + if (opts->MaxQueueDepth == 0 || opts->MaxQueueDepth > 256) { + SPDK_ERRLOG("%d is invalid. MaxQueueDepth must be more than 0 and no more than 256\n", + opts->MaxQueueDepth); + return -EINVAL; + } + + if (opts->DefaultTime2Wait > 3600) { + SPDK_ERRLOG("%d is invalid. DefaultTime2Wait must be no more than 3600\n", + opts->DefaultTime2Wait); + return -EINVAL; + } + + if (opts->DefaultTime2Retain > 3600) { + SPDK_ERRLOG("%d is invalid. 
DefaultTime2Retain must be no more than 3600\n", + opts->DefaultTime2Retain); + return -EINVAL; + } + + if (opts->FirstBurstLength >= SPDK_ISCSI_MIN_FIRST_BURST_LENGTH) { + if (opts->FirstBurstLength > SPDK_ISCSI_MAX_BURST_LENGTH) { + SPDK_ERRLOG("FirstBurstLength %d shall not exceed MaxBurstLength %d\n", + opts->FirstBurstLength, SPDK_ISCSI_MAX_BURST_LENGTH); + return -EINVAL; + } + } else { + SPDK_ERRLOG("FirstBurstLength %d shall be no less than %d\n", + opts->FirstBurstLength, SPDK_ISCSI_MIN_FIRST_BURST_LENGTH); + return -EINVAL; + } + + if (opts->ErrorRecoveryLevel > 2) { + SPDK_ERRLOG("ErrorRecoveryLevel %d is not supported.\n", opts->ErrorRecoveryLevel); + return -EINVAL; + } + + if (opts->timeout < 0) { + SPDK_ERRLOG("%d is invalid. timeout must not be less than 0\n", opts->timeout); + return -EINVAL; + } + + if (opts->nopininterval < 0 || opts->nopininterval > MAX_NOPININTERVAL) { + SPDK_ERRLOG("%d is invalid. nopinterval must be between 0 and %d\n", + opts->nopininterval, MAX_NOPININTERVAL); + return -EINVAL; + } + + if (!spdk_iscsi_check_chap_params(opts->disable_chap, opts->require_chap, + opts->mutual_chap, opts->chap_group)) { + SPDK_ERRLOG("CHAP params in opts are illegal combination\n"); + return -EINVAL; + } + + return 0; +} + +static int +spdk_iscsi_parse_options(struct spdk_iscsi_opts **popts) +{ + struct spdk_iscsi_opts *opts; + struct spdk_conf_section *sp; + int rc; + + opts = spdk_iscsi_opts_alloc(); + if (!opts) { + SPDK_ERRLOG("spdk_iscsi_opts_alloc_failed() failed\n"); + return -ENOMEM; + } + + /* Process parameters */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_read_config_file_parmas\n"); + sp = spdk_conf_find_section(NULL, "iSCSI"); + if (sp != NULL) { + rc = spdk_iscsi_read_config_file_params(sp, opts); + if (rc != 0) { + free(opts); + SPDK_ERRLOG("spdk_iscsi_read_config_file_params() failed\n"); + return rc; + } + } + + *popts = opts; + + return 0; +} + +static int +spdk_iscsi_set_global_params(struct spdk_iscsi_opts *opts) +{ + int rc; + + rc = spdk_iscsi_opts_verify(opts); + if (rc != 0) { + SPDK_ERRLOG("spdk_iscsi_opts_verify() failed\n"); + return rc; + } + + if (opts->authfile != NULL) { + g_spdk_iscsi.authfile = strdup(opts->authfile); + if (!g_spdk_iscsi.authfile) { + SPDK_ERRLOG("failed to strdup for auth file %s\n", opts->authfile); + return -ENOMEM; + } + } + + g_spdk_iscsi.nodebase = strdup(opts->nodebase); + if (!g_spdk_iscsi.nodebase) { + SPDK_ERRLOG("failed to strdup for nodebase %s\n", opts->nodebase); + return -ENOMEM; + } + + g_spdk_iscsi.MaxSessions = opts->MaxSessions; + g_spdk_iscsi.MaxConnectionsPerSession = opts->MaxConnectionsPerSession; + g_spdk_iscsi.MaxQueueDepth = opts->MaxQueueDepth; + g_spdk_iscsi.DefaultTime2Wait = opts->DefaultTime2Wait; + g_spdk_iscsi.DefaultTime2Retain = opts->DefaultTime2Retain; + g_spdk_iscsi.FirstBurstLength = opts->FirstBurstLength; + g_spdk_iscsi.ImmediateData = opts->ImmediateData; + g_spdk_iscsi.AllowDuplicateIsid = opts->AllowDuplicateIsid; + g_spdk_iscsi.ErrorRecoveryLevel = opts->ErrorRecoveryLevel; + g_spdk_iscsi.timeout = opts->timeout; + g_spdk_iscsi.nopininterval = opts->nopininterval; + g_spdk_iscsi.disable_chap = opts->disable_chap; + g_spdk_iscsi.require_chap = opts->require_chap; + g_spdk_iscsi.mutual_chap = opts->mutual_chap; + g_spdk_iscsi.chap_group = opts->chap_group; + + spdk_iscsi_conn_set_min_per_core(opts->min_connections_per_core); + + spdk_iscsi_log_globals(); + + return 0; +} + +int +spdk_iscsi_set_discovery_auth(bool disable_chap, bool require_chap, bool mutual_chap, + int32_t 
chap_group) +{ + if (!spdk_iscsi_check_chap_params(disable_chap, require_chap, mutual_chap, + chap_group)) { + SPDK_ERRLOG("CHAP params are illegal combination\n"); + return -EINVAL; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + g_spdk_iscsi.disable_chap = disable_chap; + g_spdk_iscsi.require_chap = require_chap; + g_spdk_iscsi.mutual_chap = mutual_chap; + g_spdk_iscsi.chap_group = chap_group; + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return 0; +} + +int +spdk_iscsi_auth_group_add_secret(struct spdk_iscsi_auth_group *group, + const char *user, const char *secret, + const char *muser, const char *msecret) +{ + struct spdk_iscsi_auth_secret *_secret; + size_t len; + + if (user == NULL || secret == NULL) { + SPDK_ERRLOG("user and secret must be specified\n"); + return -EINVAL; + } + + if (muser != NULL && msecret == NULL) { + SPDK_ERRLOG("msecret must be specified with muser\n"); + return -EINVAL; + } + + TAILQ_FOREACH(_secret, &group->secret_head, tailq) { + if (strcmp(_secret->user, user) == 0) { + SPDK_ERRLOG("user for secret is duplicated\n"); + return -EEXIST; + } + } + + _secret = calloc(1, sizeof(*_secret)); + if (_secret == NULL) { + SPDK_ERRLOG("calloc() failed for CHAP secret\n"); + return -ENOMEM; + } + + len = strnlen(user, sizeof(_secret->user)); + if (len > sizeof(_secret->user) - 1) { + SPDK_ERRLOG("CHAP user longer than %zu characters: %s\n", + sizeof(_secret->user) - 1, user); + free(_secret); + return -EINVAL; + } + memcpy(_secret->user, user, len); + + len = strnlen(secret, sizeof(_secret->secret)); + if (len > sizeof(_secret->secret) - 1) { + SPDK_ERRLOG("CHAP secret longer than %zu characters: %s\n", + sizeof(_secret->secret) - 1, secret); + free(_secret); + return -EINVAL; + } + memcpy(_secret->secret, secret, len); + + if (muser != NULL) { + len = strnlen(muser, sizeof(_secret->muser)); + if (len > sizeof(_secret->muser) - 1) { + SPDK_ERRLOG("Mutual CHAP user longer than %zu characters: %s\n", + sizeof(_secret->muser) - 1, muser); + free(_secret); + return -EINVAL; + } + memcpy(_secret->muser, muser, len); + + len = strnlen(msecret, sizeof(_secret->msecret)); + if (len > sizeof(_secret->msecret) - 1) { + SPDK_ERRLOG("Mutual CHAP secret longer than %zu characters: %s\n", + sizeof(_secret->msecret) - 1, msecret); + free(_secret); + return -EINVAL; + } + memcpy(_secret->msecret, msecret, len); + } + + TAILQ_INSERT_TAIL(&group->secret_head, _secret, tailq); + return 0; +} + +int +spdk_iscsi_auth_group_delete_secret(struct spdk_iscsi_auth_group *group, + const char *user) +{ + struct spdk_iscsi_auth_secret *_secret; + + if (user == NULL) { + SPDK_ERRLOG("user must be specified\n"); + return -EINVAL; + } + + TAILQ_FOREACH(_secret, &group->secret_head, tailq) { + if (strcmp(_secret->user, user) == 0) { + break; + } + } + + if (_secret == NULL) { + SPDK_ERRLOG("secret is not found\n"); + return -ENODEV; + } + + TAILQ_REMOVE(&group->secret_head, _secret, tailq); + free(_secret); + + return 0; +} + +int +spdk_iscsi_add_auth_group(int32_t tag, struct spdk_iscsi_auth_group **_group) +{ + struct spdk_iscsi_auth_group *group; + + TAILQ_FOREACH(group, &g_spdk_iscsi.auth_group_head, tailq) { + if (group->tag == tag) { + SPDK_ERRLOG("Auth group (%d) already exists\n", tag); + return -EEXIST; + } + } + + group = calloc(1, sizeof(*group)); + if (group == NULL) { + SPDK_ERRLOG("calloc() failed for auth group\n"); + return -ENOMEM; + } + + TAILQ_INIT(&group->secret_head); + group->tag = tag; + + TAILQ_INSERT_TAIL(&g_spdk_iscsi.auth_group_head, group, tailq); + + *_group = group; 
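+	/* The new group is now linked on g_spdk_iscsi.auth_group_head. The RPC
+	 * handlers take g_spdk_iscsi.mutex around this call; the config-file
+	 * parser invokes it during subsystem initialization.
+	 */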
+ return 0; +} + +void +spdk_iscsi_delete_auth_group(struct spdk_iscsi_auth_group *group) +{ + struct spdk_iscsi_auth_secret *_secret, *tmp; + + TAILQ_REMOVE(&g_spdk_iscsi.auth_group_head, group, tailq); + + TAILQ_FOREACH_SAFE(_secret, &group->secret_head, tailq, tmp) { + TAILQ_REMOVE(&group->secret_head, _secret, tailq); + free(_secret); + } + free(group); +} + +struct spdk_iscsi_auth_group * +spdk_iscsi_find_auth_group_by_tag(int32_t tag) +{ + struct spdk_iscsi_auth_group *group; + + TAILQ_FOREACH(group, &g_spdk_iscsi.auth_group_head, tailq) { + if (group->tag == tag) { + return group; + } + } + + return NULL; +} + +static void +spdk_iscsi_auth_groups_destroy(void) +{ + struct spdk_iscsi_auth_group *group, *tmp; + + TAILQ_FOREACH_SAFE(group, &g_spdk_iscsi.auth_group_head, tailq, tmp) { + spdk_iscsi_delete_auth_group(group); + } +} + +static int +spdk_iscsi_parse_auth_group(struct spdk_conf_section *sp) +{ + int rc; + int i; + int tag; + const char *val, *user, *secret, *muser, *msecret; + struct spdk_iscsi_auth_group *group = NULL; + + val = spdk_conf_section_get_val(sp, "Comment"); + if (val != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Comment %s\n", val); + } + + tag = spdk_conf_section_get_num(sp); + + rc = spdk_iscsi_add_auth_group(tag, &group); + if (rc != 0) { + SPDK_ERRLOG("Failed to add auth group\n"); + return rc; + } + + for (i = 0; ; i++) { + val = spdk_conf_section_get_nval(sp, "Auth", i); + if (val == NULL) { + break; + } + + user = spdk_conf_section_get_nmval(sp, "Auth", i, 0); + secret = spdk_conf_section_get_nmval(sp, "Auth", i, 1); + muser = spdk_conf_section_get_nmval(sp, "Auth", i, 2); + msecret = spdk_conf_section_get_nmval(sp, "Auth", i, 3); + + rc = spdk_iscsi_auth_group_add_secret(group, user, secret, muser, msecret); + if (rc != 0) { + SPDK_ERRLOG("Failed to add secret to auth group\n"); + spdk_iscsi_delete_auth_group(group); + return rc; + } + } + + return 0; +} + +static int +spdk_iscsi_parse_auth_info(void) +{ + struct spdk_conf *config; + struct spdk_conf_section *sp; + int rc; + + config = spdk_conf_allocate(); + if (!config) { + SPDK_ERRLOG("Failed to allocate config file\n"); + return -ENOMEM; + } + + rc = spdk_conf_read(config, g_spdk_iscsi.authfile); + if (rc != 0) { + SPDK_INFOLOG(SPDK_LOG_ISCSI, "Failed to load auth file\n"); + spdk_conf_free(config); + return rc; + } + + sp = spdk_conf_first_section(config); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "AuthGroup")) { + if (spdk_conf_section_get_num(sp) == 0) { + SPDK_ERRLOG("Group 0 is invalid\n"); + spdk_iscsi_auth_groups_destroy(); + spdk_conf_free(config); + return -EINVAL; + } + + rc = spdk_iscsi_parse_auth_group(sp); + if (rc != 0) { + SPDK_ERRLOG("parse_auth_group() failed\n"); + spdk_iscsi_auth_groups_destroy(); + spdk_conf_free(config); + return rc; + } + } + sp = spdk_conf_next_section(sp); + } + + spdk_conf_free(config); + return 0; +} + +static struct spdk_iscsi_auth_secret * +spdk_iscsi_find_auth_secret(const char *authuser, int ag_tag) +{ + struct spdk_iscsi_auth_group *group; + struct spdk_iscsi_auth_secret *_secret; + + TAILQ_FOREACH(group, &g_spdk_iscsi.auth_group_head, tailq) { + if (group->tag == ag_tag) { + TAILQ_FOREACH(_secret, &group->secret_head, tailq) { + if (strcmp(_secret->user, authuser) == 0) { + return _secret; + } + } + } + } + + return NULL; +} + +int +spdk_iscsi_chap_get_authinfo(struct iscsi_chap_auth *auth, const char *authuser, + int ag_tag) +{ + struct spdk_iscsi_auth_secret *_secret; + + if (authuser == NULL) { + return -EINVAL; + } + + if 
(auth->user[0] != '\0') { + memset(auth->user, 0, sizeof(auth->user)); + memset(auth->secret, 0, sizeof(auth->secret)); + memset(auth->muser, 0, sizeof(auth->muser)); + memset(auth->msecret, 0, sizeof(auth->msecret)); + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + _secret = spdk_iscsi_find_auth_secret(authuser, ag_tag); + if (_secret == NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + SPDK_ERRLOG("CHAP secret is not found: user:%s, tag:%d\n", + authuser, ag_tag); + return -ENOENT; + } + + memcpy(auth->user, _secret->user, sizeof(auth->user)); + memcpy(auth->secret, _secret->secret, sizeof(auth->secret)); + + if (_secret->muser[0] != '\0') { + memcpy(auth->muser, _secret->muser, sizeof(auth->muser)); + memcpy(auth->msecret, _secret->msecret, sizeof(auth->msecret)); + } + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return 0; +} + +static int +spdk_iscsi_initialize_global_params(void) +{ + int rc; + + if (!g_spdk_iscsi_opts) { + rc = spdk_iscsi_parse_options(&g_spdk_iscsi_opts); + if (rc != 0) { + SPDK_ERRLOG("spdk_iscsi_parse_options() failed\n"); + return rc; + } + } + + rc = spdk_iscsi_set_global_params(g_spdk_iscsi_opts); + if (rc != 0) { + SPDK_ERRLOG("spdk_iscsi_set_global_params() failed\n"); + } + + spdk_iscsi_opts_free(g_spdk_iscsi_opts); + g_spdk_iscsi_opts = NULL; + + return rc; +} + +static void +spdk_iscsi_init_complete(int rc) +{ + spdk_iscsi_init_cb cb_fn = g_init_cb_fn; + void *cb_arg = g_init_cb_arg; + + g_init_cb_fn = NULL; + g_init_cb_arg = NULL; + + cb_fn(cb_arg, rc); +} + +static int +spdk_iscsi_poll_group_poll(void *ctx) +{ + struct spdk_iscsi_poll_group *group = ctx; + struct spdk_iscsi_conn *conn, *tmp; + int rc; + + if (spdk_unlikely(STAILQ_EMPTY(&group->connections))) { + return 0; + } + + rc = spdk_sock_group_poll(group->sock_group); + if (rc < 0) { + SPDK_ERRLOG("Failed to poll sock_group=%p\n", group->sock_group); + } + + STAILQ_FOREACH_SAFE(conn, &group->connections, link, tmp) { + if (conn->state == ISCSI_CONN_STATE_EXITING) { + spdk_iscsi_conn_destruct(conn); + } + } + + return -1; +} + +static int +spdk_iscsi_poll_group_handle_nop(void *ctx) +{ + struct spdk_iscsi_poll_group *group = ctx; + struct spdk_iscsi_conn *conn, *tmp; + + STAILQ_FOREACH_SAFE(conn, &group->connections, link, tmp) { + spdk_iscsi_conn_handle_nop(conn); + } + + return -1; +} + +static void +iscsi_create_poll_group(void *ctx) +{ + struct spdk_iscsi_poll_group *pg; + + assert(g_spdk_iscsi.poll_group != NULL); + pg = &g_spdk_iscsi.poll_group[spdk_env_get_current_core()]; + pg->core = spdk_env_get_current_core(); + + STAILQ_INIT(&pg->connections); + pg->sock_group = spdk_sock_group_create(); + assert(pg->sock_group != NULL); + + pg->poller = spdk_poller_register(spdk_iscsi_poll_group_poll, pg, 0); + /* set the period to 1 sec */ + pg->nop_poller = spdk_poller_register(spdk_iscsi_poll_group_handle_nop, pg, 1000000); +} + +static void +iscsi_unregister_poll_group(void *ctx) +{ + struct spdk_iscsi_poll_group *pg; + + assert(g_spdk_iscsi.poll_group != NULL); + pg = &g_spdk_iscsi.poll_group[spdk_env_get_current_core()]; + assert(pg->poller != NULL); + assert(pg->sock_group != NULL); + + spdk_sock_group_close(&pg->sock_group); + spdk_poller_unregister(&pg->poller); + spdk_poller_unregister(&pg->nop_poller); +} + +static void +spdk_initialize_iscsi_poll_group(spdk_thread_fn cpl) +{ + size_t g_num_poll_groups = spdk_env_get_last_core() + 1; + + g_spdk_iscsi.poll_group = calloc(g_num_poll_groups, sizeof(struct spdk_iscsi_poll_group)); + if (!g_spdk_iscsi.poll_group) { + 
SPDK_ERRLOG("Failed to allocated iscsi poll group\n"); + spdk_iscsi_init_complete(-1); + return; + } + + /* Send a message to each thread and create a poll group */ + spdk_for_each_thread(iscsi_create_poll_group, NULL, cpl); +} + +static void +spdk_iscsi_parse_configuration(void *ctx) +{ + int rc; + + rc = spdk_iscsi_parse_portal_grps(); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_portal_grps() failed\n"); + goto end; + } + + rc = spdk_iscsi_parse_init_grps(); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_init_grps() failed\n"); + goto end; + } + + rc = spdk_iscsi_parse_tgt_nodes(); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_tgt_nodes() failed\n"); + } + + if (g_spdk_iscsi.authfile != NULL) { + if (access(g_spdk_iscsi.authfile, R_OK) == 0) { + rc = spdk_iscsi_parse_auth_info(); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_auth_info() failed\n"); + } + } else { + SPDK_INFOLOG(SPDK_LOG_ISCSI, "CHAP secret file is not found in the path %s\n", + g_spdk_iscsi.authfile); + } + } + +end: + spdk_iscsi_init_complete(rc); +} + +static int +spdk_iscsi_parse_globals(void) +{ + int rc; + + rc = spdk_iscsi_initialize_global_params(); + if (rc != 0) { + SPDK_ERRLOG("spdk_iscsi_initialize_iscsi_global_params() failed\n"); + return rc; + } + + g_spdk_iscsi.session = spdk_dma_zmalloc(sizeof(void *) * g_spdk_iscsi.MaxSessions, 0, NULL); + if (!g_spdk_iscsi.session) { + SPDK_ERRLOG("spdk_dma_zmalloc() failed for session array\n"); + return -1; + } + + /* + * For now, just support same number of total connections, rather + * than MaxSessions * MaxConnectionsPerSession. After we add better + * handling for low resource conditions from our various buffer + * pools, we can bump this up to support more connections. + */ + g_spdk_iscsi.MaxConnections = g_spdk_iscsi.MaxSessions; + + rc = spdk_iscsi_initialize_all_pools(); + if (rc != 0) { + SPDK_ERRLOG("spdk_initialize_all_pools() failed\n"); + return -1; + } + + rc = spdk_initialize_iscsi_conns(); + if (rc < 0) { + SPDK_ERRLOG("spdk_initialize_iscsi_conns() failed\n"); + return rc; + } + + spdk_initialize_iscsi_poll_group(spdk_iscsi_parse_configuration); + return 0; +} + +void +spdk_iscsi_init(spdk_iscsi_init_cb cb_fn, void *cb_arg) +{ + int rc; + + assert(cb_fn != NULL); + g_init_cb_fn = cb_fn; + g_init_cb_arg = cb_arg; + + rc = spdk_iscsi_parse_globals(); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_globals() failed\n"); + spdk_iscsi_init_complete(-1); + } + + /* + * spdk_iscsi_parse_configuration() will be called as the callback to + * spdk_initialize_iscsi_poll_group() and will complete iSCSI + * subsystem initialization. 
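For orientation, a minimal sketch of a hypothetical caller: spdk_iscsi_init() is asynchronous, so the supplied callback fires only after the per-core poll groups exist and the configuration has been parsed.

static void
example_iscsi_init_done(void *cb_arg, int rc)
{
	if (rc != 0) {
		SPDK_ERRLOG("iSCSI subsystem initialization failed\n");
		return;
	}
	/* Poll groups and configuration are ready at this point. */
}

static void
example_start_iscsi_subsystem(void)
{
	spdk_iscsi_init(example_iscsi_init_done, NULL);
}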
+ */ +} + +void +spdk_iscsi_fini(spdk_iscsi_fini_cb cb_fn, void *cb_arg) +{ + g_fini_cb_fn = cb_fn; + g_fini_cb_arg = cb_arg; + + spdk_iscsi_portal_grp_close_all(); + spdk_shutdown_iscsi_conns(); +} + +static void +spdk_iscsi_fini_done(void *arg) +{ + spdk_iscsi_check_pools(); + spdk_iscsi_free_pools(); + + spdk_iscsi_shutdown_tgt_nodes(); + spdk_iscsi_init_grps_destroy(); + spdk_iscsi_portal_grps_destroy(); + spdk_iscsi_auth_groups_destroy(); + free(g_spdk_iscsi.authfile); + free(g_spdk_iscsi.nodebase); + free(g_spdk_iscsi.poll_group); + + pthread_mutex_destroy(&g_spdk_iscsi.mutex); + g_fini_cb_fn(g_fini_cb_arg); +} + +void +spdk_shutdown_iscsi_conns_done(void) +{ + if (g_spdk_iscsi.poll_group) { + spdk_for_each_thread(iscsi_unregister_poll_group, NULL, spdk_iscsi_fini_done); + } else { + spdk_iscsi_fini_done(NULL); + } +} + +void +spdk_iscsi_config_text(FILE *fp) +{ + spdk_iscsi_globals_config_text(fp); + spdk_iscsi_portal_grps_config_text(fp); + spdk_iscsi_init_grps_config_text(fp); + spdk_iscsi_tgt_nodes_config_text(fp); +} + +void +spdk_iscsi_opts_info_json(struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + if (g_spdk_iscsi.authfile != NULL) { + spdk_json_write_named_string(w, "auth_file", g_spdk_iscsi.authfile); + } + spdk_json_write_named_string(w, "node_base", g_spdk_iscsi.nodebase); + + spdk_json_write_named_uint32(w, "max_sessions", g_spdk_iscsi.MaxSessions); + spdk_json_write_named_uint32(w, "max_connections_per_session", + g_spdk_iscsi.MaxConnectionsPerSession); + + spdk_json_write_named_uint32(w, "max_queue_depth", g_spdk_iscsi.MaxQueueDepth); + + spdk_json_write_named_uint32(w, "default_time2wait", g_spdk_iscsi.DefaultTime2Wait); + spdk_json_write_named_uint32(w, "default_time2retain", g_spdk_iscsi.DefaultTime2Retain); + + spdk_json_write_named_uint32(w, "first_burst_length", g_spdk_iscsi.FirstBurstLength); + + spdk_json_write_named_bool(w, "immediate_data", g_spdk_iscsi.ImmediateData); + + spdk_json_write_named_bool(w, "allow_duplicated_isid", g_spdk_iscsi.AllowDuplicateIsid); + + spdk_json_write_named_uint32(w, "error_recovery_level", g_spdk_iscsi.ErrorRecoveryLevel); + + spdk_json_write_named_int32(w, "nop_timeout", g_spdk_iscsi.timeout); + spdk_json_write_named_int32(w, "nop_in_interval", g_spdk_iscsi.nopininterval); + + spdk_json_write_named_bool(w, "disable_chap", g_spdk_iscsi.disable_chap); + spdk_json_write_named_bool(w, "require_chap", g_spdk_iscsi.require_chap); + spdk_json_write_named_bool(w, "mutual_chap", g_spdk_iscsi.mutual_chap); + spdk_json_write_named_int32(w, "chap_group", g_spdk_iscsi.chap_group); + + spdk_json_write_named_uint32(w, "min_connections_per_core", + spdk_iscsi_conn_get_min_per_core()); + + spdk_json_write_object_end(w); +} + +static void +spdk_iscsi_auth_group_info_json(struct spdk_iscsi_auth_group *group, + struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_auth_secret *_secret; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_int32(w, "tag", group->tag); + + spdk_json_write_named_array_begin(w, "secrets"); + TAILQ_FOREACH(_secret, &group->secret_head, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "user", _secret->user); + spdk_json_write_named_string(w, "secret", _secret->secret); + + if (_secret->muser[0] != '\0') { + spdk_json_write_named_string(w, "muser", _secret->muser); + spdk_json_write_named_string(w, "msecret", _secret->msecret); + } + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_json_write_object_end(w); +} + +static void 
+spdk_iscsi_auth_group_config_json(struct spdk_iscsi_auth_group *group, + struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "add_iscsi_auth_group"); + + spdk_json_write_name(w, "params"); + spdk_iscsi_auth_group_info_json(group, w); + + spdk_json_write_object_end(w); +} + +void +spdk_iscsi_auth_groups_info_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_auth_group *group; + + TAILQ_FOREACH(group, &g_spdk_iscsi.auth_group_head, tailq) { + spdk_iscsi_auth_group_info_json(group, w); + } +} + +static void +spdk_iscsi_auth_groups_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_auth_group *group; + + TAILQ_FOREACH(group, &g_spdk_iscsi.auth_group_head, tailq) { + spdk_iscsi_auth_group_config_json(group, w); + } +} + +static void +spdk_iscsi_opts_config_json(struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "set_iscsi_options"); + + spdk_json_write_name(w, "params"); + spdk_iscsi_opts_info_json(w); + + spdk_json_write_object_end(w); +} + +void +spdk_iscsi_config_json(struct spdk_json_write_ctx *w) +{ + spdk_json_write_array_begin(w); + spdk_iscsi_opts_config_json(w); + spdk_iscsi_portal_grps_config_json(w); + spdk_iscsi_init_grps_config_json(w); + spdk_iscsi_tgt_nodes_config_json(w); + spdk_iscsi_auth_groups_config_json(w); + spdk_json_write_array_end(w); +} + +SPDK_LOG_REGISTER_COMPONENT("iscsi", SPDK_LOG_ISCSI) diff --git a/src/spdk/lib/iscsi/md5.c b/src/spdk/lib/iscsi/md5.c new file mode 100644 index 00000000..2b3291e4 --- /dev/null +++ b/src/spdk/lib/iscsi/md5.c @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "spdk/stdinc.h"
+
+#include <openssl/md5.h>
+
+#include "iscsi/md5.h"
+
+int spdk_md5init(struct spdk_md5ctx *md5ctx)
+{
+	int rc;
+
+	if (md5ctx == NULL) {
+		return -1;
+	}
+	rc = MD5_Init(&md5ctx->md5ctx);
+	return rc;
+}
+
+int spdk_md5final(void *md5, struct spdk_md5ctx *md5ctx)
+{
+	int rc;
+
+	if (md5ctx == NULL || md5 == NULL) {
+		return -1;
+	}
+	rc = MD5_Final(md5, &md5ctx->md5ctx);
+	return rc;
+}
+
+int spdk_md5update(struct spdk_md5ctx *md5ctx, const void *data, size_t len)
+{
+	int rc;
+
+	if (md5ctx == NULL) {
+		return -1;
+	}
+	if (data == NULL || len == 0) {
+		return 0;
+	}
+	rc = MD5_Update(&md5ctx->md5ctx, data, len);
+	return rc;
+}
diff --git a/src/spdk/lib/iscsi/md5.h b/src/spdk/lib/iscsi/md5.h
new file mode 100644
index 00000000..ff571b4a
--- /dev/null
+++ b/src/spdk/lib/iscsi/md5.h
@@ -0,0 +1,52 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (C) 2008-2012 Daisuke Aoyama .
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_MD5_H
+#define SPDK_MD5_H
+
+#include "spdk/stdinc.h"
+
+#include <openssl/md5.h>
+
+#define SPDK_MD5DIGEST_LEN MD5_DIGEST_LENGTH
+
+struct spdk_md5ctx {
+	MD5_CTX md5ctx;
+};
+
+int spdk_md5init(struct spdk_md5ctx *md5ctx);
+int spdk_md5final(void *md5, struct spdk_md5ctx *md5ctx);
+int spdk_md5update(struct spdk_md5ctx *md5ctx, const void *data, size_t len);
+
+#endif /* SPDK_MD5_H */
diff --git a/src/spdk/lib/iscsi/param.c b/src/spdk/lib/iscsi/param.c
new file mode 100644
index 00000000..e09bf899
--- /dev/null
+++ b/src/spdk/lib/iscsi/param.c
@@ -0,0 +1,1182 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (C) 2008-2012 Daisuke Aoyama .
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
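As an illustration of the MD5 wrapper above (an editor's sketch, not code from the patch): a CHAP response is the MD5 digest of the one-byte identifier, the shared secret, and the binary challenge, which maps directly onto these helpers.

static int
example_chap_response(uint8_t id, const char *secret,
		      const uint8_t *challenge, size_t challenge_len,
		      uint8_t response[SPDK_MD5DIGEST_LEN])
{
	struct spdk_md5ctx ctx;

	/* OpenSSL's MD5_Init() returns 1 on success, so spdk_md5init() does too. */
	if (spdk_md5init(&ctx) != 1) {
		return -1;
	}
	spdk_md5update(&ctx, &id, 1);
	spdk_md5update(&ctx, secret, strlen(secret));
	spdk_md5update(&ctx, challenge, challenge_len);
	spdk_md5final(response, &ctx);

	return 0;
}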
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/string.h" +#include "iscsi/iscsi.h" +#include "iscsi/param.h" +#include "iscsi/conn.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#define MAX_TMPBUF 1024 + +/* whose value may be bigger than 255 */ +static const char *non_simple_value_params[] = { + "CHAP_C", + "CHAP_R", + NULL, +}; + +void +spdk_iscsi_param_free(struct iscsi_param *params) +{ + struct iscsi_param *param, *next_param; + + if (params == NULL) { + return; + } + for (param = params; param != NULL; param = next_param) { + next_param = param->next; + if (param->list) { + free(param->list); + } + free(param->val); + free(param->key); + free(param); + } +} + +static int +spdk_iscsi_find_key_in_array(const char *key, const char *array[]) +{ + int i; + + for (i = 0; array[i] != NULL; i++) { + if (strcasecmp(key, array[i]) == 0) { + return 1; + } + } + return 0; +} + +struct iscsi_param * +spdk_iscsi_param_find(struct iscsi_param *params, const char *key) +{ + struct iscsi_param *param; + + if (params == NULL || key == NULL) { + return NULL; + } + for (param = params; param != NULL; param = param->next) { + if (param->key != NULL && param->key[0] == key[0] + && strcasecmp(param->key, key) == 0) { + return param; + } + } + return NULL; +} + +int +spdk_iscsi_param_del(struct iscsi_param **params, const char *key) +{ + struct iscsi_param *param, *prev_param = NULL; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "del %s\n", key); + if (params == NULL || key == NULL) { + return 0; + } + for (param = *params; param != NULL; param = param->next) { + if (param->key != NULL && param->key[0] == key[0] + && strcasecmp(param->key, key) == 0) { + if (prev_param != NULL) { + prev_param->next = param->next; + } else { + *params = param->next; + } + param->next = NULL; + spdk_iscsi_param_free(param); + return 0; + } + prev_param = param; + } + return -1; +} + +int +spdk_iscsi_param_add(struct iscsi_param **params, const char *key, + const char *val, const char *list, int type) +{ + struct iscsi_param *param, *last_param; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "add %s=%s, list=[%s], type=%d\n", + key, val, list, type); + if (key == NULL) { + return -1; + } + + param = spdk_iscsi_param_find(*params, key); + if (param != NULL) { + spdk_iscsi_param_del(params, key); + } + + param = calloc(1, 
sizeof(*param)); + if (!param) { + SPDK_ERRLOG("calloc() failed for parameter\n"); + return -ENOMEM; + } + + param->next = NULL; + param->key = xstrdup(key); + param->val = xstrdup(val); + param->list = xstrdup(list); + param->type = type; + + last_param = *params; + if (last_param != NULL) { + while (last_param->next != NULL) { + last_param = last_param->next; + } + last_param->next = param; + } else { + *params = param; + } + + return 0; +} + +int +spdk_iscsi_param_set(struct iscsi_param *params, const char *key, + const char *val) +{ + struct iscsi_param *param; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set %s=%s\n", key, val); + param = spdk_iscsi_param_find(params, key); + if (param == NULL) { + SPDK_ERRLOG("no key %s\n", key); + return -1; + } + + free(param->val); + + param->val = xstrdup(val); + + return 0; +} + +int +spdk_iscsi_param_set_int(struct iscsi_param *params, const char *key, uint32_t val) +{ + char buf[MAX_TMPBUF]; + struct iscsi_param *param; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set %s=%d\n", key, val); + param = spdk_iscsi_param_find(params, key); + if (param == NULL) { + SPDK_ERRLOG("no key %s\n", key); + return -1; + } + + free(param->val); + snprintf(buf, sizeof buf, "%d", val); + + param->val = strdup(buf); + + return 0; +} + +/** + * Parse a single KEY=VAL pair + * + * data = "KEY=VAL" + */ +static int +spdk_iscsi_parse_param(struct iscsi_param **params, const uint8_t *data) +{ + int rc; + uint8_t *key_copy; + const uint8_t *key_end, *val; + int key_len, val_len; + int max_len; + + key_end = strchr(data, '='); + if (!key_end) { + SPDK_ERRLOG("'=' not found\n"); + return -1; + } + + key_len = key_end - data; + if (key_len == 0) { + SPDK_ERRLOG("Empty key\n"); + return -1; + } + /* + * RFC 7143 6.1 + */ + if (key_len > ISCSI_TEXT_MAX_KEY_LEN) { + SPDK_ERRLOG("Key name length is bigger than 63\n"); + return -1; + } + + key_copy = malloc(key_len + 1); + if (!key_copy) { + SPDK_ERRLOG("malloc() failed for key_copy\n"); + return -ENOMEM; + } + + memcpy(key_copy, data, key_len); + key_copy[key_len] = '\0'; + /* check whether this key is duplicated */ + if (NULL != spdk_iscsi_param_find(*params, key_copy)) { + SPDK_ERRLOG("Duplicated Key %s\n", key_copy); + free(key_copy); + return -1; + } + + val = key_end + 1; /* +1 to skip over the '=' */ + val_len = strlen(val); + /* + * RFC 3720 5.1 + * If not otherwise specified, the maximum length of a simple-value + * (not its encoded representation) is 255 bytes, not including the delimiter + * (comma or zero byte). + */ + /* + * comma or zero is counted in, otherwise we need to iterate each parameter + * value + */ + max_len = spdk_iscsi_find_key_in_array(key_copy, non_simple_value_params) ? + ISCSI_TEXT_MAX_VAL_LEN : ISCSI_TEXT_MAX_SIMPLE_VAL_LEN; + if (val_len > max_len) { + SPDK_ERRLOG("Overflow Val %d\n", val_len); + free(key_copy); + return -1; + } + + rc = spdk_iscsi_param_add(params, key_copy, val, NULL, 0); + free(key_copy); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_add() failed\n"); + return -1; + } + + /* return number of bytes consumed + * +1 for '=' and +1 for NUL + */ + return key_len + 1 + val_len + 1; +} + +/** + * Parse a sequence of KEY=VAL pairs. + * + * \param data "KEY=VALKEY=VAL..." 
+ * \param len length of data in bytes + */ +int +spdk_iscsi_parse_params(struct iscsi_param **params, const uint8_t *data, + int len, bool cbit_enabled, char **partial_parameter) +{ + int rc, offset = 0; + char *p; + int i; + + /* strip the partial text parameters if previous PDU have C enabled */ + if (partial_parameter && *partial_parameter) { + for (i = 0; i < len && data[i] != '\0'; i++) { + ; + } + p = spdk_sprintf_alloc("%s%s", *partial_parameter, (const char *)data); + if (!p) { + return -1; + } + rc = spdk_iscsi_parse_param(params, p); + free(p); + if (rc < 0) { + return -1; + } + free(*partial_parameter); + *partial_parameter = NULL; + + data = data + i + 1; + len = len - (i + 1); + } + + /* strip the partial text parameters if C bit is enabled */ + if (cbit_enabled) { + if (partial_parameter == NULL) { + SPDK_ERRLOG("C bit set but no partial parameters provided\n"); + return -1; + } + + /* + * reverse iterate the string from the tail not including '\0' + * index of last '\0' is len -1. + */ + for (i = len - 2; data[i] != '\0' && i > 0; i--) { + ; + } + *partial_parameter = xstrdup(&data[i == 0 ? 0 : i + 1]); + len = (i == 0 ? 0 : i + 1); + } + + while (offset < len && data[offset] != '\0') { + rc = spdk_iscsi_parse_param(params, data + offset); + if (rc < 0) { + return -1; + } + offset += rc; + } + return 0; +} + +char * +spdk_iscsi_param_get_val(struct iscsi_param *params, const char *key) +{ + struct iscsi_param *param; + + param = spdk_iscsi_param_find(params, key); + if (param == NULL) { + return NULL; + } + return param->val; +} + +int +spdk_iscsi_param_eq_val(struct iscsi_param *params, const char *key, + const char *val) +{ + struct iscsi_param *param; + + param = spdk_iscsi_param_find(params, key); + if (param == NULL) { + return 0; + } + if (strcasecmp(param->val, val) == 0) { + return 1; + } + return 0; +} + +struct iscsi_param_table { + const char *key; + const char *val; + const char *list; + int type; +}; + +static const struct iscsi_param_table conn_param_table[] = { + { "HeaderDigest", "None", "CRC32C,None", ISPT_LIST }, + { "DataDigest", "None", "CRC32C,None", ISPT_LIST }, + { "MaxRecvDataSegmentLength", "8192", "512,16777215", ISPT_NUMERICAL_DECLARATIVE }, + { "OFMarker", "No", "Yes,No", ISPT_BOOLEAN_AND }, + { "IFMarker", "No", "Yes,No", ISPT_BOOLEAN_AND }, + { "OFMarkInt", "1", "1,65535", ISPT_NUMERICAL_MIN }, + { "IFMarkInt", "1", "1,65535", ISPT_NUMERICAL_MIN }, + { "AuthMethod", "None", "CHAP,None", ISPT_LIST }, + { "CHAP_A", "5", "5", ISPT_LIST }, + { "CHAP_N", "", "", ISPT_DECLARATIVE }, + { "CHAP_R", "", "", ISPT_DECLARATIVE }, + { "CHAP_I", "", "", ISPT_DECLARATIVE }, + { "CHAP_C", "", "", ISPT_DECLARATIVE }, + { NULL, NULL, NULL, ISPT_INVALID }, +}; + +static const struct iscsi_param_table sess_param_table[] = { + { "MaxConnections", "1", "1,65535", ISPT_NUMERICAL_MIN }, +#if 0 + /* need special handling */ + { "SendTargets", "", "", ISPT_DECLARATIVE }, +#endif + { "TargetName", "", "", ISPT_DECLARATIVE }, + { "InitiatorName", "", "", ISPT_DECLARATIVE }, + { "TargetAlias", "", "", ISPT_DECLARATIVE }, + { "InitiatorAlias", "", "", ISPT_DECLARATIVE }, + { "TargetAddress", "", "", ISPT_DECLARATIVE }, + { "TargetPortalGroupTag", "1", "1,65535", ISPT_NUMERICAL_DECLARATIVE }, + { "InitialR2T", "Yes", "Yes,No", ISPT_BOOLEAN_OR }, + { "ImmediateData", "Yes", "Yes,No", ISPT_BOOLEAN_AND }, + { "MaxBurstLength", "262144", "512,16777215", ISPT_NUMERICAL_MIN }, + { "FirstBurstLength", "65536", "512,16777215", ISPT_NUMERICAL_MIN }, + { "DefaultTime2Wait", "2", 
"0,3600", ISPT_NUMERICAL_MAX }, + { "DefaultTime2Retain", "20", "0,3600", ISPT_NUMERICAL_MIN }, + { "MaxOutstandingR2T", "1", "1,65536", ISPT_NUMERICAL_MIN }, + { "DataPDUInOrder", "Yes", "Yes,No", ISPT_BOOLEAN_OR }, + { "DataSequenceInOrder", "Yes", "Yes,No", ISPT_BOOLEAN_OR }, + { "ErrorRecoveryLevel", "0", "0,2", ISPT_NUMERICAL_MIN }, + { "SessionType", "Normal", "Normal,Discovery", ISPT_DECLARATIVE }, + { NULL, NULL, NULL, ISPT_INVALID }, +}; + +static int +spdk_iscsi_params_init_internal(struct iscsi_param **params, + const struct iscsi_param_table *table) +{ + int rc; + int i; + struct iscsi_param *param; + + for (i = 0; table[i].key != NULL; i++) { + rc = spdk_iscsi_param_add(params, table[i].key, table[i].val, + table[i].list, table[i].type); + if (rc < 0) { + SPDK_ERRLOG("iscsi_param_add() failed\n"); + return -1; + } + param = spdk_iscsi_param_find(*params, table[i].key); + if (param != NULL) { + param->state_index = i; + } else { + SPDK_ERRLOG("spdk_iscsi_param_find() failed\n"); + return -1; + } + } + + return 0; +} + +int +spdk_iscsi_conn_params_init(struct iscsi_param **params) +{ + return spdk_iscsi_params_init_internal(params, &conn_param_table[0]); +} + +int +spdk_iscsi_sess_params_init(struct iscsi_param **params) +{ + return spdk_iscsi_params_init_internal(params, &sess_param_table[0]); +} + +static const char *chap_type[] = { + "CHAP_A", + "CHAP_N", + "CHAP_R", + "CHAP_I", + "CHAP_C", + NULL, +}; + +static const char *discovery_ignored_param[] = { + "MaxConnections", + "InitialR2T", + "ImmediateData", + "MaxBurstLength", + "FirstBurstLength" + "MaxOutstandingR2T", + "DataPDUInOrder", + NULL, +}; + +static const char *multi_negot_conn_params[] = { + "MaxRecvDataSegmentLength", + NULL, +}; + +/* The following params should be declared by target */ +static const char *target_declarative_params[] = { + "TargetAlias", + "TargetAddress", + "TargetPortalGroupTag", + NULL, +}; + +/* This function is used to construct the data from the special param (e.g., + * MaxRecvDataSegmentLength) + * return: + * normal: the total len of the data + * error: -1 + */ +static int +spdk_iscsi_special_param_construction(struct spdk_iscsi_conn *conn, + struct iscsi_param *param, + bool FirstBurstLength_flag, char *data, + int alloc_len, int total) +{ + int len; + struct iscsi_param *param_first; + struct iscsi_param *param_max; + uint32_t FirstBurstLength; + uint32_t MaxBurstLength; + char *val; + + val = malloc(ISCSI_TEXT_MAX_VAL_LEN + 1); + if (!val) { + SPDK_ERRLOG("malloc() failed for temporary buffer\n"); + return -ENOMEM; + } + + if (strcasecmp(param->key, "MaxRecvDataSegmentLength") == 0) { + /* + * MaxRecvDataSegmentLength is sent by both + * initiator and target, but is declarative - meaning + * each direction can have different values. + * So when MaxRecvDataSegmentLength is found in the + * the parameter set sent from the initiator, add SPDK + * iscsi target's MaxRecvDataSegmentLength value to + * the returned parameter list. 
+ */ + if (alloc_len - total < 1) { + SPDK_ERRLOG("data space small %d\n", alloc_len); + free(val); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "returning MaxRecvDataSegmentLength=%d\n", + SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH); + len = snprintf((char *)data + total, alloc_len - total, + "MaxRecvDataSegmentLength=%d", + SPDK_ISCSI_MAX_RECV_DATA_SEGMENT_LENGTH); + total += len + 1; + } + + if (strcasecmp(param->key, "MaxBurstLength") == 0 && + !FirstBurstLength_flag) { + if (alloc_len - total < 1) { + SPDK_ERRLOG("data space small %d\n", alloc_len); + free(val); + return -1; + } + + param_first = spdk_iscsi_param_find(conn->sess->params, + "FirstBurstLength"); + if (param_first != NULL) { + FirstBurstLength = (uint32_t)strtol(param_first->val, NULL, 10); + } else { + FirstBurstLength = SPDK_ISCSI_FIRST_BURST_LENGTH; + } + param_max = spdk_iscsi_param_find(conn->sess->params, + "MaxBurstLength"); + if (param_max != NULL) { + MaxBurstLength = (uint32_t)strtol(param_max->val, NULL, 10); + } else { + MaxBurstLength = SPDK_ISCSI_MAX_BURST_LENGTH; + } + + if (FirstBurstLength > MaxBurstLength) { + FirstBurstLength = MaxBurstLength; + if (param_first != NULL) { + free(param_first->val); + snprintf(val, ISCSI_TEXT_MAX_VAL_LEN, "%d", + FirstBurstLength); + param_first->val = xstrdup(val); + } + } + len = snprintf((char *)data + total, alloc_len - total, + "FirstBurstLength=%d", FirstBurstLength); + total += len + 1; + } + + free(val); + return total; + +} + +/** + * spdk_iscsi_construct_data_from_param: + * To construct the data which will be returned to the initiator + * return: length of the negotiated data, -1 indicates error; + */ +static int +spdk_iscsi_construct_data_from_param(struct iscsi_param *param, char *new_val, + char *data, int alloc_len, int total) +{ + int len; + + if (param->type != ISPT_DECLARATIVE && + param->type != ISPT_NUMERICAL_DECLARATIVE) { + if (alloc_len - total < 1) { + SPDK_ERRLOG("data space small %d\n", alloc_len); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "negotiated %s=%s\n", + param->key, new_val); + len = snprintf((char *)data + total, alloc_len - total, "%s=%s", + param->key, new_val); + total += len + 1; + } + return total; +} + +/** + * To negotiate param with + * type = ISPT_LIST + * return: the negotiated value of the key + */ +static char *spdk_iscsi_negotiate_param_list(int *add_param_value, + struct iscsi_param *param, + char *valid_list, char *in_val, + char *cur_val) +{ + char *val_start, *val_end; + char *in_start, *in_end; + int flag = 0; + + if (add_param_value == NULL) { + return NULL; + } + + in_start = in_val; + do { + if ((in_end = strchr(in_start, (int)',')) != NULL) { + *in_end = '\0'; + } + val_start = valid_list; + do { + if ((val_end = strchr(val_start, (int)',')) != NULL) { + *val_end = '\0'; + } + if (strcasecmp(in_start, val_start) == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "match %s\n", + val_start); + flag = 1; + break; + } + if (val_end) { + *val_end = ','; + val_start = val_end + 1; + } + } while (val_end); + if (flag) { + break; + } + if (in_end) { + *in_end = ','; + in_start = in_end + 1; + } + } while (in_end); + + return flag ? 
val_start : NULL; +} + +/** + * To negotiate param with + * type = ISPT_NUMERICAL_MIN/MAX, ISPT_NUMERICAL_DECLARATIVE + * return: the negotiated value of the key + */ +static char *spdk_iscsi_negotiate_param_numerical(int *add_param_value, + struct iscsi_param *param, + char *valid_list, char *in_val, + char *cur_val) +{ + char *valid_next; + char *new_val = NULL; + char *min_val, *max_val; + int val_i, cur_val_i; + int min_i, max_i; + + if (add_param_value == NULL) { + return NULL; + } + + val_i = (int)strtol(param->val, NULL, 10); + /* check whether the key is FirstBurstLength, if that we use in_val */ + if (strcasecmp(param->key, "FirstBurstLength") == 0) { + val_i = (int)strtol(in_val, NULL, 10); + } + + cur_val_i = (int)strtol(cur_val, NULL, 10); + valid_next = valid_list; + min_val = spdk_strsepq(&valid_next, ","); + max_val = spdk_strsepq(&valid_next, ","); + min_i = (min_val != NULL) ? (int)strtol(min_val, NULL, 10) : 0; + max_i = (max_val != NULL) ? (int)strtol(max_val, NULL, 10) : 0; + if (val_i < min_i || val_i > max_i) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "key %.64s reject\n", param->key); + new_val = NULL; + } else { + switch (param->type) { + case ISPT_NUMERICAL_MIN: + if (val_i > cur_val_i) { + val_i = cur_val_i; + } + break; + case ISPT_NUMERICAL_MAX: + if (val_i < cur_val_i) { + val_i = cur_val_i; + } + break; + default: + break; + } + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN, "%d", val_i); + new_val = in_val; + } + + return new_val; +} + +/** + * To negotiate param with + * type = ISPT_BOOLEAN_OR, ISPT_BOOLEAN_AND + * return: the negotiated value of the key + */ +static char *spdk_iscsi_negotiate_param_boolean(int *add_param_value, + struct iscsi_param *param, + char *in_val, char *cur_val, + const char *value) +{ + char *new_val = NULL; + + if (add_param_value == NULL) { + return NULL; + } + + /* Make sure the val is Yes or No */ + if (!((strcasecmp(in_val, "Yes") == 0) || + (strcasecmp(in_val, "No") == 0))) { + /* unknown value */ + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", "Reject"); + new_val = in_val; + *add_param_value = 1; + return new_val; + } + + if (strcasecmp(cur_val, value) == 0) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", value); + new_val = in_val; + } else { + new_val = param->val; + } + + return new_val; +} + +/** + * The entry function to handle each type of the param + * return value: the new negotiated value + */ +static char * +spdk_iscsi_negotiate_param_all(int *add_param_value, struct iscsi_param *param, + char *valid_list, char *in_val, char *cur_val) +{ + char *new_val; + switch (param->type) { + case ISPT_LIST: + new_val = spdk_iscsi_negotiate_param_list(add_param_value, + param, + valid_list, + in_val, + cur_val); + break; + + case ISPT_NUMERICAL_MIN: + case ISPT_NUMERICAL_MAX: + case ISPT_NUMERICAL_DECLARATIVE: + new_val = spdk_iscsi_negotiate_param_numerical(add_param_value, + param, + valid_list, + in_val, + cur_val); + break; + + case ISPT_BOOLEAN_OR: + new_val = spdk_iscsi_negotiate_param_boolean(add_param_value, + param, + in_val, + cur_val, + "Yes"); + break; + case ISPT_BOOLEAN_AND: + new_val = spdk_iscsi_negotiate_param_boolean(add_param_value, + param, + in_val, + cur_val, + "No"); + break; + + default: + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", param->val); + new_val = in_val; + break; + } + + return new_val; +} + +/** + * This function is used to judge whether the param is in session's params or + * connection's params + */ +static int +spdk_iscsi_negotiate_param_init(struct spdk_iscsi_conn *conn, + struct 
iscsi_param **cur_param_p, + struct iscsi_param **params_dst_p, + struct iscsi_param *param) +{ + int index; + + *cur_param_p = spdk_iscsi_param_find(*params_dst_p, param->key); + if (*cur_param_p == NULL) { + *params_dst_p = conn->sess->params; + *cur_param_p = spdk_iscsi_param_find(*params_dst_p, param->key); + if (*cur_param_p == NULL) { + if ((strncasecmp(param->key, "X-", 2) == 0) || + (strncasecmp(param->key, "X#", 2) == 0)) { + /* Extension Key */ + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "extension key %.64s\n", + param->key); + } else { + SPDK_ERRLOG("unknown key %.64s\n", param->key); + } + return 1; + } else { + index = (*cur_param_p)->state_index; + if (conn->sess_param_state_negotiated[index] && + !spdk_iscsi_find_key_in_array(param->key, + target_declarative_params)) { + return SPDK_ISCSI_PARAMETER_EXCHANGE_NOT_ONCE; + } + conn->sess_param_state_negotiated[index] = true; + } + } else { + index = (*cur_param_p)->state_index; + if (conn->conn_param_state_negotiated[index] && + !spdk_iscsi_find_key_in_array(param->key, + multi_negot_conn_params)) { + return SPDK_ISCSI_PARAMETER_EXCHANGE_NOT_ONCE; + } + conn->conn_param_state_negotiated[index] = true; + } + + return 0; +} + +int +spdk_iscsi_negotiate_params(struct spdk_iscsi_conn *conn, + struct iscsi_param **params, uint8_t *data, int alloc_len, + int data_len) +{ + struct iscsi_param *param; + struct iscsi_param *cur_param; + char *valid_list, *in_val; + char *cur_val; + char *new_val; + int discovery; + int total; + int rc; + uint32_t FirstBurstLength; + uint32_t MaxBurstLength; + bool FirstBurstLength_flag = false; + int type; + + total = data_len; + if (alloc_len < 1) { + return 0; + } + if (total > alloc_len) { + total = alloc_len; + data[total - 1] = '\0'; + return total; + } + + if (*params == NULL) { + /* no input */ + return total; + } + + /* discovery? 
*/ + discovery = 0; + cur_param = spdk_iscsi_param_find(*params, "SessionType"); + if (cur_param == NULL) { + cur_param = spdk_iscsi_param_find(conn->sess->params, "SessionType"); + if (cur_param == NULL) { + /* no session type */ + } else { + if (strcasecmp(cur_param->val, "Discovery") == 0) { + discovery = 1; + } + } + } else { + if (strcasecmp(cur_param->val, "Discovery") == 0) { + discovery = 1; + } + } + + /* for temporary store */ + valid_list = malloc(ISCSI_TEXT_MAX_VAL_LEN + 1); + if (!valid_list) { + SPDK_ERRLOG("malloc() failed for valid_list\n"); + return -ENOMEM; + } + + in_val = malloc(ISCSI_TEXT_MAX_VAL_LEN + 1); + if (!in_val) { + SPDK_ERRLOG("malloc() failed for in_val\n"); + free(valid_list); + return -ENOMEM; + } + + cur_val = malloc(ISCSI_TEXT_MAX_VAL_LEN + 1); + if (!cur_val) { + SPDK_ERRLOG("malloc() failed for cur_val\n"); + free(valid_list); + free(in_val); + return -ENOMEM; + } + + /* To adjust the location of FirstBurstLength location and put it to + * the end, then we can always firstly determine the MaxBurstLength + */ + param = spdk_iscsi_param_find(*params, "MaxBurstLength"); + if (param != NULL) { + param = spdk_iscsi_param_find(*params, "FirstBurstLength"); + + /* check the existence of FirstBurstLength */ + if (param != NULL) { + FirstBurstLength_flag = true; + if (param->next != NULL) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", param->val); + type = param->type; + spdk_iscsi_param_add(params, "FirstBurstLength", + in_val, NULL, type); + } + } + } + + for (param = *params; param != NULL; param = param->next) { + struct iscsi_param *params_dst = conn->params; + int add_param_value = 0; + new_val = NULL; + param->type = ISPT_INVALID; + + /* sendtargets is special */ + if (strcasecmp(param->key, "SendTargets") == 0) { + continue; + } + /* CHAP keys */ + if (spdk_iscsi_find_key_in_array(param->key, chap_type)) { + continue; + } + + /* 12.2, 12.10, 12.11, 12.13, 12.14, 12.17, 12.18, 12.19 */ + if (discovery && + spdk_iscsi_find_key_in_array(param->key, + discovery_ignored_param)) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", "Irrelevant"); + new_val = in_val; + add_param_value = 1; + } else { + rc = spdk_iscsi_negotiate_param_init(conn, + &cur_param, + ¶ms_dst, + param); + if (rc < 0) { + free(valid_list); + free(in_val); + free(cur_val); + return rc; + } else if (rc > 0) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", "NotUnderstood"); + new_val = in_val; + add_param_value = 1; + } else { + snprintf(valid_list, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", cur_param->list); + snprintf(cur_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", cur_param->val); + param->type = cur_param->type; + } + } + + if (param->type > 0) { + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN + 1, "%s", param->val); + + /* "NotUnderstood" value shouldn't be assigned to "Understood" key */ + if (strcasecmp(in_val, "NotUnderstood") == 0) { + free(in_val); + free(valid_list); + free(cur_val); + return SPDK_ISCSI_LOGIN_ERROR_PARAMETER; + } + + if (strcasecmp(param->key, "FirstBurstLength") == 0) { + FirstBurstLength = (uint32_t)strtol(param->val, NULL, + 10); + new_val = spdk_iscsi_param_get_val(conn->sess->params, + "MaxBurstLength"); + if (new_val != NULL) { + MaxBurstLength = (uint32_t) strtol(new_val, NULL, + 10); + } else { + MaxBurstLength = SPDK_ISCSI_MAX_BURST_LENGTH; + } + if (FirstBurstLength < MAX_FIRSTBURSTLENGTH && + FirstBurstLength > MaxBurstLength) { + FirstBurstLength = MaxBurstLength; + snprintf(in_val, ISCSI_TEXT_MAX_VAL_LEN, "%d", + FirstBurstLength); + } + } + + /* 
prevent target's declarative params from being changed by initiator */ + if (spdk_iscsi_find_key_in_array(param->key, target_declarative_params)) { + add_param_value = 1; + } + + new_val = spdk_iscsi_negotiate_param_all(&add_param_value, + param, + valid_list, + in_val, + cur_val); + } + + /* check the negotiated value of the key */ + if (new_val != NULL) { + /* add_param_value = 0 means updating the value of + * existed key in the connection's parameters + */ + if (add_param_value == 0) { + spdk_iscsi_param_set(params_dst, param->key, new_val); + } + total = spdk_iscsi_construct_data_from_param(param, + new_val, + data, + alloc_len, + total); + if (total < 0) { + goto final_return; + } + + total = spdk_iscsi_special_param_construction(conn, + param, + FirstBurstLength_flag, + data, + alloc_len, + total); + if (total < 0) { + goto final_return; + } + } else { + total = -1; + break; + } + } + +final_return: + free(valid_list); + free(in_val); + free(cur_val); + + return total; +} + +int +spdk_iscsi_copy_param2var(struct spdk_iscsi_conn *conn) +{ + const char *val; + + val = spdk_iscsi_param_get_val(conn->params, "MaxRecvDataSegmentLength"); + if (val == NULL) { + SPDK_ERRLOG("Getval MaxRecvDataSegmentLength failed\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "copy MaxRecvDataSegmentLength=%s\n", val); + conn->MaxRecvDataSegmentLength = (int)strtol(val, NULL, 10); + if (conn->MaxRecvDataSegmentLength > SPDK_ISCSI_MAX_SEND_DATA_SEGMENT_LENGTH) { + conn->MaxRecvDataSegmentLength = SPDK_ISCSI_MAX_SEND_DATA_SEGMENT_LENGTH; + } + + val = spdk_iscsi_param_get_val(conn->params, "HeaderDigest"); + if (val == NULL) { + SPDK_ERRLOG("Getval HeaderDigest failed\n"); + return -1; + } + if (strcasecmp(val, "CRC32C") == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set HeaderDigest=1\n"); + conn->header_digest = 1; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set HeaderDigest=0\n"); + conn->header_digest = 0; + } + val = spdk_iscsi_param_get_val(conn->params, "DataDigest"); + if (val == NULL) { + SPDK_ERRLOG("Getval DataDigest failed\n"); + return -1; + } + if (strcasecmp(val, "CRC32C") == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set DataDigest=1\n"); + conn->data_digest = 1; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set DataDigest=0\n"); + conn->data_digest = 0; + } + + val = spdk_iscsi_param_get_val(conn->sess->params, "MaxConnections"); + if (val == NULL) { + SPDK_ERRLOG("Getval MaxConnections failed\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "copy MaxConnections=%s\n", val); + conn->sess->MaxConnections = (uint32_t) strtol(val, NULL, 10); + val = spdk_iscsi_param_get_val(conn->sess->params, "MaxOutstandingR2T"); + if (val == NULL) { + SPDK_ERRLOG("Getval MaxOutstandingR2T failed\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "copy MaxOutstandingR2T=%s\n", val); + conn->sess->MaxOutstandingR2T = (uint32_t) strtol(val, NULL, 10); + val = spdk_iscsi_param_get_val(conn->sess->params, "FirstBurstLength"); + if (val == NULL) { + SPDK_ERRLOG("Getval FirstBurstLength failed\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "copy FirstBurstLength=%s\n", val); + conn->sess->FirstBurstLength = (uint32_t) strtol(val, NULL, 10); + val = spdk_iscsi_param_get_val(conn->sess->params, "MaxBurstLength"); + if (val == NULL) { + SPDK_ERRLOG("Getval MaxBurstLength failed\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "copy MaxBurstLength=%s\n", val); + conn->sess->MaxBurstLength = (uint32_t) strtol(val, NULL, 10); + val = spdk_iscsi_param_get_val(conn->sess->params, "InitialR2T"); + if (val == 
NULL) { + SPDK_ERRLOG("Getval InitialR2T failed\n"); + return -1; + } + if (strcasecmp(val, "Yes") == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set InitialR2T=1\n"); + conn->sess->InitialR2T = true; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set InitialR2T=0\n"); + conn->sess->InitialR2T = false; + } + val = spdk_iscsi_param_get_val(conn->sess->params, "ImmediateData"); + if (val == NULL) { + SPDK_ERRLOG("Getval ImmediateData failed\n"); + return -1; + } + if (strcasecmp(val, "Yes") == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set ImmediateData=1\n"); + conn->sess->ImmediateData = true; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "set ImmediateData=0\n"); + conn->sess->ImmediateData = false; + } + return 0; +} diff --git a/src/spdk/lib/iscsi/param.h b/src/spdk/lib/iscsi/param.h new file mode 100644 index 00000000..c9dc8cab --- /dev/null +++ b/src/spdk/lib/iscsi/param.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_ISCSI_PARAM_H +#define SPDK_ISCSI_PARAM_H + +#include "spdk/stdinc.h" + +enum iscsi_param_type { + ISPT_INVALID = -1, + ISPT_NOTSPECIFIED = 0, + ISPT_LIST, + ISPT_NUMERICAL_MIN, + ISPT_NUMERICAL_MAX, + ISPT_NUMERICAL_DECLARATIVE, + ISPT_DECLARATIVE, + ISPT_BOOLEAN_OR, + ISPT_BOOLEAN_AND, +}; + +struct iscsi_param { + struct iscsi_param *next; + char *key; + char *val; + char *list; + int type; + int state_index; +}; + +void +spdk_iscsi_param_free(struct iscsi_param *params); +struct iscsi_param * +spdk_iscsi_param_find(struct iscsi_param *params, const char *key); +int +spdk_iscsi_param_del(struct iscsi_param **params, const char *key); +int +spdk_iscsi_param_add(struct iscsi_param **params, const char *key, + const char *val, const char *list, int type); +int +spdk_iscsi_param_set(struct iscsi_param *params, const char *key, + const char *val); +int +spdk_iscsi_param_set_int(struct iscsi_param *params, const char *key, uint32_t val); +int +spdk_iscsi_parse_params(struct iscsi_param **params, const uint8_t *data, + int len, bool cbit_enabled, char **partial_parameter); +char * +spdk_iscsi_param_get_val(struct iscsi_param *params, const char *key); +int +spdk_iscsi_param_eq_val(struct iscsi_param *params, const char *key, + const char *val); + +#endif /* SPDK_ISCSI_PARAM_H */ diff --git a/src/spdk/lib/iscsi/portal_grp.c b/src/spdk/lib/iscsi/portal_grp.c new file mode 100644 index 00000000..60a724c9 --- /dev/null +++ b/src/spdk/lib/iscsi/portal_grp.c @@ -0,0 +1,707 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
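A short usage sketch of the parameter API declared above (hypothetical function; the key and value are arbitrary examples):

static void
example_param_usage(void)
{
	struct iscsi_param *params = NULL;
	char *val;

	/* Build a one-entry list, read the value back, then free the whole list. */
	if (spdk_iscsi_param_add(&params, "InitiatorName",
				 "iqn.2016-06.io.spdk:host1", NULL, ISPT_DECLARATIVE) == 0) {
		val = spdk_iscsi_param_get_val(params, "InitiatorName");
		assert(val != NULL && strcmp(val, "iqn.2016-06.io.spdk:host1") == 0);
	}

	spdk_iscsi_param_free(params);
}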
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/conf.h" +#include "spdk/sock.h" +#include "spdk/event.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#include "iscsi/iscsi.h" +#include "iscsi/conn.h" +#include "iscsi/portal_grp.h" +#include "iscsi/acceptor.h" + +#define PORTNUMSTRLEN 32 + +static struct spdk_iscsi_portal * +spdk_iscsi_portal_find_by_addr(const char *host, const char *port) +{ + struct spdk_iscsi_portal *p; + + TAILQ_FOREACH(p, &g_spdk_iscsi.portal_head, g_tailq) { + if (!strcmp(p->host, host) && !strcmp(p->port, port)) { + return p; + } + } + + return NULL; +} + +/* Assumes caller allocated host and port strings on the heap */ +struct spdk_iscsi_portal * +spdk_iscsi_portal_create(const char *host, const char *port, const char *cpumask) +{ + struct spdk_iscsi_portal *p = NULL, *tmp; + struct spdk_cpuset *core_mask = NULL; + int rc; + + assert(host != NULL); + assert(port != NULL); + + + p = calloc(1, sizeof(*p)); + if (!p) { + SPDK_ERRLOG("calloc() failed for portal\n"); + return NULL; + } + + /* check and overwrite abbreviation of wildcard */ + if (strcasecmp(host, "[*]") == 0) { + SPDK_WARNLOG("Please use \"[::]\" as IPv6 wildcard\n"); + SPDK_WARNLOG("Convert \"[*]\" to \"[::]\" automatically\n"); + SPDK_WARNLOG("(Use of \"[*]\" will be deprecated in a future release)"); + p->host = strdup("[::]"); + } else if (strcasecmp(host, "*") == 0) { + SPDK_WARNLOG("Please use \"0.0.0.0\" as IPv4 wildcard\n"); + SPDK_WARNLOG("Convert \"*\" to \"0.0.0.0\" automatically\n"); + SPDK_WARNLOG("(Use of \"[*]\" will be deprecated in a future release)"); + p->host = strdup("0.0.0.0"); + } else { + p->host = strdup(host); + } + if (!p->host) { + SPDK_ERRLOG("strdup() failed for host\n"); + goto error_out; + } + + p->port = strdup(port); + if (!p->port) { + SPDK_ERRLOG("strdup() failed for host\n"); + goto error_out; + } + + core_mask = spdk_cpuset_alloc(); + if (!core_mask) { + SPDK_ERRLOG("spdk_cpuset_alloc() failed for host\n"); + goto error_out; + } + + if (cpumask != NULL) { + rc = spdk_app_parse_core_mask(cpumask, core_mask); + if (rc < 0) { + SPDK_ERRLOG("cpumask (%s) is invalid\n", cpumask); + goto error_out; + } + if (spdk_cpuset_count(core_mask) == 0) { + SPDK_ERRLOG("cpumask (%s) does not contain core mask (0x%s)\n", + cpumask, spdk_cpuset_fmt(spdk_app_get_core_mask())); + goto error_out; + } + } else { + spdk_cpuset_copy(core_mask, spdk_app_get_core_mask()); + } + + p->cpumask = core_mask; + + p->sock = NULL; + p->group = NULL; /* set at a later time by caller */ + p->acceptor_poller = NULL; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + tmp = spdk_iscsi_portal_find_by_addr(host, port); + if (tmp != NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + SPDK_ERRLOG("portal (%s, %s) already exists\n", host, port); + goto error_out; + } + + TAILQ_INSERT_TAIL(&g_spdk_iscsi.portal_head, p, g_tailq); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return p; + +error_out: + spdk_cpuset_free(core_mask); + free(p->port); + free(p->host); + free(p); + + return NULL; +} + +void +spdk_iscsi_portal_destroy(struct spdk_iscsi_portal *p) +{ + assert(p != NULL); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_portal_destroy\n"); + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_REMOVE(&g_spdk_iscsi.portal_head, p, g_tailq); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + free(p->host); + free(p->port); + spdk_cpuset_free(p->cpumask); + free(p); + +} + +static int +spdk_iscsi_portal_open(struct spdk_iscsi_portal *p) +{ + struct spdk_sock *sock; + int port; + + if (p->sock 
!= NULL) { + SPDK_ERRLOG("portal (%s, %s) is already opened\n", + p->host, p->port); + return -1; + } + + port = (int)strtol(p->port, NULL, 0); + sock = spdk_sock_listen(p->host, port); + if (sock == NULL) { + SPDK_ERRLOG("listen error %.64s.%d\n", p->host, port); + return -1; + } + + p->sock = sock; + + /* + * When the portal is created by config file, incoming connection + * requests for the socket are pended to accept until reactors start. + * However the gap between listen() and accept() will be slight and + * the requests will be queued by the nonzero backlog of the socket + * or resend by TCP. + */ + spdk_iscsi_acceptor_start(p); + + return 0; +} + +static void +spdk_iscsi_portal_close(struct spdk_iscsi_portal *p) +{ + if (p->sock) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "close portal (%s, %s)\n", + p->host, p->port); + spdk_iscsi_acceptor_stop(p); + spdk_sock_close(&p->sock); + } +} + +static int +spdk_iscsi_parse_portal(const char *portalstring, struct spdk_iscsi_portal **ip, + int dry_run) +{ + char *host = NULL, *port = NULL, *cpumask = NULL; + int len, rc = -1; + const char *p, *q; + + if (portalstring == NULL) { + SPDK_ERRLOG("portal error\n"); + goto error_out; + } + + /* IP address */ + if (portalstring[0] == '[') { + /* IPv6 */ + p = strchr(portalstring + 1, ']'); + if (p == NULL) { + SPDK_ERRLOG("portal error\n"); + goto error_out; + } + p++; + } else { + /* IPv4 */ + p = strchr(portalstring, ':'); + if (p == NULL) { + p = portalstring + strlen(portalstring); + } + } + + if (!dry_run) { + len = p - portalstring; + host = malloc(len + 1); + if (host == NULL) { + SPDK_ERRLOG("malloc() failed for host\n"); + goto error_out; + } + memcpy(host, portalstring, len); + host[len] = '\0'; + } + + /* Port number (IPv4 and IPv6 are the same) */ + if (p[0] == '\0') { + if (!dry_run) { + port = malloc(PORTNUMSTRLEN); + if (!port) { + SPDK_ERRLOG("malloc() failed for port\n"); + goto error_out; + } + snprintf(port, PORTNUMSTRLEN, "%d", DEFAULT_PORT); + } + } else { + if (p[0] != ':') { + SPDK_ERRLOG("portal error\n"); + goto error_out; + } + q = strchr(portalstring, '@'); + if (q == NULL) { + q = portalstring + strlen(portalstring); + } + if (q == p) { + SPDK_ERRLOG("no port specified\n"); + goto error_out; + } + + if (!dry_run) { + len = q - p - 1; + port = malloc(len + 1); + if (port == NULL) { + SPDK_ERRLOG("malloc() failed for port\n"); + goto error_out; + } + memcpy(port, p + 1, len); + port[len] = '\0'; + } + } + + /* Cpumask (IPv4 and IPv6 are the same) */ + p = strchr(portalstring, '@'); + if (p != NULL) { + q = portalstring + strlen(portalstring); + if (q == p) { + SPDK_ERRLOG("no cpumask specified\n"); + goto error_out; + } + if (!dry_run) { + len = q - p - 1; + cpumask = malloc(len + 1); + if (cpumask == NULL) { + SPDK_ERRLOG("malloc() failed for cpumask\n"); + goto error_out; + } + memcpy(cpumask, p + 1, len); + cpumask[len] = '\0'; + } + } + + if (!dry_run) { + *ip = spdk_iscsi_portal_create(host, port, cpumask); + if (!*ip) { + goto error_out; + } + } + + rc = 0; +error_out: + free(host); + free(port); + free(cpumask); + + return rc; +} + +struct spdk_iscsi_portal_grp * +spdk_iscsi_portal_grp_create(int tag) +{ + struct spdk_iscsi_portal_grp *pg = malloc(sizeof(*pg)); + + if (!pg) { + SPDK_ERRLOG("malloc() failed for portal group\n"); + return NULL; + } + + pg->ref = 0; + pg->tag = tag; + + TAILQ_INIT(&pg->head); + + return pg; +} + +void +spdk_iscsi_portal_grp_destroy(struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_portal *p; + + assert(pg != NULL); + + 
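/*
 * Editorial note (hedged, not part of the original patch): callers of
 * spdk_iscsi_portal_grp_destroy() are expected to have already taken the
 * group off g_spdk_iscsi.pg_head (or never to have registered it, as in the
 * config-parse error path below); the routine here only drains the
 * per-group portal list, destroying each member portal, before freeing the
 * group itself.
 */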
SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_portal_grp_destroy\n"); + while (!TAILQ_EMPTY(&pg->head)) { + p = TAILQ_FIRST(&pg->head); + TAILQ_REMOVE(&pg->head, p, per_pg_tailq); + spdk_iscsi_portal_destroy(p); + } + free(pg); +} + +int +spdk_iscsi_portal_grp_register(struct spdk_iscsi_portal_grp *pg) +{ + int rc = -1; + struct spdk_iscsi_portal_grp *tmp; + + assert(pg != NULL); + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + tmp = spdk_iscsi_portal_grp_find_by_tag(pg->tag); + if (tmp == NULL) { + TAILQ_INSERT_TAIL(&g_spdk_iscsi.pg_head, pg, tailq); + rc = 0; + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return rc; +} + +void +spdk_iscsi_portal_grp_add_portal(struct spdk_iscsi_portal_grp *pg, + struct spdk_iscsi_portal *p) +{ + assert(pg != NULL); + assert(p != NULL); + + p->group = pg; + TAILQ_INSERT_TAIL(&pg->head, p, per_pg_tailq); +} + +static int +spdk_iscsi_parse_portal_grp(struct spdk_conf_section *sp) +{ + struct spdk_iscsi_portal_grp *pg; + struct spdk_iscsi_portal *p; + const char *val; + char *label, *portal; + int portals = 0, i = 0, rc = 0; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "add portal group (from config file) %d\n", + spdk_conf_section_get_num(sp)); + + val = spdk_conf_section_get_val(sp, "Comment"); + if (val != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "Comment %s\n", val); + } + + /* counts number of definitions */ + for (i = 0; ; i++) { + /* + * label is no longer used, but we keep it in the config + * file definition so that we do not break existing config + * files. + */ + label = spdk_conf_section_get_nmval(sp, "Portal", i, 0); + portal = spdk_conf_section_get_nmval(sp, "Portal", i, 1); + if (label == NULL || portal == NULL) { + break; + } + rc = spdk_iscsi_parse_portal(portal, &p, 1); + if (rc < 0) { + SPDK_ERRLOG("parse portal error (%s)\n", portal); + return -1; + } + } + + portals = i; + if (portals > MAX_PORTAL) { + SPDK_ERRLOG("%d > MAX_PORTAL\n", portals); + return -1; + } + + pg = spdk_iscsi_portal_grp_create(spdk_conf_section_get_num(sp)); + if (!pg) { + SPDK_ERRLOG("portal group malloc error (%s)\n", spdk_conf_section_get_name(sp)); + return -1; + } + + for (i = 0; i < portals; i++) { + label = spdk_conf_section_get_nmval(sp, "Portal", i, 0); + portal = spdk_conf_section_get_nmval(sp, "Portal", i, 1); + if (label == NULL || portal == NULL) { + SPDK_ERRLOG("portal error\n"); + goto error; + } + + rc = spdk_iscsi_parse_portal(portal, &p, 0); + if (rc < 0) { + SPDK_ERRLOG("parse portal error (%s)\n", portal); + goto error; + } + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "RIndex=%d, Host=%s, Port=%s, Tag=%d\n", + i, p->host, p->port, spdk_conf_section_get_num(sp)); + + spdk_iscsi_portal_grp_add_portal(pg, p); + } + + rc = spdk_iscsi_portal_grp_open(pg); + if (rc != 0) { + SPDK_ERRLOG("portal_grp_open failed\n"); + goto error; + } + + /* Add portal group to the end of the pg list */ + rc = spdk_iscsi_portal_grp_register(pg); + if (rc != 0) { + SPDK_ERRLOG("register portal failed\n"); + goto error; + } + + return 0; + +error: + spdk_iscsi_portal_grp_release(pg); + return -1; +} + +struct spdk_iscsi_portal_grp * +spdk_iscsi_portal_grp_find_by_tag(int tag) +{ + struct spdk_iscsi_portal_grp *pg; + + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + if (pg->tag == tag) { + return pg; + } + } + + return NULL; +} + +int +spdk_iscsi_parse_portal_grps(void) +{ + int rc = 0; + struct spdk_conf_section *sp; + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "PortalGroup")) { + if (spdk_conf_section_get_num(sp) == 0) { + 
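/*
 * Editorial note (hedged, not part of the original patch): portal group
 * tags come from the numeric suffix of the section name and must be >= 1,
 * so a [PortalGroup0] section is rejected here. A minimal valid section,
 * using a hypothetical address, would look like:
 *
 *   [PortalGroup1]
 *     Portal DA1 10.0.0.1:3260@0x1
 *
 * The label ("DA1") is parsed but ignored, the port defaults to 3260 when
 * omitted, and the cpumask defaults to the application core mask.
 */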
SPDK_ERRLOG("Group 0 is invalid\n"); + return -1; + } + + /* Build portal group from cfg section PortalGroup */ + rc = spdk_iscsi_parse_portal_grp(sp); + if (rc < 0) { + SPDK_ERRLOG("parse_portal_group() failed\n"); + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +void +spdk_iscsi_portal_grps_destroy(void) +{ + struct spdk_iscsi_portal_grp *pg; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_portal_grps_destroy\n"); + pthread_mutex_lock(&g_spdk_iscsi.mutex); + while (!TAILQ_EMPTY(&g_spdk_iscsi.pg_head)) { + pg = TAILQ_FIRST(&g_spdk_iscsi.pg_head); + TAILQ_REMOVE(&g_spdk_iscsi.pg_head, pg, tailq); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + spdk_iscsi_portal_grp_destroy(pg); + pthread_mutex_lock(&g_spdk_iscsi.mutex); + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); +} + +int +spdk_iscsi_portal_grp_open(struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_portal *p; + int rc; + + TAILQ_FOREACH(p, &pg->head, per_pg_tailq) { + rc = spdk_iscsi_portal_open(p); + if (rc < 0) { + return rc; + } + } + return 0; +} + +static void +spdk_iscsi_portal_grp_close(struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_portal *p; + + TAILQ_FOREACH(p, &pg->head, per_pg_tailq) { + spdk_iscsi_portal_close(p); + } +} + +void +spdk_iscsi_portal_grp_close_all(void) +{ + struct spdk_iscsi_portal_grp *pg; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_portal_grp_close_all\n"); + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + spdk_iscsi_portal_grp_close(pg); + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); +} + +struct spdk_iscsi_portal_grp * +spdk_iscsi_portal_grp_unregister(int tag) +{ + struct spdk_iscsi_portal_grp *pg; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + if (pg->tag == tag) { + TAILQ_REMOVE(&g_spdk_iscsi.pg_head, pg, tailq); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return pg; + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return NULL; +} + +void +spdk_iscsi_portal_grp_release(struct spdk_iscsi_portal_grp *pg) +{ + spdk_iscsi_portal_grp_close(pg); + spdk_iscsi_portal_grp_destroy(pg); +} + +static const char *portal_group_section = \ + "\n" + "# Users must change the PortalGroup section(s) to match the IP addresses\n" + "# for their environment.\n" + "# PortalGroup sections define which network portals the iSCSI target\n" + "# will use to listen for incoming connections. These are also used to\n" + "# determine which targets are accessible over each portal group.\n" + "# Up to 1024 Portal directives are allowed. These define the network\n" + "# portals of the portal group. The user must specify a IP address\n" + "# for each network portal, and may optionally specify a port and\n" + "# a cpumask. If the port is omitted, 3260 will be used. Cpumask will\n" + "# be used to set the processor affinity of the iSCSI connection\n" + "# through the portal. 
If the cpumask is omitted, cpumask will be\n" + "# set to all available processors.\n" + "# Syntax:\n" + "# Portal [:[@]]\n"; + +#define PORTAL_GROUP_TMPL \ +"[PortalGroup%d]\n" \ +" Comment \"Portal%d\"\n" + +#define PORTAL_TMPL \ +" Portal DA1 %s:%s@0x%s\n" + +void +spdk_iscsi_portal_grps_config_text(FILE *fp) +{ + struct spdk_iscsi_portal *p = NULL; + struct spdk_iscsi_portal_grp *pg = NULL; + + /* Create portal group section */ + fprintf(fp, "%s", portal_group_section); + + /* Dump portal groups */ + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + if (NULL == pg) { continue; } + fprintf(fp, PORTAL_GROUP_TMPL, pg->tag, pg->tag); + /* Dump portals */ + TAILQ_FOREACH(p, &pg->head, per_pg_tailq) { + if (NULL == p) { continue; } + fprintf(fp, PORTAL_TMPL, p->host, p->port, + spdk_cpuset_fmt(p->cpumask)); + } + } +} + +static void +spdk_iscsi_portal_grp_info_json(struct spdk_iscsi_portal_grp *pg, + struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_portal *portal; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_int32(w, "tag", pg->tag); + + spdk_json_write_named_array_begin(w, "portals"); + TAILQ_FOREACH(portal, &pg->head, per_pg_tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "host", portal->host); + spdk_json_write_named_string(w, "port", portal->port); + spdk_json_write_named_string_fmt(w, "cpumask", "0x%s", + spdk_cpuset_fmt(portal->cpumask)); + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_json_write_object_end(w); +} + +static void +spdk_iscsi_portal_grp_config_json(struct spdk_iscsi_portal_grp *pg, + struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "add_portal_group"); + + spdk_json_write_name(w, "params"); + spdk_iscsi_portal_grp_info_json(pg, w); + + spdk_json_write_object_end(w); +} + +void +spdk_iscsi_portal_grps_info_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_portal_grp *pg; + + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + spdk_iscsi_portal_grp_info_json(pg, w); + } +} + +void +spdk_iscsi_portal_grps_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_portal_grp *pg; + + TAILQ_FOREACH(pg, &g_spdk_iscsi.pg_head, tailq) { + spdk_iscsi_portal_grp_config_json(pg, w); + } +} diff --git a/src/spdk/lib/iscsi/portal_grp.h b/src/spdk/lib/iscsi/portal_grp.h new file mode 100644 index 00000000..08cb3992 --- /dev/null +++ b/src/spdk/lib/iscsi/portal_grp.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_PORTAL_GRP_H +#define SPDK_PORTAL_GRP_H + +#include "spdk/conf.h" +#include "spdk/cpuset.h" + +struct spdk_json_write_ctx; + +struct spdk_iscsi_portal { + struct spdk_iscsi_portal_grp *group; + char *host; + char *port; + struct spdk_sock *sock; + struct spdk_cpuset *cpumask; + struct spdk_poller *acceptor_poller; + TAILQ_ENTRY(spdk_iscsi_portal) per_pg_tailq; + TAILQ_ENTRY(spdk_iscsi_portal) g_tailq; +}; + +struct spdk_iscsi_portal_grp { + int ref; + int tag; + TAILQ_ENTRY(spdk_iscsi_portal_grp) tailq; + TAILQ_HEAD(, spdk_iscsi_portal) head; +}; + +/* SPDK iSCSI Portal Group management API */ + +struct spdk_iscsi_portal *spdk_iscsi_portal_create(const char *host, const char *port, + const char *cpumask); +void spdk_iscsi_portal_destroy(struct spdk_iscsi_portal *p); + +struct spdk_iscsi_portal_grp *spdk_iscsi_portal_grp_create(int tag); +void spdk_iscsi_portal_grp_add_portal(struct spdk_iscsi_portal_grp *pg, + struct spdk_iscsi_portal *p); +void spdk_iscsi_portal_grp_destroy(struct spdk_iscsi_portal_grp *pg); +void spdk_iscsi_portal_grp_release(struct spdk_iscsi_portal_grp *pg); +int spdk_iscsi_parse_portal_grps(void); +void spdk_iscsi_portal_grps_destroy(void); +int spdk_iscsi_portal_grp_register(struct spdk_iscsi_portal_grp *pg); +struct spdk_iscsi_portal_grp *spdk_iscsi_portal_grp_unregister(int tag); +struct spdk_iscsi_portal_grp *spdk_iscsi_portal_grp_find_by_tag(int tag); +int spdk_iscsi_portal_grp_open(struct spdk_iscsi_portal_grp *pg); + +void spdk_iscsi_portal_grp_close_all(void); +void spdk_iscsi_portal_grps_config_text(FILE *fp); +void spdk_iscsi_portal_grps_info_json(struct spdk_json_write_ctx *w); +void spdk_iscsi_portal_grps_config_json(struct spdk_json_write_ctx *w); +#endif // SPDK_PORTAL_GRP_H diff --git a/src/spdk/lib/iscsi/task.c b/src/spdk/lib/iscsi/task.c new file mode 100644 index 00000000..6b56cd97 --- /dev/null +++ b/src/spdk/lib/iscsi/task.c @@ -0,0 +1,88 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/env.h" +#include "spdk/log.h" +#include "iscsi/conn.h" +#include "iscsi/task.h" + +static void +spdk_iscsi_task_free(struct spdk_scsi_task *scsi_task) +{ + struct spdk_iscsi_task *task = spdk_iscsi_task_from_scsi_task(scsi_task); + + if (task->parent) { + spdk_scsi_task_put(&task->parent->scsi); + task->parent = NULL; + } + + spdk_iscsi_task_disassociate_pdu(task); + assert(task->conn->pending_task_cnt > 0); + task->conn->pending_task_cnt--; + spdk_mempool_put(g_spdk_iscsi.task_pool, (void *)task); +} + +struct spdk_iscsi_task * +spdk_iscsi_task_get(struct spdk_iscsi_conn *conn, struct spdk_iscsi_task *parent, + spdk_scsi_task_cpl cpl_fn) +{ + struct spdk_iscsi_task *task; + + task = spdk_mempool_get(g_spdk_iscsi.task_pool); + if (!task) { + SPDK_ERRLOG("Unable to get task\n"); + abort(); + } + + memset(task, 0, sizeof(*task)); + task->conn = conn; + assert(conn->pending_task_cnt < UINT32_MAX); + conn->pending_task_cnt++; + spdk_scsi_task_construct(&task->scsi, + cpl_fn, + spdk_iscsi_task_free); + if (parent) { + parent->scsi.ref++; + task->parent = parent; + task->tag = parent->tag; + task->scsi.dxfer_dir = parent->scsi.dxfer_dir; + task->scsi.transfer_len = parent->scsi.transfer_len; + task->scsi.lun = parent->scsi.lun; + task->scsi.cdb = parent->scsi.cdb; + task->scsi.target_port = parent->scsi.target_port; + task->scsi.initiator_port = parent->scsi.initiator_port; + } + + return task; +} diff --git a/src/spdk/lib/iscsi/task.h b/src/spdk/lib/iscsi/task.h new file mode 100644 index 00000000..fea928ac --- /dev/null +++ b/src/spdk/lib/iscsi/task.h @@ -0,0 +1,187 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ISCSI_TASK_H +#define SPDK_ISCSI_TASK_H + +#include "iscsi/iscsi.h" +#include "spdk/scsi.h" +#include "spdk/util.h" + +struct spdk_iscsi_task { + struct spdk_scsi_task scsi; + + struct spdk_iscsi_task *parent; + + struct spdk_iscsi_conn *conn; + struct spdk_iscsi_pdu *pdu; + uint32_t outstanding_r2t; + + uint32_t desired_data_transfer_length; + + /* Only valid for Read/Write */ + uint32_t bytes_completed; + + uint32_t data_out_cnt; + + /* + * Tracks the current offset of large read io. + */ + uint32_t current_datain_offset; + + /* + * next_expected_r2t_offset is used when we receive + * the DataOUT PDU. + */ + uint32_t next_expected_r2t_offset; + + /* + * Tracks the length of the R2T that is in progress. + * Used to check that an R2T burst does not exceed + * MaxBurstLength. + */ + uint32_t current_r2t_length; + + /* + * next_r2t_offset is used when we are sending the + * R2T packet to keep track of next offset of r2t. + */ + uint32_t next_r2t_offset; + uint32_t R2TSN; + uint32_t r2t_datasn; /* record next datasn for a r2tsn */ + uint32_t acked_r2tsn; /* next r2tsn to be acked */ + uint32_t datain_datasn; + uint32_t acked_data_sn; /* next expected datain datasn */ + uint32_t ttt; + + uint32_t tag; + + /** + * Record the lun id just in case the lun is invalid, + * which will happen when hot removing the lun. 
+ */ + int lun_id; + + TAILQ_ENTRY(spdk_iscsi_task) link; + + TAILQ_HEAD(subtask_list, spdk_iscsi_task) subtask_list; + TAILQ_ENTRY(spdk_iscsi_task) subtask_link; + bool is_queued; /* is queued in scsi layer for handling */ +}; + +static inline void +spdk_iscsi_task_put(struct spdk_iscsi_task *task) +{ + spdk_scsi_task_put(&task->scsi); +} + +static inline struct spdk_iscsi_pdu * +spdk_iscsi_task_get_pdu(struct spdk_iscsi_task *task) +{ + return task->pdu; +} + +static inline void +spdk_iscsi_task_set_pdu(struct spdk_iscsi_task *task, struct spdk_iscsi_pdu *pdu) +{ + task->pdu = pdu; +} + +static inline struct iscsi_bhs * +spdk_iscsi_task_get_bhs(struct spdk_iscsi_task *task) +{ + return &spdk_iscsi_task_get_pdu(task)->bhs; +} + +static inline void +spdk_iscsi_task_associate_pdu(struct spdk_iscsi_task *task, struct spdk_iscsi_pdu *pdu) +{ + spdk_iscsi_task_set_pdu(task, pdu); + pdu->ref++; +} + +static inline void +spdk_iscsi_task_disassociate_pdu(struct spdk_iscsi_task *task) +{ + if (spdk_iscsi_task_get_pdu(task)) { + spdk_put_pdu(spdk_iscsi_task_get_pdu(task)); + spdk_iscsi_task_set_pdu(task, NULL); + } +} + +static inline int +spdk_iscsi_task_is_immediate(struct spdk_iscsi_task *task) +{ + struct iscsi_bhs_scsi_req *scsi_req; + + scsi_req = (struct iscsi_bhs_scsi_req *)spdk_iscsi_task_get_bhs(task); + return (scsi_req->immediate == 1); +} + +static inline int +spdk_iscsi_task_is_read(struct spdk_iscsi_task *task) +{ + struct iscsi_bhs_scsi_req *scsi_req; + + scsi_req = (struct iscsi_bhs_scsi_req *)spdk_iscsi_task_get_bhs(task); + return (scsi_req->read_bit == 1); +} + +static inline uint32_t +spdk_iscsi_task_get_cmdsn(struct spdk_iscsi_task *task) +{ + return spdk_iscsi_task_get_pdu(task)->cmd_sn; +} + +struct spdk_iscsi_task *spdk_iscsi_task_get(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_task *parent, + spdk_scsi_task_cpl cpl_fn); + +static inline struct spdk_iscsi_task * +spdk_iscsi_task_from_scsi_task(struct spdk_scsi_task *task) +{ + return SPDK_CONTAINEROF(task, struct spdk_iscsi_task, scsi); +} + +static inline struct spdk_iscsi_task * +spdk_iscsi_task_get_primary(struct spdk_iscsi_task *task) +{ + if (task->parent) { + return task->parent; + } else { + return task; + } +} + +#endif /* SPDK_ISCSI_TASK_H */ diff --git a/src/spdk/lib/iscsi/tgt_node.c b/src/spdk/lib/iscsi/tgt_node.c new file mode 100644 index 00000000..97b5bbe1 --- /dev/null +++ b/src/spdk/lib/iscsi/tgt_node.c @@ -0,0 +1,1538 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/conf.h" +#include "spdk/sock.h" +#include "spdk/scsi.h" + +#include "spdk_internal/log.h" + +#include "iscsi/iscsi.h" +#include "iscsi/conn.h" +#include "iscsi/tgt_node.h" +#include "iscsi/portal_grp.h" +#include "iscsi/init_grp.h" +#include "iscsi/task.h" + +#define MAX_TMPBUF 1024 +#define MAX_MASKBUF 128 + +static bool +spdk_iscsi_ipv6_netmask_allow_addr(const char *netmask, const char *addr) +{ + struct in6_addr in6_mask; + struct in6_addr in6_addr; + char mask[MAX_MASKBUF]; + const char *p; + size_t n; + int bits, bmask; + int i; + + if (netmask[0] != '[') { + return false; + } + p = strchr(netmask, ']'); + if (p == NULL) { + return false; + } + n = p - (netmask + 1); + if (n + 1 > sizeof mask) { + return false; + } + + memcpy(mask, netmask + 1, n); + mask[n] = '\0'; + p++; + + if (p[0] == '/') { + bits = (int) strtol(p + 1, NULL, 10); + if (bits <= 0 || bits > 128) { + return false; + } + } else { + bits = 128; + } + +#if 0 + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "input %s\n", addr); + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "mask %s / %d\n", mask, bits); +#endif + + /* presentation to network order binary */ + if (inet_pton(AF_INET6, mask, &in6_mask) <= 0 + || inet_pton(AF_INET6, addr, &in6_addr) <= 0) { + return false; + } + + /* check 128bits */ + for (i = 0; i < (bits / 8); i++) { + if (in6_mask.s6_addr[i] != in6_addr.s6_addr[i]) { + return false; + } + } + if (bits % 8) { + bmask = (0xffU << (8 - (bits % 8))) & 0xffU; + if ((in6_mask.s6_addr[i] & bmask) != (in6_addr.s6_addr[i] & bmask)) { + return false; + } + } + + /* match */ + return true; +} + +static bool +spdk_iscsi_ipv4_netmask_allow_addr(const char *netmask, const char *addr) +{ + struct in_addr in4_mask; + struct in_addr in4_addr; + char mask[MAX_MASKBUF]; + const char *p; + uint32_t bmask; + size_t n; + int bits; + + p = strchr(netmask, '/'); + if (p == NULL) { + p = netmask + strlen(netmask); + } + n = p - netmask; + if (n + 1 > sizeof mask) { + return false; + } + + memcpy(mask, netmask, n); + mask[n] = '\0'; + + if (p[0] == '/') { + bits = (int) strtol(p + 1, NULL, 10); + if (bits <= 0 || bits > 32) { + return false; + } + } else { + bits = 32; + } + + /* presentation to network order binary */ + if (inet_pton(AF_INET, mask, &in4_mask) <= 0 + || inet_pton(AF_INET, addr, &in4_addr) <= 0) { + return false; + } + + /* check 32bits */ + bmask = (0xffffffffU << (32 - bits)) & 0xffffffffU; + if ((ntohl(in4_mask.s_addr) & bmask) != (ntohl(in4_addr.s_addr) & bmask)) { + return false; + } + + /* match */ + return true; +} + +static bool +spdk_iscsi_netmask_allow_addr(const char *netmask, const char *addr) +{ + if (netmask == NULL || addr == NULL) { + return false; + } + if (strcasecmp(netmask, "ANY") == 
0) { + return true; + } + if (netmask[0] == '[') { + /* IPv6 */ + if (spdk_iscsi_ipv6_netmask_allow_addr(netmask, addr)) { + return true; + } + } else { + /* IPv4 */ + if (spdk_iscsi_ipv4_netmask_allow_addr(netmask, addr)) { + return true; + } + } + return false; +} + +static bool +spdk_iscsi_init_grp_allow_addr(struct spdk_iscsi_init_grp *igp, + const char *addr) +{ + struct spdk_iscsi_initiator_netmask *imask; + + TAILQ_FOREACH(imask, &igp->netmask_head, tailq) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "netmask=%s, addr=%s\n", + imask->mask, addr); + if (spdk_iscsi_netmask_allow_addr(imask->mask, addr)) { + return true; + } + } + return false; +} + +static int +spdk_iscsi_init_grp_allow_iscsi_name(struct spdk_iscsi_init_grp *igp, + const char *iqn, bool *result) +{ + struct spdk_iscsi_initiator_name *iname; + + TAILQ_FOREACH(iname, &igp->initiator_head, tailq) { + /* denied if iqn is matched */ + if ((iname->name[0] == '!') + && (strcasecmp(&iname->name[1], "ANY") == 0 + || strcasecmp(&iname->name[1], iqn) == 0)) { + *result = false; + return 0; + } + /* allowed if iqn is matched */ + if (strcasecmp(iname->name, "ANY") == 0 + || strcasecmp(iname->name, iqn) == 0) { + *result = true; + return 0; + } + } + return -1; +} + +static struct spdk_iscsi_pg_map * +spdk_iscsi_tgt_node_find_pg_map(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_portal_grp *pg); + +bool +spdk_iscsi_tgt_node_access(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target, const char *iqn, const char *addr) +{ + struct spdk_iscsi_portal_grp *pg; + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_ig_map *ig_map; + int rc; + bool allowed = false; + + if (conn == NULL || target == NULL || iqn == NULL || addr == NULL) { + return false; + } + pg = conn->portal->group; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "pg=%d, iqn=%s, addr=%s\n", + pg->tag, iqn, addr); + pg_map = spdk_iscsi_tgt_node_find_pg_map(target, pg); + if (pg_map == NULL) { + return false; + } + TAILQ_FOREACH(ig_map, &pg_map->ig_map_head, tailq) { + rc = spdk_iscsi_init_grp_allow_iscsi_name(ig_map->ig, iqn, &allowed); + if (rc == 0) { + if (allowed == false) { + goto denied; + } else { + if (spdk_iscsi_init_grp_allow_addr(ig_map->ig, addr)) { + return true; + } + } + } else { + /* netmask is denied in this initiator group */ + } + } + +denied: + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "access denied from %s (%s) to %s (%s:%s,%d)\n", + iqn, addr, target->name, conn->portal->host, + conn->portal->port, conn->portal->group->tag); + return false; +} + +static bool +spdk_iscsi_tgt_node_allow_iscsi_name(struct spdk_iscsi_tgt_node *target, const char *iqn) +{ + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_ig_map *ig_map; + int rc; + bool result = false; + + if (target == NULL || iqn == NULL) { + return false; + } + + TAILQ_FOREACH(pg_map, &target->pg_map_head, tailq) { + TAILQ_FOREACH(ig_map, &pg_map->ig_map_head, tailq) { + rc = spdk_iscsi_init_grp_allow_iscsi_name(ig_map->ig, iqn, &result); + if (rc == 0) { + return result; + } + } + } + + return false; +} + +int +spdk_iscsi_send_tgts(struct spdk_iscsi_conn *conn, const char *iiqn, + const char *iaddr, const char *tiqn, uint8_t *data, int alloc_len, + int data_len) +{ + char buf[MAX_TMPBUF]; + struct spdk_iscsi_portal_grp *pg; + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_portal *p; + struct spdk_iscsi_tgt_node *target; + char *host; + int total; + int len; + int rc; + + if (conn == NULL) { + return 0; + } + + total = data_len; + if (alloc_len < 1) { + return 0; + } + if (total > alloc_len) { + 
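/*
 * Editorial note (hedged, not part of the original patch): if the text
 * already placed in the buffer (data_len) exceeds the response buffer, the
 * reply is clamped to alloc_len and NUL-terminated below, so the initiator
 * still receives a well-formed, if truncated, SendTargets payload. Each
 * entry appended further down is a NUL-separated key=value pair, e.g. with
 * hypothetical names and addresses:
 *
 *   TargetName=iqn.2016-06.io.spdk:disk1\0TargetAddress=10.0.0.1:3260,1\0
 */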
total = alloc_len; + data[total - 1] = '\0'; + return total; + } + + if (alloc_len - total < 1) { + SPDK_ERRLOG("data space small %d\n", alloc_len); + return total; + } + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + if (strcasecmp(tiqn, "ALL") != 0 + && strcasecmp(tiqn, target->name) != 0) { + continue; + } + rc = spdk_iscsi_tgt_node_allow_iscsi_name(target, iiqn); + if (rc == 0) { + continue; + } + + /* DO SENDTARGETS */ + len = snprintf((char *) data + total, alloc_len - total, + "TargetName=%s", target->name); + total += len + 1; + + /* write to data */ + TAILQ_FOREACH(pg_map, &target->pg_map_head, tailq) { + pg = pg_map->pg; + TAILQ_FOREACH(p, &pg->head, per_pg_tailq) { + if (alloc_len - total < 1) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + SPDK_ERRLOG("data space small %d\n", alloc_len); + return total; + } + host = p->host; + /* wildcard? */ + if (strcasecmp(host, "[::]") == 0 + || strcasecmp(host, "0.0.0.0") == 0) { + if (spdk_sock_is_ipv6(conn->sock)) { + snprintf(buf, sizeof buf, "[%s]", + conn->target_addr); + host = buf; + } else if (spdk_sock_is_ipv4(conn->sock)) { + snprintf(buf, sizeof buf, "%s", + conn->target_addr); + host = buf; + } else { + /* skip portal for the family */ + continue; + } + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, + "TargetAddress=%s:%s,%d\n", + host, p->port, pg->tag); + len = snprintf((char *) data + total, + alloc_len - total, + "TargetAddress=%s:%s,%d", + host, p->port, pg->tag); + total += len + 1; + } + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return total; +} + +struct spdk_iscsi_tgt_node * +spdk_iscsi_find_tgt_node(const char *target_name) +{ + struct spdk_iscsi_tgt_node *target; + + if (target_name == NULL) { + return NULL; + } + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + if (strcasecmp(target_name, target->name) == 0) { + return target; + } + } + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "can't find target %s\n", target_name); + return NULL; +} + +static int +spdk_iscsi_tgt_node_register(struct spdk_iscsi_tgt_node *target) +{ + pthread_mutex_lock(&g_spdk_iscsi.mutex); + + if (spdk_iscsi_find_tgt_node(target->name) != NULL) { + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return -EEXIST; + } + + TAILQ_INSERT_TAIL(&g_spdk_iscsi.target_head, target, tailq); + + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return 0; +} + +static int +spdk_iscsi_tgt_node_unregister(struct spdk_iscsi_tgt_node *target) +{ + struct spdk_iscsi_tgt_node *t; + + TAILQ_FOREACH(t, &g_spdk_iscsi.target_head, tailq) { + if (t == target) { + TAILQ_REMOVE(&g_spdk_iscsi.target_head, t, tailq); + return 0; + } + } + + return -1; +} + +static struct spdk_iscsi_ig_map * +spdk_iscsi_pg_map_find_ig_map(struct spdk_iscsi_pg_map *pg_map, + struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_ig_map *ig_map; + + TAILQ_FOREACH(ig_map, &pg_map->ig_map_head, tailq) { + if (ig_map->ig == ig) { + return ig_map; + } + } + + return NULL; +} + +static struct spdk_iscsi_ig_map * +spdk_iscsi_pg_map_add_ig_map(struct spdk_iscsi_pg_map *pg_map, + struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_ig_map *ig_map; + + if (spdk_iscsi_pg_map_find_ig_map(pg_map, ig) != NULL) { + return NULL; + } + + ig_map = malloc(sizeof(*ig_map)); + if (ig_map == NULL) { + return NULL; + } + + ig_map->ig = ig; + ig->ref++; + pg_map->num_ig_maps++; + TAILQ_INSERT_TAIL(&pg_map->ig_map_head, ig_map, tailq); + + return ig_map; +} + +static void +_spdk_iscsi_pg_map_delete_ig_map(struct spdk_iscsi_pg_map *pg_map, + struct spdk_iscsi_ig_map 
*ig_map) +{ + TAILQ_REMOVE(&pg_map->ig_map_head, ig_map, tailq); + pg_map->num_ig_maps--; + ig_map->ig->ref--; + free(ig_map); +} + +static int +spdk_iscsi_pg_map_delete_ig_map(struct spdk_iscsi_pg_map *pg_map, + struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_ig_map *ig_map; + + ig_map = spdk_iscsi_pg_map_find_ig_map(pg_map, ig); + if (ig_map == NULL) { + return -ENOENT; + } + + _spdk_iscsi_pg_map_delete_ig_map(pg_map, ig_map); + return 0; +} + +static void +spdk_iscsi_pg_map_delete_all_ig_maps(struct spdk_iscsi_pg_map *pg_map) +{ + struct spdk_iscsi_ig_map *ig_map, *tmp; + + TAILQ_FOREACH_SAFE(ig_map, &pg_map->ig_map_head, tailq, tmp) { + _spdk_iscsi_pg_map_delete_ig_map(pg_map, ig_map); + } +} + +static struct spdk_iscsi_pg_map * +spdk_iscsi_tgt_node_find_pg_map(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_pg_map *pg_map; + + TAILQ_FOREACH(pg_map, &target->pg_map_head, tailq) { + if (pg_map->pg == pg) { + return pg_map; + } + } + + return NULL; +} + +static struct spdk_iscsi_pg_map * +spdk_iscsi_tgt_node_add_pg_map(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_pg_map *pg_map; + char port_name[MAX_TMPBUF]; + int rc; + + if (spdk_iscsi_tgt_node_find_pg_map(target, pg) != NULL) { + return NULL; + } + + if (target->num_pg_maps >= SPDK_SCSI_DEV_MAX_PORTS) { + SPDK_ERRLOG("Number of PG maps is more than allowed (max=%d)\n", + SPDK_SCSI_DEV_MAX_PORTS); + return NULL; + } + + pg_map = malloc(sizeof(*pg_map)); + if (pg_map == NULL) { + return NULL; + } + + snprintf(port_name, sizeof(port_name), "%s,t,0x%4.4x", + spdk_scsi_dev_get_name(target->dev), pg->tag); + rc = spdk_scsi_dev_add_port(target->dev, pg->tag, port_name); + if (rc != 0) { + free(pg_map); + return NULL; + } + + TAILQ_INIT(&pg_map->ig_map_head); + pg_map->num_ig_maps = 0; + pg->ref++; + pg_map->pg = pg; + target->num_pg_maps++; + TAILQ_INSERT_TAIL(&target->pg_map_head, pg_map, tailq); + + return pg_map; +} + +static void +_spdk_iscsi_tgt_node_delete_pg_map(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_pg_map *pg_map) +{ + TAILQ_REMOVE(&target->pg_map_head, pg_map, tailq); + target->num_pg_maps--; + pg_map->pg->ref--; + + spdk_scsi_dev_delete_port(target->dev, pg_map->pg->tag); + + free(pg_map); +} + +static int +spdk_iscsi_tgt_node_delete_pg_map(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_portal_grp *pg) +{ + struct spdk_iscsi_pg_map *pg_map; + + pg_map = spdk_iscsi_tgt_node_find_pg_map(target, pg); + if (pg_map == NULL) { + return -ENOENT; + } + + if (pg_map->num_ig_maps > 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "delete %d ig_maps forcefully\n", + pg_map->num_ig_maps); + } + + spdk_iscsi_pg_map_delete_all_ig_maps(pg_map); + _spdk_iscsi_tgt_node_delete_pg_map(target, pg_map); + return 0; +} + +static void +spdk_iscsi_tgt_node_delete_ig_maps(struct spdk_iscsi_tgt_node *target, + struct spdk_iscsi_init_grp *ig) +{ + struct spdk_iscsi_pg_map *pg_map, *tmp; + + TAILQ_FOREACH_SAFE(pg_map, &target->pg_map_head, tailq, tmp) { + spdk_iscsi_pg_map_delete_ig_map(pg_map, ig); + if (pg_map->num_ig_maps == 0) { + _spdk_iscsi_tgt_node_delete_pg_map(target, pg_map); + } + } +} + +static void +spdk_iscsi_tgt_node_delete_all_pg_maps(struct spdk_iscsi_tgt_node *target) +{ + struct spdk_iscsi_pg_map *pg_map, *tmp; + + TAILQ_FOREACH_SAFE(pg_map, &target->pg_map_head, tailq, tmp) { + spdk_iscsi_pg_map_delete_all_ig_maps(pg_map); + _spdk_iscsi_tgt_node_delete_pg_map(target, pg_map); + } +} + +static void 
+spdk_iscsi_tgt_node_destruct(struct spdk_iscsi_tgt_node *target) +{ + if (target == NULL) { + return; + } + + free(target->name); + free(target->alias); + spdk_iscsi_tgt_node_delete_all_pg_maps(target); + spdk_scsi_dev_destruct(target->dev); + + pthread_mutex_destroy(&target->mutex); + free(target); +} + +static int +spdk_iscsi_tgt_node_delete_pg_ig_map(struct spdk_iscsi_tgt_node *target, + int pg_tag, int ig_tag) +{ + struct spdk_iscsi_portal_grp *pg; + struct spdk_iscsi_init_grp *ig; + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_ig_map *ig_map; + + pg = spdk_iscsi_portal_grp_find_by_tag(pg_tag); + if (pg == NULL) { + SPDK_ERRLOG("%s: PortalGroup%d not found\n", target->name, pg_tag); + return -ENOENT; + } + ig = spdk_iscsi_init_grp_find_by_tag(ig_tag); + if (ig == NULL) { + SPDK_ERRLOG("%s: InitiatorGroup%d not found\n", target->name, ig_tag); + return -ENOENT; + } + + pg_map = spdk_iscsi_tgt_node_find_pg_map(target, pg); + if (pg_map == NULL) { + SPDK_ERRLOG("%s: PortalGroup%d is not mapped\n", target->name, pg_tag); + return -ENOENT; + } + ig_map = spdk_iscsi_pg_map_find_ig_map(pg_map, ig); + if (ig_map == NULL) { + SPDK_ERRLOG("%s: InitiatorGroup%d is not mapped\n", target->name, pg_tag); + return -ENOENT; + } + + _spdk_iscsi_pg_map_delete_ig_map(pg_map, ig_map); + if (pg_map->num_ig_maps == 0) { + _spdk_iscsi_tgt_node_delete_pg_map(target, pg_map); + } + + return 0; +} + +static int +spdk_iscsi_tgt_node_add_pg_ig_map(struct spdk_iscsi_tgt_node *target, + int pg_tag, int ig_tag) +{ + struct spdk_iscsi_portal_grp *pg; + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_init_grp *ig; + struct spdk_iscsi_ig_map *ig_map; + bool new_pg_map = false; + + pg = spdk_iscsi_portal_grp_find_by_tag(pg_tag); + if (pg == NULL) { + SPDK_ERRLOG("%s: PortalGroup%d not found\n", target->name, pg_tag); + return -ENOENT; + } + ig = spdk_iscsi_init_grp_find_by_tag(ig_tag); + if (ig == NULL) { + SPDK_ERRLOG("%s: InitiatorGroup%d not found\n", target->name, ig_tag); + return -ENOENT; + } + + /* get existing pg_map or create new pg_map and add it to target */ + pg_map = spdk_iscsi_tgt_node_find_pg_map(target, pg); + if (pg_map == NULL) { + pg_map = spdk_iscsi_tgt_node_add_pg_map(target, pg); + if (pg_map == NULL) { + goto failed; + } + new_pg_map = true; + } + + /* create new ig_map and add it to pg_map */ + ig_map = spdk_iscsi_pg_map_add_ig_map(pg_map, ig); + if (ig_map == NULL) { + goto failed; + } + + return 0; + +failed: + if (new_pg_map) { + _spdk_iscsi_tgt_node_delete_pg_map(target, pg_map); + } + + return -1; +} + +int +spdk_iscsi_tgt_node_add_pg_ig_maps(struct spdk_iscsi_tgt_node *target, + int *pg_tag_list, int *ig_tag_list, uint16_t num_maps) +{ + uint16_t i; + int rc; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + for (i = 0; i < num_maps; i++) { + rc = spdk_iscsi_tgt_node_add_pg_ig_map(target, pg_tag_list[i], + ig_tag_list[i]); + if (rc != 0) { + SPDK_ERRLOG("could not add map to target\n"); + goto invalid; + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return 0; + +invalid: + for (; i > 0; --i) { + spdk_iscsi_tgt_node_delete_pg_ig_map(target, pg_tag_list[i - 1], + ig_tag_list[i - 1]); + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return -1; +} + +int +spdk_iscsi_tgt_node_delete_pg_ig_maps(struct spdk_iscsi_tgt_node *target, + int *pg_tag_list, int *ig_tag_list, uint16_t num_maps) +{ + uint16_t i; + int rc; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + for (i = 0; i < num_maps; i++) { + rc = spdk_iscsi_tgt_node_delete_pg_ig_map(target, pg_tag_list[i], + 
ig_tag_list[i]); + if (rc != 0) { + SPDK_ERRLOG("could not delete map from target\n"); + goto invalid; + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return 0; + +invalid: + for (; i > 0; --i) { + rc = spdk_iscsi_tgt_node_add_pg_ig_map(target, pg_tag_list[i - 1], + ig_tag_list[i - 1]); + if (rc != 0) { + spdk_iscsi_tgt_node_delete_all_pg_maps(target); + break; + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + return -1; +} + +static int +spdk_check_iscsi_name(const char *name) +{ + const unsigned char *up = (const unsigned char *) name; + size_t n; + + /* valid iSCSI name? */ + for (n = 0; up[n] != 0; n++) { + if (up[n] > 0x00U && up[n] <= 0x2cU) { + return -1; + } + if (up[n] == 0x2fU) { + return -1; + } + if (up[n] >= 0x3bU && up[n] <= 0x40U) { + return -1; + } + if (up[n] >= 0x5bU && up[n] <= 0x60U) { + return -1; + } + if (up[n] >= 0x7bU && up[n] <= 0x7fU) { + return -1; + } + if (isspace(up[n])) { + return -1; + } + } + /* valid format? */ + if (strncasecmp(name, "iqn.", 4) == 0) { + /* iqn.YYYY-MM.reversed.domain.name */ + if (!isdigit(up[4]) || !isdigit(up[5]) || !isdigit(up[6]) + || !isdigit(up[7]) || up[8] != '-' || !isdigit(up[9]) + || !isdigit(up[10]) || up[11] != '.') { + SPDK_ERRLOG("invalid iqn format. " + "expect \"iqn.YYYY-MM.reversed.domain.name\"\n"); + return -1; + } + } else if (strncasecmp(name, "eui.", 4) == 0) { + /* EUI-64 -> 16bytes */ + /* XXX */ + } else if (strncasecmp(name, "naa.", 4) == 0) { + /* 64bit -> 16bytes, 128bit -> 32bytes */ + /* XXX */ + } + /* OK */ + return 0; +} + +bool +spdk_iscsi_check_chap_params(bool disable, bool require, bool mutual, int group) +{ + if (group < 0) { + SPDK_ERRLOG("Invalid auth group ID (%d)\n", group); + return false; + } + if ((!disable && !require && !mutual) || /* Auto */ + (disable && !require && !mutual) || /* None */ + (!disable && require && !mutual) || /* CHAP */ + (!disable && require && mutual)) { /* CHAP Mutual */ + return true; + } + SPDK_ERRLOG("Invalid combination of CHAP params (d=%d,r=%d,m=%d)\n", + disable, require, mutual); + return false; +} + +_spdk_iscsi_tgt_node * +spdk_iscsi_tgt_node_construct(int target_index, + const char *name, const char *alias, + int *pg_tag_list, int *ig_tag_list, uint16_t num_maps, + const char *bdev_name_list[], int *lun_id_list, int num_luns, + int queue_depth, + bool disable_chap, bool require_chap, bool mutual_chap, int chap_group, + bool header_digest, bool data_digest) +{ + char fullname[MAX_TMPBUF]; + struct spdk_iscsi_tgt_node *target; + int rc; + + if (!spdk_iscsi_check_chap_params(disable_chap, require_chap, + mutual_chap, chap_group)) { + return NULL; + } + + if (num_maps == 0) { + SPDK_ERRLOG("num_maps = 0\n"); + return NULL; + } + + if (name == NULL) { + SPDK_ERRLOG("TargetName not found\n"); + return NULL; + } + + if (strncasecmp(name, "iqn.", 4) != 0 + && strncasecmp(name, "eui.", 4) != 0 + && strncasecmp(name, "naa.", 4) != 0) { + snprintf(fullname, sizeof(fullname), "%s:%s", g_spdk_iscsi.nodebase, name); + } else { + snprintf(fullname, sizeof(fullname), "%s", name); + } + + if (spdk_check_iscsi_name(fullname) != 0) { + SPDK_ERRLOG("TargetName %s contains an invalid character or format.\n", + name); + return NULL; + } + + target = malloc(sizeof(*target)); + if (!target) { + SPDK_ERRLOG("could not allocate target\n"); + return NULL; + } + + memset(target, 0, sizeof(*target)); + + rc = pthread_mutex_init(&target->mutex, NULL); + if (rc != 0) { + SPDK_ERRLOG("tgt_node%d: mutex_init() failed\n", target->num); + spdk_iscsi_tgt_node_destruct(target); + 
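/*
 * Editorial note (hedged, not part of the original patch): every failure
 * path in this constructor, starting with the mutex-init error above,
 * releases the partially built target through spdk_iscsi_tgt_node_destruct()
 * rather than freeing individual members; the earlier memset() ensures that
 * fields which have not been set up yet are still zeroed at that point.
 */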
return NULL; + } + + target->num = target_index; + + target->name = strdup(fullname); + if (!target->name) { + SPDK_ERRLOG("Could not allocate TargetName\n"); + spdk_iscsi_tgt_node_destruct(target); + return NULL; + } + + if (alias == NULL) { + target->alias = NULL; + } else { + target->alias = strdup(alias); + if (!target->alias) { + SPDK_ERRLOG("Could not allocate TargetAlias\n"); + spdk_iscsi_tgt_node_destruct(target); + return NULL; + } + } + + target->dev = spdk_scsi_dev_construct(fullname, bdev_name_list, lun_id_list, num_luns, + SPDK_SPC_PROTOCOL_IDENTIFIER_ISCSI, NULL, NULL); + if (!target->dev) { + SPDK_ERRLOG("Could not construct SCSI device\n"); + spdk_iscsi_tgt_node_destruct(target); + return NULL; + } + + TAILQ_INIT(&target->pg_map_head); + rc = spdk_iscsi_tgt_node_add_pg_ig_maps(target, pg_tag_list, ig_tag_list, num_maps); + if (rc != 0) { + SPDK_ERRLOG("could not add map to target\n"); + spdk_iscsi_tgt_node_destruct(target); + return NULL; + } + + target->disable_chap = disable_chap; + target->require_chap = require_chap; + target->mutual_chap = mutual_chap; + target->chap_group = chap_group; + target->header_digest = header_digest; + target->data_digest = data_digest; + + if (queue_depth > 0 && ((uint32_t)queue_depth <= g_spdk_iscsi.MaxQueueDepth)) { + target->queue_depth = queue_depth; + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "QueueDepth %d is invalid and %d is used instead.\n", + queue_depth, g_spdk_iscsi.MaxQueueDepth); + target->queue_depth = g_spdk_iscsi.MaxQueueDepth; + } + + rc = spdk_iscsi_tgt_node_register(target); + if (rc != 0) { + SPDK_ERRLOG("register target is failed\n"); + spdk_iscsi_tgt_node_destruct(target); + return NULL; + } + + return target; +} + +static int +spdk_iscsi_parse_tgt_node(struct spdk_conf_section *sp) +{ + char buf[MAX_TMPBUF]; + struct spdk_iscsi_tgt_node *target; + int pg_tag_list[MAX_TARGET_MAP], ig_tag_list[MAX_TARGET_MAP]; + int num_target_maps; + const char *alias, *pg_tag, *ig_tag; + const char *ag_tag; + const char *val, *name; + int target_num, chap_group, pg_tag_i, ig_tag_i; + bool header_digest, data_digest; + bool disable_chap, require_chap, mutual_chap; + int i; + int lun_id_list[SPDK_SCSI_DEV_MAX_LUN]; + const char *bdev_name_list[SPDK_SCSI_DEV_MAX_LUN]; + int num_luns, queue_depth; + + target_num = spdk_conf_section_get_num(sp); + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "add unit %d\n", target_num); + + data_digest = false; + header_digest = false; + + name = spdk_conf_section_get_val(sp, "TargetName"); + + if (name == NULL) { + SPDK_ERRLOG("tgt_node%d: TargetName not found\n", target_num); + return -1; + } + + alias = spdk_conf_section_get_val(sp, "TargetAlias"); + + /* Setup initiator and portal group mapping */ + val = spdk_conf_section_get_val(sp, "Mapping"); + if (val == NULL) { + /* no map */ + SPDK_ERRLOG("tgt_node%d: no Mapping\n", target_num); + return -1; + } + + for (i = 0; i < MAX_TARGET_MAP; i++) { + val = spdk_conf_section_get_nmval(sp, "Mapping", i, 0); + if (val == NULL) { + break; + } + pg_tag = spdk_conf_section_get_nmval(sp, "Mapping", i, 0); + ig_tag = spdk_conf_section_get_nmval(sp, "Mapping", i, 1); + if (pg_tag == NULL || ig_tag == NULL) { + SPDK_ERRLOG("tgt_node%d: mapping error\n", target_num); + return -1; + } + if (strncasecmp(pg_tag, "PortalGroup", + strlen("PortalGroup")) != 0 + || sscanf(pg_tag, "%*[^0-9]%d", &pg_tag_i) != 1) { + SPDK_ERRLOG("tgt_node%d: mapping portal error\n", target_num); + return -1; + } + if (strncasecmp(ig_tag, "InitiatorGroup", + strlen("InitiatorGroup")) != 0 + || 
sscanf(ig_tag, "%*[^0-9]%d", &ig_tag_i) != 1) { + SPDK_ERRLOG("tgt_node%d: mapping initiator error\n", target_num); + return -1; + } + if (pg_tag_i < 1 || ig_tag_i < 1) { + SPDK_ERRLOG("tgt_node%d: invalid group tag\n", target_num); + return -1; + } + pg_tag_list[i] = pg_tag_i; + ig_tag_list[i] = ig_tag_i; + } + + num_target_maps = i; + + /* Setup AuthMethod */ + val = spdk_conf_section_get_val(sp, "AuthMethod"); + disable_chap = false; + require_chap = false; + mutual_chap = false; + if (val != NULL) { + for (i = 0; ; i++) { + val = spdk_conf_section_get_nmval(sp, "AuthMethod", 0, i); + if (val == NULL) { + break; + } + if (strcasecmp(val, "CHAP") == 0) { + require_chap = true; + } else if (strcasecmp(val, "Mutual") == 0) { + mutual_chap = true; + } else if (strcasecmp(val, "Auto") == 0) { + disable_chap = false; + require_chap = false; + mutual_chap = false; + } else if (strcasecmp(val, "None") == 0) { + disable_chap = true; + require_chap = false; + mutual_chap = false; + } else { + SPDK_ERRLOG("tgt_node%d: unknown auth\n", target_num); + return -1; + } + } + if (mutual_chap && !require_chap) { + SPDK_ERRLOG("tgt_node%d: Mutual but not CHAP\n", target_num); + return -1; + } + } + if (disable_chap) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthMethod None\n"); + } else if (!require_chap) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthMethod Auto\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthMethod CHAP %s\n", + mutual_chap ? "Mutual" : ""); + } + + val = spdk_conf_section_get_val(sp, "AuthGroup"); + if (val == NULL) { + chap_group = 0; + } else { + ag_tag = val; + if (strcasecmp(ag_tag, "None") == 0) { + chap_group = 0; + } else { + if (strncasecmp(ag_tag, "AuthGroup", + strlen("AuthGroup")) != 0 + || sscanf(ag_tag, "%*[^0-9]%d", &chap_group) != 1) { + SPDK_ERRLOG("tgt_node%d: auth group error\n", target_num); + return -1; + } + if (chap_group == 0) { + SPDK_ERRLOG("tgt_node%d: invalid auth group 0\n", target_num); + return -1; + } + } + } + if (chap_group == 0) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthGroup None\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "AuthGroup AuthGroup%d\n", chap_group); + } + + val = spdk_conf_section_get_val(sp, "UseDigest"); + if (val != NULL) { + for (i = 0; ; i++) { + val = spdk_conf_section_get_nmval(sp, "UseDigest", 0, i); + if (val == NULL) { + break; + } + if (strcasecmp(val, "Header") == 0) { + header_digest = true; + } else if (strcasecmp(val, "Data") == 0) { + data_digest = true; + } else if (strcasecmp(val, "Auto") == 0) { + header_digest = false; + data_digest = false; + } else { + SPDK_ERRLOG("tgt_node%d: unknown digest\n", target_num); + return -1; + } + } + } + if (!header_digest && !data_digest) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "UseDigest Auto\n"); + } else { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "UseDigest %s %s\n", + header_digest ? "Header" : "", + data_digest ? 
"Data" : ""); + } + + val = spdk_conf_section_get_val(sp, "QueueDepth"); + if (val == NULL) { + queue_depth = g_spdk_iscsi.MaxQueueDepth; + } else { + queue_depth = (int) strtol(val, NULL, 10); + } + + num_luns = 0; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + snprintf(buf, sizeof(buf), "LUN%d", i); + val = spdk_conf_section_get_val(sp, buf); + if (val == NULL) { + continue; + } + + bdev_name_list[num_luns] = val; + lun_id_list[num_luns] = i; + num_luns++; + } + + if (num_luns == 0) { + SPDK_ERRLOG("tgt_node%d: No LUN specified for target %s.\n", target_num, name); + return -1; + } + + target = spdk_iscsi_tgt_node_construct(target_num, name, alias, + pg_tag_list, ig_tag_list, num_target_maps, + bdev_name_list, lun_id_list, num_luns, queue_depth, + disable_chap, require_chap, mutual_chap, chap_group, + header_digest, data_digest); + + if (target == NULL) { + SPDK_ERRLOG("tgt_node%d: add_iscsi_target_node error\n", target_num); + return -1; + } + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + struct spdk_scsi_lun *lun = spdk_scsi_dev_get_lun(target->dev, i); + + if (lun) { + SPDK_INFOLOG(SPDK_LOG_ISCSI, "device %d: LUN%d %s\n", + spdk_scsi_dev_get_id(target->dev), + spdk_scsi_lun_get_id(lun), + spdk_scsi_lun_get_bdev_name(lun)); + } + } + + return 0; +} + +int spdk_iscsi_parse_tgt_nodes(void) +{ + struct spdk_conf_section *sp; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "spdk_iscsi_parse_tgt_nodes\n"); + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "TargetNode")) { + int tag = spdk_conf_section_get_num(sp); + + if (tag > SPDK_TN_TAG_MAX) { + SPDK_ERRLOG("tag %d is invalid\n", tag); + return -1; + } + rc = spdk_iscsi_parse_tgt_node(sp); + if (rc < 0) { + SPDK_ERRLOG("spdk_iscsi_parse_tgt_node() failed\n"); + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +void +spdk_iscsi_shutdown_tgt_nodes(void) +{ + struct spdk_iscsi_tgt_node *target, *tmp; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH_SAFE(target, &g_spdk_iscsi.target_head, tailq, tmp) { + TAILQ_REMOVE(&g_spdk_iscsi.target_head, target, tailq); + spdk_iscsi_tgt_node_destruct(target); + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); +} + +int +spdk_iscsi_shutdown_tgt_node_by_name(const char *target_name) +{ + struct spdk_iscsi_tgt_node *target; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + target = spdk_iscsi_find_tgt_node(target_name); + if (target != NULL) { + spdk_iscsi_tgt_node_unregister(target); + spdk_iscsi_tgt_node_destruct(target); + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return 0; + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); + + return -ENOENT; +} + +int +spdk_iscsi_tgt_node_cleanup_luns(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target) +{ + int i; + struct spdk_iscsi_task *task; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + struct spdk_scsi_lun *lun = spdk_scsi_dev_get_lun(target->dev, i); + + if (!lun) { + continue; + } + + /* we create a fake management task per LUN to cleanup */ + task = spdk_iscsi_task_get(conn, NULL, spdk_iscsi_task_mgmt_cpl); + if (!task) { + SPDK_ERRLOG("Unable to acquire task\n"); + return -1; + } + + task->scsi.target_port = conn->target_port; + task->scsi.initiator_port = conn->initiator_port; + task->scsi.lun = lun; + + spdk_scsi_dev_queue_mgmt_task(target->dev, &task->scsi, SPDK_SCSI_TASK_FUNC_LUN_RESET); + } + + return 0; +} + +void spdk_iscsi_tgt_node_delete_map(struct spdk_iscsi_portal_grp *portal_group, + struct spdk_iscsi_init_grp *initiator_group) +{ 
+ struct spdk_iscsi_tgt_node *target; + + pthread_mutex_lock(&g_spdk_iscsi.mutex); + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + if (portal_group) { + spdk_iscsi_tgt_node_delete_pg_map(target, portal_group); + } + if (initiator_group) { + spdk_iscsi_tgt_node_delete_ig_maps(target, initiator_group); + } + } + pthread_mutex_unlock(&g_spdk_iscsi.mutex); +} + +int +spdk_iscsi_tgt_node_add_lun(struct spdk_iscsi_tgt_node *target, + const char *bdev_name, int lun_id) +{ + struct spdk_scsi_dev *dev; + int rc; + + if (target->num_active_conns > 0) { + SPDK_ERRLOG("Target has active connections (count=%d)\n", + target->num_active_conns); + return -1; + } + + if (lun_id < -1 || lun_id >= SPDK_SCSI_DEV_MAX_LUN) { + SPDK_ERRLOG("Specified LUN ID (%d) is invalid\n", lun_id); + return -1; + } + + dev = target->dev; + if (dev == NULL) { + SPDK_ERRLOG("SCSI device is not found\n"); + return -1; + } + + rc = spdk_scsi_dev_add_lun(dev, bdev_name, lun_id, NULL, NULL); + if (rc != 0) { + SPDK_ERRLOG("spdk_scsi_dev_add_lun failed\n"); + return -1; + } + + return 0; +} + +int +spdk_iscsi_tgt_node_set_chap_params(struct spdk_iscsi_tgt_node *target, + bool disable_chap, bool require_chap, + bool mutual_chap, int32_t chap_group) +{ + if (!spdk_iscsi_check_chap_params(disable_chap, require_chap, + mutual_chap, chap_group)) { + return -EINVAL; + } + + pthread_mutex_lock(&target->mutex); + target->disable_chap = disable_chap; + target->require_chap = require_chap; + target->mutual_chap = mutual_chap; + target->chap_group = chap_group; + pthread_mutex_unlock(&target->mutex); + + return 0; +} + +static const char *target_nodes_section = \ + "\n" + "# Users should change the TargetNode section(s) below to match the\n" + "# desired iSCSI target node configuration.\n" + "# TargetName, Mapping, LUN0 are minimum required\n"; + +#define TARGET_NODE_TMPL \ +"[TargetNode%d]\n" \ +" Comment \"Target%d\"\n" \ +" TargetName %s\n" \ +" TargetAlias \"%s\"\n" + +#define TARGET_NODE_PGIG_MAPPING_TMPL \ +" Mapping PortalGroup%d InitiatorGroup%d\n" + +#define TARGET_NODE_AUTH_TMPL \ +" AuthMethod %s\n" \ +" AuthGroup %s\n" \ +" UseDigest %s\n" + +#define TARGET_NODE_QD_TMPL \ +" QueueDepth %d\n\n" + +#define TARGET_NODE_LUN_TMPL \ +" LUN%d %s\n" + +void +spdk_iscsi_tgt_nodes_config_text(FILE *fp) +{ + int l = 0; + struct spdk_scsi_dev *dev = NULL; + struct spdk_iscsi_tgt_node *target = NULL; + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_ig_map *ig_map; + + /* Create target nodes section */ + fprintf(fp, "%s", target_nodes_section); + + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + int idx; + const char *authmethod = "None"; + char authgroup[32] = "None"; + const char *usedigest = "Auto"; + + dev = target->dev; + if (NULL == dev) { continue; } + + idx = target->num; + fprintf(fp, TARGET_NODE_TMPL, idx, idx, target->name, spdk_scsi_dev_get_name(dev)); + + TAILQ_FOREACH(pg_map, &target->pg_map_head, tailq) { + TAILQ_FOREACH(ig_map, &pg_map->ig_map_head, tailq) { + fprintf(fp, TARGET_NODE_PGIG_MAPPING_TMPL, + pg_map->pg->tag, + ig_map->ig->tag); + } + } + + if (target->disable_chap) { + authmethod = "None"; + } else if (!target->require_chap) { + authmethod = "Auto"; + } else if (target->mutual_chap) { + authmethod = "CHAP Mutual"; + } else { + authmethod = "CHAP"; + } + + if (target->chap_group > 0) { + snprintf(authgroup, sizeof(authgroup), "AuthGroup%d", target->chap_group); + } + + if (target->header_digest) { + usedigest = "Header"; + } else if (target->data_digest) { + usedigest = "Data"; + } + 
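+		/*
+		 * Together with the fprintf calls below, the templates above emit one
+		 * stanza per target node.  With placeholder names and tags it looks
+		 * roughly like:
+		 *
+		 *   [TargetNode1]
+		 *     Comment "Target1"
+		 *     TargetName iqn.2016-06.io.spdk:Target1
+		 *     TargetAlias "Data Disk1"
+		 *     Mapping PortalGroup1 InitiatorGroup1
+		 *     AuthMethod Auto
+		 *     AuthGroup None
+		 *     UseDigest Auto
+		 *     LUN0 Malloc0
+		 *     QueueDepth 64
+		 */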
+ fprintf(fp, TARGET_NODE_AUTH_TMPL, + authmethod, authgroup, usedigest); + + for (l = 0; l < SPDK_SCSI_DEV_MAX_LUN; l++) { + struct spdk_scsi_lun *lun = spdk_scsi_dev_get_lun(dev, l); + + if (!lun) { + continue; + } + + fprintf(fp, TARGET_NODE_LUN_TMPL, + spdk_scsi_lun_get_id(lun), + spdk_scsi_lun_get_bdev_name(lun)); + } + + fprintf(fp, TARGET_NODE_QD_TMPL, + target->queue_depth); + } +} + +static void +spdk_iscsi_tgt_node_info_json(struct spdk_iscsi_tgt_node *target, + struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_pg_map *pg_map; + struct spdk_iscsi_ig_map *ig_map; + int i; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "name", target->name); + + if (target->alias) { + spdk_json_write_named_string(w, "alias_name", target->alias); + } + + spdk_json_write_named_array_begin(w, "pg_ig_maps"); + TAILQ_FOREACH(pg_map, &target->pg_map_head, tailq) { + TAILQ_FOREACH(ig_map, &pg_map->ig_map_head, tailq) { + spdk_json_write_object_begin(w); + spdk_json_write_named_int32(w, "pg_tag", pg_map->pg->tag); + spdk_json_write_named_int32(w, "ig_tag", ig_map->ig->tag); + spdk_json_write_object_end(w); + } + } + spdk_json_write_array_end(w); + + spdk_json_write_named_array_begin(w, "luns"); + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + struct spdk_scsi_lun *lun = spdk_scsi_dev_get_lun(target->dev, i); + + if (lun) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "bdev_name", spdk_scsi_lun_get_bdev_name(lun)); + spdk_json_write_named_int32(w, "lun_id", spdk_scsi_lun_get_id(lun)); + spdk_json_write_object_end(w); + } + } + spdk_json_write_array_end(w); + + spdk_json_write_named_int32(w, "queue_depth", target->queue_depth); + + spdk_json_write_named_bool(w, "disable_chap", target->disable_chap); + spdk_json_write_named_bool(w, "require_chap", target->require_chap); + spdk_json_write_named_bool(w, "mutual_chap", target->mutual_chap); + spdk_json_write_named_int32(w, "chap_group", target->chap_group); + + spdk_json_write_named_bool(w, "header_digest", target->header_digest); + spdk_json_write_named_bool(w, "data_digest", target->data_digest); + + spdk_json_write_object_end(w); +} + +static void +spdk_iscsi_tgt_node_config_json(struct spdk_iscsi_tgt_node *target, + struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_target_node"); + + spdk_json_write_name(w, "params"); + spdk_iscsi_tgt_node_info_json(target, w); + + spdk_json_write_object_end(w); +} + +void +spdk_iscsi_tgt_nodes_info_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_tgt_node *target; + + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + spdk_iscsi_tgt_node_info_json(target, w); + } +} + +void +spdk_iscsi_tgt_nodes_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_iscsi_tgt_node *target; + + TAILQ_FOREACH(target, &g_spdk_iscsi.target_head, tailq) { + spdk_iscsi_tgt_node_config_json(target, w); + } +} diff --git a/src/spdk/lib/iscsi/tgt_node.h b/src/spdk/lib/iscsi/tgt_node.h new file mode 100644 index 00000000..1d54922a --- /dev/null +++ b/src/spdk/lib/iscsi/tgt_node.h @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_ISCSI_TGT_NODE_H_ +#define SPDK_ISCSI_TGT_NODE_H_ + +#include "spdk/stdinc.h" + +#include "spdk/scsi.h" + +struct spdk_iscsi_conn; +struct spdk_iscsi_init_grp; +struct spdk_iscsi_portal_grp; +struct spdk_iscsi_portal; +struct spdk_json_write_ctx; + +#define MAX_TARGET_MAP 256 +#define SPDK_TN_TAG_MAX 0x0000ffff + +struct spdk_iscsi_ig_map { + struct spdk_iscsi_init_grp *ig; + TAILQ_ENTRY(spdk_iscsi_ig_map) tailq; +}; + +struct spdk_iscsi_pg_map { + struct spdk_iscsi_portal_grp *pg; + int num_ig_maps; + TAILQ_HEAD(, spdk_iscsi_ig_map) ig_map_head; + TAILQ_ENTRY(spdk_iscsi_pg_map) tailq ; +}; + +struct spdk_iscsi_tgt_node { + int num; + char *name; + char *alias; + + pthread_mutex_t mutex; + + bool disable_chap; + bool require_chap; + bool mutual_chap; + int chap_group; + bool header_digest; + bool data_digest; + int queue_depth; + + struct spdk_scsi_dev *dev; + /** + * Counts number of active iSCSI connections associated with this + * target node. + */ + uint32_t num_active_conns; + int lcore; + + int num_pg_maps; + TAILQ_HEAD(, spdk_iscsi_pg_map) pg_map_head; + TAILQ_ENTRY(spdk_iscsi_tgt_node) tailq; +}; + +int spdk_iscsi_parse_tgt_nodes(void); + +void spdk_iscsi_shutdown_tgt_nodes(void); +int spdk_iscsi_shutdown_tgt_node_by_name(const char *target_name); +int spdk_iscsi_send_tgts(struct spdk_iscsi_conn *conn, const char *iiqn, + const char *iaddr, const char *tiqn, uint8_t *data, int alloc_len, + int data_len); + +/* This typedef exists to work around an astyle 2.05 bug. + * Remove it when astyle is fixed. + */ +typedef struct spdk_iscsi_tgt_node _spdk_iscsi_tgt_node; + +/* + * bdev_name_list and lun_id_list are equal sized arrays of size num_luns. + * bdev_name_list refers to the names of the bdevs that will be used for the LUNs on the + * new target node. + * lun_id_list refers to the LUN IDs that will be used for the LUNs on the target node. 
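+ * For example, bdev_name_list = {"Malloc0", "Nvme0n1"} combined with
+ * lun_id_list = {0, 1} would create LUN0 backed by bdev Malloc0 and LUN1
+ * backed by bdev Nvme0n1 (the bdev names here are only placeholders).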
+ */ +_spdk_iscsi_tgt_node * +spdk_iscsi_tgt_node_construct(int target_index, + const char *name, const char *alias, + int *pg_tag_list, int *ig_tag_list, uint16_t num_maps, + const char *bdev_name_list[], int *lun_id_list, int num_luns, + int queue_depth, + bool disable_chap, bool require_chap, bool mutual_chap, int chap_group, + bool header_digest, bool data_digest); + +bool spdk_iscsi_check_chap_params(bool disable, bool require, bool mutual, int group); + +int spdk_iscsi_tgt_node_add_pg_ig_maps(struct spdk_iscsi_tgt_node *target, + int *pg_tag_list, int *ig_tag_list, + uint16_t num_maps); +int spdk_iscsi_tgt_node_delete_pg_ig_maps(struct spdk_iscsi_tgt_node *target, + int *pg_tag_list, int *ig_tag_list, + uint16_t num_maps); + +bool spdk_iscsi_tgt_node_access(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target, const char *iqn, + const char *addr); +struct spdk_iscsi_tgt_node *spdk_iscsi_find_tgt_node(const char *target_name); +int spdk_iscsi_tgt_node_reset(struct spdk_iscsi_tgt_node *target, + uint64_t lun); +int spdk_iscsi_tgt_node_cleanup_luns(struct spdk_iscsi_conn *conn, + struct spdk_iscsi_tgt_node *target); +void spdk_iscsi_tgt_node_delete_map(struct spdk_iscsi_portal_grp *portal_group, + struct spdk_iscsi_init_grp *initiator_group); +int spdk_iscsi_tgt_node_add_lun(struct spdk_iscsi_tgt_node *target, + const char *bdev_name, int lun_id); +int spdk_iscsi_tgt_node_set_chap_params(struct spdk_iscsi_tgt_node *target, + bool disable_chap, bool require_chap, + bool mutual_chap, int32_t chap_group); +void spdk_iscsi_tgt_nodes_config_text(FILE *fp); +void spdk_iscsi_tgt_nodes_info_json(struct spdk_json_write_ctx *w); +void spdk_iscsi_tgt_nodes_config_json(struct spdk_json_write_ctx *w); +#endif /* SPDK_ISCSI_TGT_NODE_H_ */ diff --git a/src/spdk/lib/json/Makefile b/src/spdk/lib/json/Makefile new file mode 100644 index 00000000..8808df9e --- /dev/null +++ b/src/spdk/lib/json/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = json_parse.c json_util.c json_write.c +LIBNAME = json + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/json/json_parse.c b/src/spdk/lib/json/json_parse.c new file mode 100644 index 00000000..8639d5ff --- /dev/null +++ b/src/spdk/lib/json/json_parse.c @@ -0,0 +1,668 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/json.h" + +#include "spdk_internal/utf.h" + +#define SPDK_JSON_MAX_NESTING_DEPTH 64 + +static int +hex_value(uint8_t c) +{ +#define V(x, y) [x] = y + 1 + static const int8_t val[256] = { + V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4), + V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9), + V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF), + V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF), + }; +#undef V + + return val[c] - 1; +} + +static int +json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out) +{ + uint8_t *str = *strp; + int v0, v1, v2, v3; + uint32_t val; + uint32_t surrogate_high = 0; + int rc; +decode: + /* \uXXXX */ + assert(buf_end > str); + + if (*str++ != '\\') { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + if (*str++ != 'u') { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + if ((v3 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + if ((v2 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + if ((v1 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + if ((v0 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; } + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + + val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12); + + if (surrogate_high) { + /* We already parsed the high surrogate, so this should be the low part. */ + if (!utf16_valid_surrogate_low(val)) { + return SPDK_JSON_PARSE_INVALID; + } + + /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */ + val = utf16_decode_surrogate_pair(surrogate_high, val); + } else if (utf16_valid_surrogate_high(val)) { + surrogate_high = val; + + /* + * We parsed a \uXXXX sequence that decoded to the first half of a + * UTF-16 surrogate pair, so it must be immediately followed by another + * \uXXXX escape. + * + * Loop around to get the low half of the surrogate pair. + */ + if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; } + goto decode; + } else if (utf16_valid_surrogate_low(val)) { + /* + * We found the second half of surrogate pair without the first half; + * this is an invalid encoding. + */ + return SPDK_JSON_PARSE_INVALID; + } + + /* + * Convert Unicode escape (or surrogate pair) to UTF-8 in place. + * + * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes + * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a + * single codepoint is 4 bytes. 
+ */ + if (out) { + rc = utf8_encode_unsafe(out, val); + } else { + rc = utf8_codepoint_len(val); + } + if (rc < 0) { + return SPDK_JSON_PARSE_INVALID; + } + + *strp = str; /* update input pointer */ + return rc; /* return number of bytes decoded */ +} + +static int +json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out) +{ + static const uint8_t escapes[256] = { + ['b'] = '\b', + ['f'] = '\f', + ['n'] = '\n', + ['r'] = '\r', + ['t'] = '\t', + ['/'] = '/', + ['"'] = '"', + ['\\'] = '\\', + }; + uint8_t *str = *strp; + uint8_t c; + + assert(buf_end > str); + if (buf_end - str < 2) { + return SPDK_JSON_PARSE_INCOMPLETE; + } + + assert(str[0] == '\\'); + + c = escapes[str[1]]; + if (c) { + if (out) { + *out = c; + } + *strp += 2; /* consumed two bytes */ + return 1; /* produced one byte */ + } + + return SPDK_JSON_PARSE_INVALID; +} + +/* + * Decode JSON string backslash escape. + * \param strp pointer to pointer to first character of escape (the backslash). + * *strp is also advanced to indicate how much input was consumed. + * + * \return Number of bytes appended to out + */ +static int +json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out) +{ + int rc; + + rc = json_decode_string_escape_twochar(strp, buf_end, out); + if (rc > 0) { + return rc; + } + + return json_decode_string_escape_unicode(strp, buf_end, out); +} + +/* + * Decode JSON string in place. + * + * \param str_start Pointer to the beginning of the string (the opening " character). + * + * \return Number of bytes in decoded string (beginning from start). + */ +static int +json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags) +{ + uint8_t *str = str_start; + uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */ + int rc; + + if (buf_end - str_start < 2) { + /* + * Shortest valid string (the empty string) is two bytes (""), + * so this can't possibly be valid + */ + *str_end = str; + return SPDK_JSON_PARSE_INCOMPLETE; + } + + if (*str++ != '"') { + *str_end = str; + return SPDK_JSON_PARSE_INVALID; + } + + while (str < buf_end) { + if (str[0] == '"') { + /* + * End of string. + * Update str_end to point at next input byte and return output length. + */ + *str_end = str + 1; + return out - str_start - 1; + } else if (str[0] == '\\') { + rc = json_decode_string_escape(&str, buf_end, + flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL); + assert(rc != 0); + if (rc < 0) { + *str_end = str; + return rc; + } + out += rc; + } else if (str[0] <= 0x1f) { + /* control characters must be escaped */ + *str_end = str; + return SPDK_JSON_PARSE_INVALID; + } else { + rc = utf8_valid(str, buf_end); + if (rc == 0) { + *str_end = str; + return SPDK_JSON_PARSE_INCOMPLETE; + } else if (rc < 0) { + *str_end = str; + return SPDK_JSON_PARSE_INVALID; + } + + if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) { + memmove(out, str, rc); + } + out += rc; + str += rc; + } + } + + /* If execution gets here, we ran out of buffer. 
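+	 * Report SPDK_JSON_PARSE_INCOMPLETE so the caller knows more input is
+	 * needed before the string can be decoded.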
*/ + *str_end = str; + return SPDK_JSON_PARSE_INCOMPLETE; +} + +static int +json_valid_number(uint8_t *start, uint8_t *buf_end) +{ + uint8_t *p = start; + uint8_t c; + + if (p >= buf_end) { return -1; } + + c = *p++; + if (c >= '1' && c <= '9') { goto num_int_digits; } + if (c == '0') { goto num_frac_or_exp; } + if (c == '-') { goto num_int_first_digit; } + p--; + goto done_invalid; + +num_int_first_digit: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c == '0') { goto num_frac_or_exp; } + if (c >= '1' && c <= '9') { goto num_int_digits; } + p--; + } + goto done_invalid; + +num_int_digits: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_int_digits; } + if (c == '.') { goto num_frac_first_digit; } + if (c == 'e' || c == 'E') { goto num_exp_sign; } + p--; + } + goto done_valid; + +num_frac_or_exp: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c == '.') { goto num_frac_first_digit; } + if (c == 'e' || c == 'E') { goto num_exp_sign; } + p--; + } + goto done_valid; + +num_frac_first_digit: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_frac_digits; } + p--; + } + goto done_invalid; + +num_frac_digits: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_frac_digits; } + if (c == 'e' || c == 'E') { goto num_exp_sign; } + p--; + } + goto done_valid; + +num_exp_sign: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_exp_digits; } + if (c == '-' || c == '+') { goto num_exp_first_digit; } + p--; + } + goto done_invalid; + +num_exp_first_digit: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_exp_digits; } + p--; + } + goto done_invalid; + +num_exp_digits: + if (spdk_likely(p != buf_end)) { + c = *p++; + if (c >= '0' && c <= '9') { goto num_exp_digits; } + p--; + } + goto done_valid; + +done_valid: + /* Valid end state */ + return p - start; + +done_invalid: + /* Invalid end state */ + if (p == buf_end) { + /* Hit the end of the buffer - the stream is incomplete. */ + return SPDK_JSON_PARSE_INCOMPLETE; + } + + /* Found an invalid character in an invalid end state */ + return SPDK_JSON_PARSE_INVALID; +} + +static int +json_valid_comment(const uint8_t *start, const uint8_t *buf_end) +{ + const uint8_t *p = start; + bool multiline; + + assert(buf_end > p); + if (buf_end - p < 2) { + return SPDK_JSON_PARSE_INCOMPLETE; + } + + if (p[0] != '/') { + return SPDK_JSON_PARSE_INVALID; + } + if (p[1] == '*') { + multiline = true; + } else if (p[1] == '/') { + multiline = false; + } else { + return SPDK_JSON_PARSE_INVALID; + } + p += 2; + + if (multiline) { + while (p != buf_end - 1) { + if (p[0] == '*' && p[1] == '/') { + /* Include the terminating star and slash in the comment */ + return p - start + 2; + } + p++; + } + } else { + while (p != buf_end) { + if (*p == '\r' || *p == '\n') { + /* Do not include the line terminator in the comment */ + return p - start; + } + p++; + } + } + + return SPDK_JSON_PARSE_INCOMPLETE; +} + +struct json_literal { + enum spdk_json_val_type type; + uint32_t len; + uint8_t str[8]; +}; + +/* + * JSON only defines 3 possible literals; they can be uniquely identified by bits + * 3 and 4 of the first character: + * 'f' = 0b11[00]110 + * 'n' = 0b11[01]110 + * 't' = 0b11[10]100 + * These two bits can be used as an index into the g_json_literals array. 
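+ * For example, ('f' >> 3) & 3 == 0, ('n' >> 3) & 3 == 1 and ('t' >> 3) & 3 == 2,
+ * which matches the order of the entries below.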
+ */ +static const struct json_literal g_json_literals[] = { + {SPDK_JSON_VAL_FALSE, 5, "false"}, + {SPDK_JSON_VAL_NULL, 4, "null"}, + {SPDK_JSON_VAL_TRUE, 4, "true"}, + {} +}; + +static int +match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len) +{ + assert(end >= start); + if ((size_t)(end - start) < len) { + return SPDK_JSON_PARSE_INCOMPLETE; + } + + if (memcmp(start, literal, len) != 0) { + return SPDK_JSON_PARSE_INVALID; + } + + return len; +} + +ssize_t +spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values, + void **end, uint32_t flags) +{ + uint8_t *json_end = json + size; + enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH]; + size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH]; + enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID; + bool trailing_comma = false; + size_t depth = 0; /* index into containers */ + size_t cur_value = 0; /* index into values */ + size_t con_start_value; + uint8_t *data = json; + uint8_t *new_data; + int rc = 0; + const struct json_literal *lit; + enum { + STATE_VALUE, /* initial state */ + STATE_VALUE_SEPARATOR, /* value separator (comma) */ + STATE_NAME, /* "name": value */ + STATE_NAME_SEPARATOR, /* colon */ + STATE_END, /* parsed the complete value, so only whitespace is valid */ + } state = STATE_VALUE; + +#define ADD_VALUE(t, val_start_ptr, val_end_ptr) \ + if (values && cur_value < num_values) { \ + values[cur_value].type = t; \ + values[cur_value].start = val_start_ptr; \ + values[cur_value].len = val_end_ptr - val_start_ptr; \ + } \ + cur_value++ + + while (data < json_end) { + uint8_t c = *data; + + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + /* Whitespace is allowed between any tokens. */ + data++; + break; + + case 't': + case 'f': + case 'n': + /* true, false, or null */ + if (state != STATE_VALUE) { goto done_invalid; } + lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */ + assert(lit->str[0] == c); + rc = match_literal(data, json_end, lit->str, lit->len); + if (rc < 0) { goto done_rc; } + ADD_VALUE(lit->type, data, data + rc); + data += rc; + state = depth ? STATE_VALUE_SEPARATOR : STATE_END; + trailing_comma = false; + break; + + case '"': + if (state != STATE_VALUE && state != STATE_NAME) { goto done_invalid; } + rc = json_decode_string(data, json_end, &new_data, flags); + if (rc < 0) { + data = new_data; + goto done_rc; + } + /* + * Start is data + 1 to skip initial quote. + * Length is data + rc - 1 to skip both quotes. + */ + ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME, + data + 1, data + rc - 1); + data = new_data; + if (state == STATE_NAME) { + state = STATE_NAME_SEPARATOR; + } else { + state = depth ? STATE_VALUE_SEPARATOR : STATE_END; + } + trailing_comma = false; + break; + + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (state != STATE_VALUE) { goto done_invalid; } + rc = json_valid_number(data, json_end); + if (rc < 0) { goto done_rc; } + ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc); + data += rc; + state = depth ? 
STATE_VALUE_SEPARATOR : STATE_END; + trailing_comma = false; + break; + + case '{': + case '[': + if (state != STATE_VALUE) { goto done_invalid; } + if (depth == SPDK_JSON_MAX_NESTING_DEPTH) { + rc = SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED; + goto done_rc; + } + if (c == '{') { + con_type = SPDK_JSON_VAL_OBJECT_BEGIN; + state = STATE_NAME; + } else { + con_type = SPDK_JSON_VAL_ARRAY_BEGIN; + state = STATE_VALUE; + } + con_value[depth] = cur_value; + containers[depth++] = con_type; + ADD_VALUE(con_type, data, data + 1); + data++; + trailing_comma = false; + break; + + case '}': + case ']': + if (trailing_comma) { goto done_invalid; } + if (depth == 0) { goto done_invalid; } + con_type = containers[--depth]; + con_start_value = con_value[depth]; + if (values && con_start_value < num_values) { + values[con_start_value].len = cur_value - con_start_value - 1; + } + if (c == '}') { + if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) { + goto done_invalid; + } + if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) { + goto done_invalid; + } + ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1); + } else { + if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) { + goto done_invalid; + } + if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) { + goto done_invalid; + } + ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1); + } + con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1]; + data++; + state = depth ? STATE_VALUE_SEPARATOR : STATE_END; + trailing_comma = false; + break; + + case ',': + if (state != STATE_VALUE_SEPARATOR) { goto done_invalid; } + data++; + assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN || + con_type == SPDK_JSON_VAL_OBJECT_BEGIN); + state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME; + trailing_comma = true; + break; + + case ':': + if (state != STATE_NAME_SEPARATOR) { goto done_invalid; } + data++; + state = STATE_VALUE; + break; + + case '/': + if (!(flags & SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS)) { + goto done_invalid; + } + rc = json_valid_comment(data, json_end); + if (rc < 0) { goto done_rc; } + /* Skip over comment */ + data += rc; + break; + + default: + goto done_invalid; + } + + if (state == STATE_END) { + break; + } + } + + if (state == STATE_END) { + /* Skip trailing whitespace */ + while (data < json_end) { + uint8_t c = *data; + + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + data++; + } else { + break; + } + } + + /* + * These asserts are just for sanity checking - they are guaranteed by the allowed + * state transitions. + */ + assert(depth == 0); + assert(trailing_comma == false); + assert(data <= json_end); + if (end) { + *end = data; + } + return cur_value; + } + + /* Invalid end state - ran out of data */ + rc = SPDK_JSON_PARSE_INCOMPLETE; + +done_rc: + assert(rc < 0); + if (end) { + *end = data; + } + return rc; + +done_invalid: + rc = SPDK_JSON_PARSE_INVALID; + goto done_rc; +} diff --git a/src/spdk/lib/json/json_util.c b/src/spdk/lib/json/json_util.c new file mode 100644 index 00000000..1146e6fa --- /dev/null +++ b/src/spdk/lib/json/json_util.c @@ -0,0 +1,650 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/json.h" + +#include "spdk_internal/utf.h" +#include "spdk_internal/log.h" + +#define SPDK_JSON_DEBUG(...) SPDK_DEBUGLOG(SPDK_LOG_JSON_UTIL, __VA_ARGS__) + +size_t +spdk_json_val_len(const struct spdk_json_val *val) +{ + if (val == NULL) { + return 0; + } + + if (val->type == SPDK_JSON_VAL_ARRAY_BEGIN || val->type == SPDK_JSON_VAL_OBJECT_BEGIN) { + return val->len + 2; + } + + return 1; +} + +bool +spdk_json_strequal(const struct spdk_json_val *val, const char *str) +{ + size_t len; + + if (val->type != SPDK_JSON_VAL_STRING && val->type != SPDK_JSON_VAL_NAME) { + return false; + } + + len = strlen(str); + if (val->len != len) { + return false; + } + + return memcmp(val->start, str, len) == 0; +} + +char * +spdk_json_strdup(const struct spdk_json_val *val) +{ + size_t len; + char *s; + + if (val->type != SPDK_JSON_VAL_STRING && val->type != SPDK_JSON_VAL_NAME) { + return NULL; + } + + len = val->len; + + if (memchr(val->start, '\0', len)) { + /* String contains embedded NUL, so it is not a valid C string. 
*/ + return NULL; + } + + s = malloc(len + 1); + if (s == NULL) { + return s; + } + + memcpy(s, val->start, len); + s[len] = '\0'; + + return s; +} + +struct spdk_json_num { + bool negative; + uint64_t significand; + int64_t exponent; +}; + +static int +spdk_json_number_split(const struct spdk_json_val *val, struct spdk_json_num *num) +{ + const char *iter; + size_t remaining; + uint64_t *pval; + uint64_t frac_digits = 0; + uint64_t exponent_u64 = 0; + bool exponent_negative = false; + enum { + NUM_STATE_INT, + NUM_STATE_FRAC, + NUM_STATE_EXP, + } state; + + memset(num, 0, sizeof(*num)); + + if (val->type != SPDK_JSON_VAL_NUMBER) { + return -EINVAL; + } + + remaining = val->len; + if (remaining == 0) { + return -EINVAL; + } + + iter = val->start; + if (*iter == '-') { + num->negative = true; + iter++; + remaining--; + } + + state = NUM_STATE_INT; + pval = &num->significand; + while (remaining--) { + char c = *iter++; + + if (c == '.') { + state = NUM_STATE_FRAC; + } else if (c == 'e' || c == 'E') { + state = NUM_STATE_EXP; + pval = &exponent_u64; + } else if (c == '-') { + assert(state == NUM_STATE_EXP); + exponent_negative = true; + } else if (c == '+') { + assert(state == NUM_STATE_EXP); + /* exp_negative = false; */ /* already false by default */ + } else { + uint64_t new_val; + + assert(c >= '0' && c <= '9'); + new_val = *pval * 10 + c - '0'; + if (new_val < *pval) { + return -ERANGE; + } + + if (state == NUM_STATE_FRAC) { + frac_digits++; + } + + *pval = new_val; + } + } + + if (exponent_negative) { + if (exponent_u64 > 9223372036854775808ULL) { /* abs(INT64_MIN) */ + return -ERANGE; + } + num->exponent = (int64_t) - exponent_u64; + } else { + if (exponent_u64 > INT64_MAX) { + return -ERANGE; + } + num->exponent = exponent_u64; + } + num->exponent -= frac_digits; + + /* Apply as much of the exponent as possible without overflow or truncation */ + if (num->exponent < 0) { + while (num->exponent && num->significand >= 10 && num->significand % 10 == 0) { + num->significand /= 10; + num->exponent++; + } + } else { /* positive exponent */ + while (num->exponent) { + uint64_t new_val = num->significand * 10; + + if (new_val < num->significand) { + break; + } + + num->significand = new_val; + num->exponent--; + } + } + + return 0; +} + +int +spdk_json_number_to_uint16(const struct spdk_json_val *val, uint16_t *num) +{ + struct spdk_json_num split_num; + int rc; + + rc = spdk_json_number_split(val, &split_num); + if (rc) { + return rc; + } + + if (split_num.exponent || split_num.negative) { + return -ERANGE; + } + + if (split_num.significand > UINT16_MAX) { + return -ERANGE; + } + *num = (uint16_t)split_num.significand; + return 0; +} + +int +spdk_json_number_to_int32(const struct spdk_json_val *val, int32_t *num) +{ + struct spdk_json_num split_num; + int rc; + + rc = spdk_json_number_split(val, &split_num); + if (rc) { + return rc; + } + + if (split_num.exponent) { + return -ERANGE; + } + + if (split_num.negative) { + if (split_num.significand > 2147483648) { /* abs(INT32_MIN) */ + return -ERANGE; + } + *num = (int32_t) - (int64_t)split_num.significand; + return 0; + } + + /* positive */ + if (split_num.significand > INT32_MAX) { + return -ERANGE; + } + *num = (int32_t)split_num.significand; + return 0; +} + +int +spdk_json_number_to_uint32(const struct spdk_json_val *val, uint32_t *num) +{ + struct spdk_json_num split_num; + int rc; + + rc = spdk_json_number_split(val, &split_num); + if (rc) { + return rc; + } + + if (split_num.exponent || split_num.negative) { + return -ERANGE; + } + + if 
(split_num.significand > UINT32_MAX) { + return -ERANGE; + } + *num = (uint32_t)split_num.significand; + return 0; +} + +int +spdk_json_number_to_uint64(const struct spdk_json_val *val, uint64_t *num) +{ + struct spdk_json_num split_num; + int rc; + + rc = spdk_json_number_split(val, &split_num); + if (rc) { + return rc; + } + + if (split_num.exponent || split_num.negative) { + return -ERANGE; + } + + *num = split_num.significand; + return 0; +} + +int +spdk_json_decode_object(const struct spdk_json_val *values, + const struct spdk_json_object_decoder *decoders, size_t num_decoders, void *out) +{ + uint32_t i; + bool invalid = false; + size_t decidx; + bool *seen; + + if (values == NULL || values->type != SPDK_JSON_VAL_OBJECT_BEGIN) { + return -1; + } + + seen = calloc(sizeof(bool), num_decoders); + if (seen == NULL) { + return -1; + } + + for (i = 0; i < values->len;) { + const struct spdk_json_val *name = &values[i + 1]; + const struct spdk_json_val *v = &values[i + 2]; + bool found = false; + + for (decidx = 0; decidx < num_decoders; decidx++) { + const struct spdk_json_object_decoder *dec = &decoders[decidx]; + if (spdk_json_strequal(name, dec->name)) { + void *field = (void *)((uintptr_t)out + dec->offset); + + found = true; + + if (seen[decidx]) { + /* duplicate field name */ + invalid = true; + } else { + seen[decidx] = true; + if (dec->decode_func(v, field)) { + invalid = true; + /* keep going to fill out any other valid keys */ + } + } + break; + } + } + + if (!found) { + invalid = true; + } + + i += 1 + spdk_json_val_len(v); + } + + for (decidx = 0; decidx < num_decoders; decidx++) { + if (!decoders[decidx].optional && !seen[decidx]) { + /* required field is missing */ + invalid = true; + break; + } + } + + free(seen); + return invalid ? -1 : 0; +} + +int +spdk_json_decode_array(const struct spdk_json_val *values, spdk_json_decode_fn decode_func, + void *out, size_t max_size, size_t *out_size, size_t stride) +{ + uint32_t i; + char *field; + char *out_end; + + if (values == NULL || values->type != SPDK_JSON_VAL_ARRAY_BEGIN) { + return -1; + } + + *out_size = 0; + field = out; + out_end = field + max_size * stride; + for (i = 0; i < values->len;) { + const struct spdk_json_val *v = &values[i + 1]; + + if (field == out_end) { + return -1; + } + + if (decode_func(v, field)) { + return -1; + } + + i += spdk_json_val_len(v); + field += stride; + (*out_size)++; + } + + return 0; +} + +int +spdk_json_decode_bool(const struct spdk_json_val *val, void *out) +{ + bool *f = out; + + if (val->type != SPDK_JSON_VAL_TRUE && val->type != SPDK_JSON_VAL_FALSE) { + return -1; + } + + *f = val->type == SPDK_JSON_VAL_TRUE; + return 0; +} + +int +spdk_json_decode_uint16(const struct spdk_json_val *val, void *out) +{ + uint16_t *i = out; + + return spdk_json_number_to_uint16(val, i); +} + +int +spdk_json_decode_int32(const struct spdk_json_val *val, void *out) +{ + int32_t *i = out; + + return spdk_json_number_to_int32(val, i); +} + +int +spdk_json_decode_uint32(const struct spdk_json_val *val, void *out) +{ + uint32_t *i = out; + + return spdk_json_number_to_uint32(val, i); +} + +int +spdk_json_decode_uint64(const struct spdk_json_val *val, void *out) +{ + uint64_t *i = out; + + return spdk_json_number_to_uint64(val, i); +} + +int +spdk_json_decode_string(const struct spdk_json_val *val, void *out) +{ + char **s = out; + + free(*s); + + *s = spdk_json_strdup(val); + + if (*s) { + return 0; + } else { + return -1; + } +} + +static struct spdk_json_val * +spdk_json_first(struct spdk_json_val *object, 
enum spdk_json_val_type type) +{ + /* 'object' must be JSON object or array. 'type' might be combination of these two. */ + assert((type & (SPDK_JSON_VAL_ARRAY_BEGIN | SPDK_JSON_VAL_OBJECT_BEGIN)) != 0); + + assert(object != NULL); + + if ((object->type & type) == 0) { + return NULL; + } + + object++; + if (object->len == 0) { + return NULL; + } + + return object; +} + +static struct spdk_json_val * +spdk_json_value(struct spdk_json_val *key) +{ + return key->type == SPDK_JSON_VAL_NAME ? key + 1 : NULL; +} + +int +spdk_json_find(struct spdk_json_val *object, const char *key_name, struct spdk_json_val **key, + struct spdk_json_val **val, enum spdk_json_val_type type) +{ + struct spdk_json_val *_key = NULL; + struct spdk_json_val *_val = NULL; + struct spdk_json_val *it; + + assert(object != NULL); + + for (it = spdk_json_first(object, SPDK_JSON_VAL_ARRAY_BEGIN | SPDK_JSON_VAL_OBJECT_BEGIN); + it != NULL; + it = spdk_json_next(it)) { + if (it->type != SPDK_JSON_VAL_NAME) { + continue; + } + + if (spdk_json_strequal(it, key_name) != true) { + continue; + } + + if (_key) { + SPDK_JSON_DEBUG("Duplicate key '%s'", key_name); + return -EINVAL; + } + + _key = it; + _val = spdk_json_value(_key); + + if (type != SPDK_JSON_VAL_INVALID && (_val->type & type) == 0) { + SPDK_JSON_DEBUG("key '%s' type is %#x but expected one of %#x\n", key_name, _val->type, type); + return -EDOM; + } + } + + if (key) { + *key = _key; + } + + if (val) { + *val = _val; + } + + return _val ? 0 : -ENOENT; +} + +int +spdk_json_find_string(struct spdk_json_val *object, const char *key_name, + struct spdk_json_val **key, struct spdk_json_val **val) +{ + return spdk_json_find(object, key_name, key, val, SPDK_JSON_VAL_STRING); +} + +int +spdk_json_find_array(struct spdk_json_val *object, const char *key_name, + struct spdk_json_val **key, struct spdk_json_val **val) +{ + return spdk_json_find(object, key_name, key, val, SPDK_JSON_VAL_ARRAY_BEGIN); +} + +struct spdk_json_val * +spdk_json_object_first(struct spdk_json_val *object) +{ + struct spdk_json_val *first = spdk_json_first(object, SPDK_JSON_VAL_OBJECT_BEGIN); + + /* Empty object? */ + return first && first->type != SPDK_JSON_VAL_OBJECT_END ? first : NULL; +} + +struct spdk_json_val * +spdk_json_array_first(struct spdk_json_val *array_begin) +{ + struct spdk_json_val *first = spdk_json_first(array_begin, SPDK_JSON_VAL_ARRAY_BEGIN); + + /* Empty array? */ + return first && first->type != SPDK_JSON_VAL_ARRAY_END ? 
first : NULL; +} + +static struct spdk_json_val * +spdk_json_skip_object_or_array(struct spdk_json_val *val) +{ + unsigned lvl; + enum spdk_json_val_type end_type; + struct spdk_json_val *it; + + if (val->type == SPDK_JSON_VAL_OBJECT_BEGIN) { + end_type = SPDK_JSON_VAL_OBJECT_END; + } else if (val->type == SPDK_JSON_VAL_ARRAY_BEGIN) { + end_type = SPDK_JSON_VAL_ARRAY_END; + } else { + SPDK_JSON_DEBUG("Expected JSON object (%#x) or array (%#x) but got %#x\n", + SPDK_JSON_VAL_OBJECT_BEGIN, SPDK_JSON_VAL_ARRAY_END, val->type); + return NULL; + } + + lvl = 1; + for (it = val + 1; it->type != SPDK_JSON_VAL_INVALID && lvl != 0; it++) { + if (it->type == val->type) { + lvl++; + } else if (it->type == end_type) { + lvl--; + } + } + + /* if lvl != 0 we have invalid JSON object */ + if (lvl != 0) { + SPDK_JSON_DEBUG("Can't find end of object (type: %#x): lvl (%u) != 0)\n", val->type, lvl); + it = NULL; + } + + return it; +} + +struct spdk_json_val * +spdk_json_next(struct spdk_json_val *it) +{ + struct spdk_json_val *val, *next; + + switch (it->type) { + case SPDK_JSON_VAL_NAME: + val = spdk_json_value(it); + next = spdk_json_next(val); + break; + + /* We are in the middle of an array - get to next entry */ + case SPDK_JSON_VAL_NULL: + case SPDK_JSON_VAL_TRUE: + case SPDK_JSON_VAL_FALSE: + case SPDK_JSON_VAL_NUMBER: + case SPDK_JSON_VAL_STRING: + val = it + 1; + return val; + + case SPDK_JSON_VAL_ARRAY_BEGIN: + case SPDK_JSON_VAL_OBJECT_BEGIN: + next = spdk_json_skip_object_or_array(it); + break; + + /* Can't go to the next object if started from the end of array or object */ + case SPDK_JSON_VAL_ARRAY_END: + case SPDK_JSON_VAL_OBJECT_END: + case SPDK_JSON_VAL_INVALID: + return NULL; + default: + assert(false); + return NULL; + + } + + /* EOF ? */ + if (next == NULL) { + return NULL; + } + + switch (next->type) { + case SPDK_JSON_VAL_ARRAY_END: + case SPDK_JSON_VAL_OBJECT_END: + case SPDK_JSON_VAL_INVALID: + return NULL; + default: + /* Next value */ + return next; + } +} + +SPDK_LOG_REGISTER_COMPONENT("json_util", SPDK_LOG_JSON_UTIL) diff --git a/src/spdk/lib/json/json_write.c b/src/spdk/lib/json/json_write.c new file mode 100644 index 00000000..0cd600be --- /dev/null +++ b/src/spdk/lib/json/json_write.c @@ -0,0 +1,687 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/json.h" + +#include "spdk_internal/utf.h" + +struct spdk_json_write_ctx { + spdk_json_write_cb write_cb; + void *cb_ctx; + uint32_t flags; + uint32_t indent; + bool new_indent; + bool first_value; + bool failed; + size_t buf_filled; + uint8_t buf[4096]; +}; + +static int emit_buf_full(struct spdk_json_write_ctx *w, const void *data, size_t size); + +static int +fail(struct spdk_json_write_ctx *w) +{ + w->failed = true; + return -1; +} + +static int +flush_buf(struct spdk_json_write_ctx *w) +{ + int rc; + + rc = w->write_cb(w->cb_ctx, w->buf, w->buf_filled); + if (rc != 0) { + return fail(w); + } + + w->buf_filled = 0; + + return 0; +} + +struct spdk_json_write_ctx * +spdk_json_write_begin(spdk_json_write_cb write_cb, void *cb_ctx, uint32_t flags) +{ + struct spdk_json_write_ctx *w; + + w = calloc(1, sizeof(*w)); + if (w == NULL) { + return w; + } + + w->write_cb = write_cb; + w->cb_ctx = cb_ctx; + w->flags = flags; + w->indent = 0; + w->new_indent = false; + w->first_value = true; + w->failed = false; + w->buf_filled = 0; + + return w; +} + +int +spdk_json_write_end(struct spdk_json_write_ctx *w) +{ + bool failed; + int rc; + + if (w == NULL) { + return 0; + } + + failed = w->failed; + + rc = flush_buf(w); + if (rc != 0) { + failed = true; + } + + free(w); + + return failed ? -1 : 0; +} + +static inline int +emit(struct spdk_json_write_ctx *w, const void *data, size_t size) +{ + size_t buf_remain = sizeof(w->buf) - w->buf_filled; + + if (spdk_unlikely(size > buf_remain)) { + /* Not enough space in buffer for the new data. */ + return emit_buf_full(w, data, size); + } + + /* Copy the new data into buf. */ + memcpy(w->buf + w->buf_filled, data, size); + w->buf_filled += size; + return 0; +} + +static int +emit_buf_full(struct spdk_json_write_ctx *w, const void *data, size_t size) +{ + size_t buf_remain = sizeof(w->buf) - w->buf_filled; + int rc; + + assert(size > buf_remain); + + /* Copy as much of the new data as possible into the buffer and flush it. */ + memcpy(w->buf + w->buf_filled, data, buf_remain); + w->buf_filled += buf_remain; + + rc = flush_buf(w); + if (rc != 0) { + return fail(w); + } + + /* Recurse to emit the rest of the data. 
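+	 * The recursion terminates because flush_buf() empties the buffer before
+	 * each nested emit() call, so every subsequent call can consume up to a
+	 * full sizeof(w->buf) bytes of the remaining data.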
*/ + return emit(w, data + buf_remain, size - buf_remain); +} + +static int +emit_fmt(struct spdk_json_write_ctx *w, const void *data, size_t size) +{ + if (w->flags & SPDK_JSON_WRITE_FLAG_FORMATTED) { + return emit(w, data, size); + } + return 0; +} + +static int +emit_indent(struct spdk_json_write_ctx *w) +{ + uint32_t i; + + if (w->flags & SPDK_JSON_WRITE_FLAG_FORMATTED) { + for (i = 0; i < w->indent; i++) { + if (emit(w, " ", 2)) { return fail(w); } + } + } + return 0; +} + +static int +begin_value(struct spdk_json_write_ctx *w) +{ + // TODO: check for value state + if (w->new_indent) { + if (emit_fmt(w, "\n", 1)) { return fail(w); } + if (emit_indent(w)) { return fail(w); } + } + if (!w->first_value) { + if (emit(w, ",", 1)) { return fail(w); } + if (emit_fmt(w, "\n", 1)) { return fail(w); } + if (emit_indent(w)) { return fail(w); } + } + w->first_value = false; + w->new_indent = false; + return 0; +} + +int +spdk_json_write_val_raw(struct spdk_json_write_ctx *w, const void *data, size_t len) +{ + if (begin_value(w)) { return fail(w); } + return emit(w, data, len); +} + +int +spdk_json_write_null(struct spdk_json_write_ctx *w) +{ + if (begin_value(w)) { return fail(w); } + return emit(w, "null", 4); +} + +int +spdk_json_write_bool(struct spdk_json_write_ctx *w, bool val) +{ + if (begin_value(w)) { return fail(w); } + if (val) { + return emit(w, "true", 4); + } else { + return emit(w, "false", 5); + } +} + +int +spdk_json_write_int32(struct spdk_json_write_ctx *w, int32_t val) +{ + char buf[32]; + int count; + + if (begin_value(w)) { return fail(w); } + count = snprintf(buf, sizeof(buf), "%" PRId32, val); + if (count <= 0 || (size_t)count >= sizeof(buf)) { return fail(w); } + return emit(w, buf, count); +} + +int +spdk_json_write_uint32(struct spdk_json_write_ctx *w, uint32_t val) +{ + char buf[32]; + int count; + + if (begin_value(w)) { return fail(w); } + count = snprintf(buf, sizeof(buf), "%" PRIu32, val); + if (count <= 0 || (size_t)count >= sizeof(buf)) { return fail(w); } + return emit(w, buf, count); +} + +int +spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val) +{ + char buf[32]; + int count; + + if (begin_value(w)) { return fail(w); } + count = snprintf(buf, sizeof(buf), "%" PRId64, val); + if (count <= 0 || (size_t)count >= sizeof(buf)) { return fail(w); } + return emit(w, buf, count); +} + +int +spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val) +{ + char buf[32]; + int count; + + if (begin_value(w)) { return fail(w); } + count = snprintf(buf, sizeof(buf), "%" PRIu64, val); + if (count <= 0 || (size_t)count >= sizeof(buf)) { return fail(w); } + return emit(w, buf, count); +} + +static void +write_hex_4(void *dest, uint16_t val) +{ + uint8_t *p = dest; + char hex[] = "0123456789ABCDEF"; + + p[0] = hex[(val >> 12)]; + p[1] = hex[(val >> 8) & 0xF]; + p[2] = hex[(val >> 4) & 0xF]; + p[3] = hex[val & 0xF]; +} + +static inline int +write_codepoint(struct spdk_json_write_ctx *w, uint32_t codepoint) +{ + static const uint8_t escapes[] = { + ['\b'] = 'b', + ['\f'] = 'f', + ['\n'] = 'n', + ['\r'] = 'r', + ['\t'] = 't', + ['"'] = '"', + ['\\'] = '\\', + /* + * Forward slash (/) is intentionally not converted to an escape + * (it is valid unescaped). 
+ */ + }; + uint16_t high, low; + char out[13]; + size_t out_len; + + if (codepoint < sizeof(escapes) && escapes[codepoint]) { + out[0] = '\\'; + out[1] = escapes[codepoint]; + out_len = 2; + } else if (codepoint >= 0x20 && codepoint < 0x7F) { + /* + * Encode plain ASCII directly (except 0x7F, since it is really + * a control character, despite the JSON spec not considering it one). + */ + out[0] = (uint8_t)codepoint; + out_len = 1; + } else if (codepoint < 0x10000) { + out[0] = '\\'; + out[1] = 'u'; + write_hex_4(&out[2], (uint16_t)codepoint); + out_len = 6; + } else { + utf16_encode_surrogate_pair(codepoint, &high, &low); + out[0] = '\\'; + out[1] = 'u'; + write_hex_4(&out[2], high); + out[6] = '\\'; + out[7] = 'u'; + write_hex_4(&out[8], low); + out_len = 12; + } + + return emit(w, out, out_len); +} + +static int +write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len) +{ + const uint8_t *p = val; + const uint8_t *end = val + len; + + if (emit(w, "\"", 1)) { return fail(w); } + + while (p != end) { + int codepoint_len; + uint32_t codepoint; + + codepoint_len = utf8_valid(p, end); + switch (codepoint_len) { + case 1: + codepoint = utf8_decode_unsafe_1(p); + break; + case 2: + codepoint = utf8_decode_unsafe_2(p); + break; + case 3: + codepoint = utf8_decode_unsafe_3(p); + break; + case 4: + codepoint = utf8_decode_unsafe_4(p); + break; + default: + return fail(w); + } + + if (write_codepoint(w, codepoint)) { return fail(w); } + p += codepoint_len; + } + + return emit(w, "\"", 1); +} + +static int +write_string_or_name_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len) +{ + const uint16_t *p = val; + const uint16_t *end = val + len; + + if (emit(w, "\"", 1)) { return fail(w); } + + while (p != end) { + int codepoint_len; + uint32_t codepoint; + + codepoint_len = utf16le_valid(p, end); + switch (codepoint_len) { + case 1: + codepoint = from_le16(&p[0]); + break; + case 2: + codepoint = utf16_decode_surrogate_pair(from_le16(&p[0]), from_le16(&p[1])); + break; + default: + return fail(w); + } + + if (write_codepoint(w, codepoint)) { return fail(w); } + p += codepoint_len; + } + + return emit(w, "\"", 1); +} + +int +spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len) +{ + if (begin_value(w)) { return fail(w); } + return write_string_or_name(w, val, len); +} + +int +spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val) +{ + return spdk_json_write_string_raw(w, val, strlen(val)); +} + +int +spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len) +{ + if (begin_value(w)) { return fail(w); } + return write_string_or_name_utf16le(w, val, len); +} + +int +spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val) +{ + const uint16_t *p; + size_t len; + + for (len = 0, p = val; *p; p++) { + len++; + } + + return spdk_json_write_string_utf16le_raw(w, val, len); +} + +int +spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt, ...) 
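+/*
+ * A minimal usage sketch of this writer, assuming a write callback with the
+ * (cb_ctx, data, size) shape used by flush_buf() above; the key names and
+ * values are placeholders:
+ *
+ *   static int
+ *   write_cb(void *cb_ctx, const void *data, size_t size)
+ *   {
+ *           return fwrite(data, 1, size, (FILE *)cb_ctx) == size ? 0 : -1;
+ *   }
+ *
+ *   struct spdk_json_write_ctx *w = spdk_json_write_begin(write_cb, stdout, 0);
+ *   spdk_json_write_object_begin(w);
+ *   spdk_json_write_named_string(w, "name", "example");
+ *   spdk_json_write_named_uint32(w, "value", 42);
+ *   spdk_json_write_object_end(w);
+ *   spdk_json_write_end(w);
+ */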
+{ + va_list args; + int rc; + + va_start(args, fmt); + rc = spdk_json_write_string_fmt_v(w, fmt, args); + va_end(args); + + return rc; +} + +int +spdk_json_write_string_fmt_v(struct spdk_json_write_ctx *w, const char *fmt, va_list args) +{ + char *s; + int rc; + + s = spdk_vsprintf_alloc(fmt, args); + if (s == NULL) { + return -1; + } + + rc = spdk_json_write_string(w, s); + free(s); + return rc; +} + +int +spdk_json_write_array_begin(struct spdk_json_write_ctx *w) +{ + if (begin_value(w)) { return fail(w); } + w->first_value = true; + w->new_indent = true; + w->indent++; + if (emit(w, "[", 1)) { return fail(w); } + return 0; +} + +int +spdk_json_write_array_end(struct spdk_json_write_ctx *w) +{ + w->first_value = false; + if (w->indent == 0) { return fail(w); } + w->indent--; + if (!w->new_indent) { + if (emit_fmt(w, "\n", 1)) { return fail(w); } + if (emit_indent(w)) { return fail(w); } + } + w->new_indent = false; + return emit(w, "]", 1); +} + +int +spdk_json_write_object_begin(struct spdk_json_write_ctx *w) +{ + if (begin_value(w)) { return fail(w); } + w->first_value = true; + w->new_indent = true; + w->indent++; + if (emit(w, "{", 1)) { return fail(w); } + return 0; +} + +int +spdk_json_write_object_end(struct spdk_json_write_ctx *w) +{ + w->first_value = false; + w->indent--; + if (!w->new_indent) { + if (emit_fmt(w, "\n", 1)) { return fail(w); } + if (emit_indent(w)) { return fail(w); } + } + w->new_indent = false; + return emit(w, "}", 1); +} + +int +spdk_json_write_name_raw(struct spdk_json_write_ctx *w, const char *name, size_t len) +{ + /* TODO: check that container is an object */ + if (begin_value(w)) { return fail(w); } + if (write_string_or_name(w, name, len)) { return fail(w); } + w->first_value = true; + if (emit(w, ":", 1)) { return fail(w); } + return emit_fmt(w, " ", 1); +} + +int +spdk_json_write_name(struct spdk_json_write_ctx *w, const char *name) +{ + return spdk_json_write_name_raw(w, name, strlen(name)); +} + +int +spdk_json_write_val(struct spdk_json_write_ctx *w, const struct spdk_json_val *val) +{ + size_t num_values, i; + + switch (val->type) { + case SPDK_JSON_VAL_NUMBER: + return spdk_json_write_val_raw(w, val->start, val->len); + + case SPDK_JSON_VAL_STRING: + return spdk_json_write_string_raw(w, val->start, val->len); + + case SPDK_JSON_VAL_NAME: + return spdk_json_write_name_raw(w, val->start, val->len); + + case SPDK_JSON_VAL_TRUE: + return spdk_json_write_bool(w, true); + + case SPDK_JSON_VAL_FALSE: + return spdk_json_write_bool(w, false); + + case SPDK_JSON_VAL_NULL: + return spdk_json_write_null(w); + + case SPDK_JSON_VAL_ARRAY_BEGIN: + case SPDK_JSON_VAL_OBJECT_BEGIN: + num_values = val[0].len; + + if (val[0].type == SPDK_JSON_VAL_OBJECT_BEGIN) { + if (spdk_json_write_object_begin(w)) { + return fail(w); + } + } else { + if (spdk_json_write_array_begin(w)) { + return fail(w); + } + } + + // Loop up to and including the _END value + for (i = 0; i < num_values + 1;) { + if (spdk_json_write_val(w, &val[i + 1])) { + return fail(w); + } + if (val[i + 1].type == SPDK_JSON_VAL_ARRAY_BEGIN || + val[i + 1].type == SPDK_JSON_VAL_OBJECT_BEGIN) { + i += val[i + 1].len + 2; + } else { + i++; + } + } + return 0; + + case SPDK_JSON_VAL_ARRAY_END: + return spdk_json_write_array_end(w); + + case SPDK_JSON_VAL_OBJECT_END: + return spdk_json_write_object_end(w); + + case SPDK_JSON_VAL_INVALID: + // Handle INVALID to make the compiler happy (and catch other unhandled types) + return fail(w); + } + + return fail(w); +} + +int spdk_json_write_named_null(struct 
spdk_json_write_ctx *w, const char *name) +{ + int rc = spdk_json_write_name(w, name); + return rc ? rc : spdk_json_write_null(w); +} + +int spdk_json_write_named_bool(struct spdk_json_write_ctx *w, const char *name, bool val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_bool(w, val); +} + +int spdk_json_write_named_int32(struct spdk_json_write_ctx *w, const char *name, int32_t val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_int32(w, val); +} + +int spdk_json_write_named_uint32(struct spdk_json_write_ctx *w, const char *name, uint32_t val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_uint32(w, val); +} + +int spdk_json_write_named_uint64(struct spdk_json_write_ctx *w, const char *name, uint64_t val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_uint64(w, val); +} + +int spdk_json_write_named_int64(struct spdk_json_write_ctx *w, const char *name, int64_t val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_int64(w, val); +} + +int spdk_json_write_named_string(struct spdk_json_write_ctx *w, const char *name, const char *val) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_string(w, val); +} + +int spdk_json_write_named_string_fmt(struct spdk_json_write_ctx *w, const char *name, + const char *fmt, ...) +{ + va_list args; + int rc; + + va_start(args, fmt); + rc = spdk_json_write_named_string_fmt_v(w, name, fmt, args); + va_end(args); + + return rc; +} + +int spdk_json_write_named_string_fmt_v(struct spdk_json_write_ctx *w, const char *name, + const char *fmt, va_list args) +{ + char *s; + int rc; + + rc = spdk_json_write_name(w, name); + if (rc) { + return rc; + } + + s = spdk_vsprintf_alloc(fmt, args); + + if (s == NULL) { + return -1; + } + + rc = spdk_json_write_string(w, s); + free(s); + return rc; +} + +int spdk_json_write_named_array_begin(struct spdk_json_write_ctx *w, const char *name) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_array_begin(w); +} + +int spdk_json_write_named_object_begin(struct spdk_json_write_ctx *w, const char *name) +{ + int rc = spdk_json_write_name(w, name); + + return rc ? rc : spdk_json_write_object_begin(w); +} diff --git a/src/spdk/lib/jsonrpc/Makefile b/src/spdk/lib/jsonrpc/Makefile new file mode 100644 index 00000000..dd323f1e --- /dev/null +++ b/src/spdk/lib/jsonrpc/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = jsonrpc +C_SRCS = jsonrpc_server.c jsonrpc_server_tcp.c +C_SRCS += jsonrpc_client.c jsonrpc_client_tcp.c + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/jsonrpc/jsonrpc_client.c b/src/spdk/lib/jsonrpc/jsonrpc_client.c new file mode 100644 index 00000000..2426f4e3 --- /dev/null +++ b/src/spdk/lib/jsonrpc/jsonrpc_client.c @@ -0,0 +1,213 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/util.h" +#include "jsonrpc_internal.h" + +struct jsonrpc_response { + const struct spdk_json_val *version; + const struct spdk_json_val *id; + const struct spdk_json_val *result; +}; + +static int +capture_string(const struct spdk_json_val *val, void *out) +{ + const struct spdk_json_val **vptr = out; + + if (spdk_json_strequal(val, "2.0") != true) { + return SPDK_JSON_PARSE_INVALID; + } + + *vptr = val; + return 0; +} + +static int +capture_id(const struct spdk_json_val *val, void *out) +{ + const struct spdk_json_val **vptr = out; + + if (val->type != SPDK_JSON_VAL_STRING && val->type != SPDK_JSON_VAL_NUMBER) { + return SPDK_JSON_PARSE_INVALID; + } + + *vptr = val; + return 0; +} + +static int +capture_any(const struct spdk_json_val *val, void *out) +{ + const struct spdk_json_val **vptr = out; + + *vptr = val; + return 0; +} + +static const struct spdk_json_object_decoder jsonrpc_response_decoders[] = { + {"jsonrpc", offsetof(struct jsonrpc_response, version), capture_string}, + {"id", offsetof(struct jsonrpc_response, id), capture_id}, + {"result", offsetof(struct jsonrpc_response, result), capture_any}, +}; + +static int +parse_single_response(struct spdk_json_val *values, + spdk_jsonrpc_client_response_parser parser_fn, + void *parser_ctx) +{ + struct jsonrpc_response resp = {}; + + if (spdk_json_decode_object(values, jsonrpc_response_decoders, + SPDK_COUNTOF(jsonrpc_response_decoders), + &resp)) { + return SPDK_JSON_PARSE_INVALID; + } + + return parser_fn(parser_ctx, resp.result); +} + +int +spdk_jsonrpc_parse_response(struct spdk_jsonrpc_client *client, void *json, size_t size) +{ + ssize_t rc; + void *end = NULL; + + /* Check to see if we have received a full JSON value. */ + rc = spdk_json_parse(json, size, NULL, 0, &end, 0); + if (rc == SPDK_JSON_PARSE_INCOMPLETE) { + return rc; + } + + SPDK_DEBUGLOG(SPDK_LOG_RPC_CLIENT, "Json string is :\n%s\n", (char *)json); + if (rc < 0 || rc > SPDK_JSONRPC_MAX_VALUES) { + SPDK_ERRLOG("JSON parse error\n"); + /* + * Can't recover from parse error (no guaranteed resync point in streaming JSON). + * Return an error to indicate that the connection should be closed. + */ + return SPDK_JSON_PARSE_INVALID; + } + + /* Decode a second time now that there is a full JSON value available. 
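 * This second pass fills client->values (up to SPDK_JSONRPC_MAX_VALUES tokens) and, because of SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE, decodes string values directly inside the receive buffer.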
*/ + rc = spdk_json_parse(json, size, client->values, SPDK_JSONRPC_MAX_VALUES, &end, + SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE); + if (rc < 0 || rc > SPDK_JSONRPC_MAX_VALUES) { + SPDK_ERRLOG("JSON parse error on second pass\n"); + return SPDK_JSON_PARSE_INVALID; + } + + assert(end != NULL); + + if (client->values[0].type != SPDK_JSON_VAL_OBJECT_BEGIN) { + SPDK_ERRLOG("top-level JSON value was not object\n"); + return SPDK_JSON_PARSE_INVALID; + } + + rc = parse_single_response(client->values, client->parser_fn, client->parser_ctx); + + return rc; +} + +static int +jsonrpc_client_write_cb(void *cb_ctx, const void *data, size_t size) +{ + struct spdk_jsonrpc_client_request *request = cb_ctx; + size_t new_size = request->send_buf_size; + + while (new_size - request->send_len < size) { + if (new_size >= SPDK_JSONRPC_SEND_BUF_SIZE_MAX) { + SPDK_ERRLOG("Send buf exceeded maximum size (%zu)\n", + (size_t)SPDK_JSONRPC_SEND_BUF_SIZE_MAX); + return -ENOSPC; + } + + new_size *= 2; + } + + if (new_size != request->send_buf_size) { + uint8_t *new_buf; + + new_buf = realloc(request->send_buf, new_size); + if (new_buf == NULL) { + SPDK_ERRLOG("Resizing send_buf failed (current size %zu, new size %zu)\n", + request->send_buf_size, new_size); + return -ENOMEM; + } + + request->send_buf = new_buf; + request->send_buf_size = new_size; + } + + memcpy(request->send_buf + request->send_len, data, size); + request->send_len += size; + + return 0; +} + +struct spdk_json_write_ctx * +spdk_jsonrpc_begin_request(struct spdk_jsonrpc_client_request *request, int32_t id, + const char *method) +{ + struct spdk_json_write_ctx *w; + + w = spdk_json_write_begin(jsonrpc_client_write_cb, request, 0); + if (w == NULL) { + return NULL; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "jsonrpc", "2.0"); + + if (id >= 0) { + spdk_json_write_named_int32(w, "id", id); + } + + if (method) { + spdk_json_write_named_string(w, "method", method); + } + + return w; +} + +void +spdk_jsonrpc_end_request(struct spdk_jsonrpc_client_request *request, struct spdk_json_write_ctx *w) +{ + assert(w != NULL); + + spdk_json_write_object_end(w); + spdk_json_write_end(w); + jsonrpc_client_write_cb(request, "\n", 1); +} + +SPDK_LOG_REGISTER_COMPONENT("rpc_client", SPDK_LOG_RPC_CLIENT) diff --git a/src/spdk/lib/jsonrpc/jsonrpc_client_tcp.c b/src/spdk/lib/jsonrpc/jsonrpc_client_tcp.c new file mode 100644 index 00000000..a7696c84 --- /dev/null +++ b/src/spdk/lib/jsonrpc/jsonrpc_client_tcp.c @@ -0,0 +1,284 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "spdk/string.h" +#include "jsonrpc_internal.h" + +#define RPC_DEFAULT_PORT "5260" + +static struct spdk_jsonrpc_client * +_spdk_jsonrpc_client_connect(int domain, int protocol, + struct sockaddr *server_addr, socklen_t addrlen) +{ + struct spdk_jsonrpc_client *client; + int rc; + + client = calloc(1, sizeof(struct spdk_jsonrpc_client)); + if (client == NULL) { + return NULL; + } + + client->sockfd = socket(domain, SOCK_STREAM, protocol); + if (client->sockfd < 0) { + SPDK_ERRLOG("socket() failed\n"); + free(client); + return NULL; + } + + rc = connect(client->sockfd, server_addr, addrlen); + if (rc != 0) { + SPDK_ERRLOG("could not connet JSON-RPC server: %s\n", spdk_strerror(errno)); + close(client->sockfd); + free(client); + return NULL; + } + + /* memory malloc for recv-buf */ + client->recv_buf = malloc(SPDK_JSONRPC_SEND_BUF_SIZE_INIT); + if (!client->recv_buf) { + SPDK_ERRLOG("memory malloc for recv-buf failed\n"); + close(client->sockfd); + free(client); + return NULL; + } + client->recv_buf_size = SPDK_JSONRPC_SEND_BUF_SIZE_INIT; + + return client; +} + +struct spdk_jsonrpc_client * +spdk_jsonrpc_client_connect(const char *rpc_sock_addr, int addr_family) +{ + struct spdk_jsonrpc_client *client; + + if (addr_family == AF_UNIX) { + /* Unix Domain Socket */ + struct sockaddr_un rpc_sock_addr_unix = {}; + int rc; + + rpc_sock_addr_unix.sun_family = AF_UNIX; + rc = snprintf(rpc_sock_addr_unix.sun_path, + sizeof(rpc_sock_addr_unix.sun_path), + "%s", rpc_sock_addr); + if (rc < 0 || (size_t)rc >= sizeof(rpc_sock_addr_unix.sun_path)) { + SPDK_ERRLOG("RPC Listen address Unix socket path too long\n"); + return NULL; + } + + client = _spdk_jsonrpc_client_connect(AF_UNIX, 0, + (struct sockaddr *)&rpc_sock_addr_unix, + sizeof(rpc_sock_addr_unix)); + } else { + /* TCP/IP socket */ + struct addrinfo hints; + struct addrinfo *res; + char *tmp; + char *host, *port; + + tmp = strdup(rpc_sock_addr); + if (!tmp) { + SPDK_ERRLOG("Out of memory\n"); + return NULL; + } + + if (spdk_parse_ip_addr(tmp, &host, &port) < 0) { + free(tmp); + SPDK_ERRLOG("Invalid listen address '%s'\n", rpc_sock_addr); + return NULL; + } + + if (port == NULL) { + port = RPC_DEFAULT_PORT; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + if (getaddrinfo(host, port, &hints, &res) != 0) { + free(tmp); + SPDK_ERRLOG("Unable to look up RPC connnect address '%s'\n", rpc_sock_addr); + return NULL; + } + + client = _spdk_jsonrpc_client_connect(res->ai_family, res->ai_protocol, + res->ai_addr, res->ai_addrlen); + + freeaddrinfo(res); + free(tmp); + } + + return client; +} + +void +spdk_jsonrpc_client_close(struct spdk_jsonrpc_client *client) 
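/*
 * Typical client lifecycle, sketched from the functions defined in this file and in
 * jsonrpc_client.c (error handling omitted; the socket path, request id and method name
 * are only examples, and parse_cb/cb_ctx are caller-supplied):
 *
 *   struct spdk_jsonrpc_client *client = spdk_jsonrpc_client_connect("/var/tmp/spdk.sock", AF_UNIX);
 *   struct spdk_jsonrpc_client_request *req = spdk_jsonrpc_client_create_request();
 *   struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_request(req, 1, "get_rpc_methods");
 *   spdk_jsonrpc_end_request(req, w);
 *   spdk_jsonrpc_client_send_request(client, req);
 *   spdk_jsonrpc_client_recv_response(client, parse_cb, cb_ctx);
 *   spdk_jsonrpc_client_free_request(req);
 *   spdk_jsonrpc_client_close(client);
 */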
+{ + if (client->sockfd >= 0) { + close(client->sockfd); + free(client->recv_buf); + client->sockfd = -1; + } + + free(client); +} + +struct spdk_jsonrpc_client_request * +spdk_jsonrpc_client_create_request(void) +{ + struct spdk_jsonrpc_client_request *request; + + request = calloc(1, sizeof(*request)); + if (request == NULL) { + return NULL; + } + + /* memory malloc for send-buf */ + request->send_buf = malloc(SPDK_JSONRPC_SEND_BUF_SIZE_INIT); + if (!request->send_buf) { + SPDK_ERRLOG("memory malloc for send-buf failed\n"); + free(request); + return NULL; + } + request->send_buf_size = SPDK_JSONRPC_SEND_BUF_SIZE_INIT; + + return request; +} + +void +spdk_jsonrpc_client_free_request(struct spdk_jsonrpc_client_request *req) +{ + free(req->send_buf); + free(req); +} + +int +spdk_jsonrpc_client_send_request(struct spdk_jsonrpc_client *client, + struct spdk_jsonrpc_client_request *request) +{ + ssize_t rc; + + /* Reset offset in request */ + request->send_offset = 0; + + while (request->send_len > 0) { + rc = send(client->sockfd, request->send_buf + request->send_offset, + request->send_len, 0); + if (rc <= 0) { + if (rc < 0 && errno == EINTR) { + rc = 0; + } else { + return rc; + } + } + + request->send_offset += rc; + request->send_len -= rc; + } + + return 0; +} + +static int +recv_buf_expand(struct spdk_jsonrpc_client *client) +{ + uint8_t *new_buf; + + if (client->recv_buf_size * 2 > SPDK_JSONRPC_SEND_BUF_SIZE_MAX) { + return -ENOSPC; + } + + new_buf = realloc(client->recv_buf, client->recv_buf_size * 2); + if (new_buf == NULL) { + SPDK_ERRLOG("Resizing recv_buf failed (current size %zu, new size %zu)\n", + client->recv_buf_size, client->recv_buf_size * 2); + return -ENOMEM; + } + + client->recv_buf = new_buf; + client->recv_buf_size *= 2; + + return 0; +} + +int +spdk_jsonrpc_client_recv_response(struct spdk_jsonrpc_client *client, + spdk_jsonrpc_client_response_parser parser_fn, + void *parser_ctx) +{ + ssize_t rc = 0; + size_t recv_avail; + size_t recv_offset = 0; + + client->parser_fn = parser_fn; + client->parser_ctx = parser_ctx; + + recv_avail = client->recv_buf_size; + + while (recv_avail > 0) { + rc = recv(client->sockfd, client->recv_buf + recv_offset, recv_avail, 0); + if (rc < 0) { + if (errno == EINTR) { + continue; + } else { + return errno; + } + } else if (rc == 0) { + return -EIO; + } + + recv_offset += rc; + recv_avail -= rc; + + /* Check to see if we have received a full JSON value. */ + rc = spdk_jsonrpc_parse_response(client, client->recv_buf, recv_offset); + if (rc == 0) { + /* Successfully parsed response */ + return 0; + } else if (rc != SPDK_JSON_PARSE_INCOMPLETE) { + SPDK_ERRLOG("jsonrpc parse request failed\n"); + return -EINVAL; + } + + /* Expand receive buffer if larger one is needed */ + if (recv_avail == 0) { + rc = recv_buf_expand(client); + if (rc != 0) { + return rc; + } + recv_avail = client->recv_buf_size - recv_offset; + } + } + + return 0; +} diff --git a/src/spdk/lib/jsonrpc/jsonrpc_internal.h b/src/spdk/lib/jsonrpc/jsonrpc_internal.h new file mode 100644 index 00000000..87355fdb --- /dev/null +++ b/src/spdk/lib/jsonrpc/jsonrpc_internal.h @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_JSONRPC_INTERNAL_H_ +#define SPDK_JSONRPC_INTERNAL_H_ + +#include "spdk/stdinc.h" + +#include "spdk/jsonrpc.h" + +#include "spdk_internal/log.h" + +#define SPDK_JSONRPC_RECV_BUF_SIZE (32 * 1024) +#define SPDK_JSONRPC_SEND_BUF_SIZE_INIT (32 * 1024) +#define SPDK_JSONRPC_SEND_BUF_SIZE_MAX (32 * 1024 * 1024) +#define SPDK_JSONRPC_ID_MAX_LEN 128 +#define SPDK_JSONRPC_MAX_CONNS 64 +#define SPDK_JSONRPC_MAX_VALUES 1024 + +struct spdk_jsonrpc_request { + struct spdk_jsonrpc_server_conn *conn; + + /* Copy of request id value */ + struct spdk_json_val id; + uint8_t id_data[SPDK_JSONRPC_ID_MAX_LEN]; + + /* Total space allocated for send_buf */ + size_t send_buf_size; + + /* Number of bytes used in send_buf (<= send_buf_size) */ + size_t send_len; + + size_t send_offset; + + uint8_t *send_buf; + + STAILQ_ENTRY(spdk_jsonrpc_request) link; +}; + +struct spdk_jsonrpc_server_conn { + struct spdk_jsonrpc_server *server; + int sockfd; + bool closed; + struct spdk_json_val values[SPDK_JSONRPC_MAX_VALUES]; + size_t recv_len; + uint8_t recv_buf[SPDK_JSONRPC_RECV_BUF_SIZE]; + uint32_t outstanding_requests; + + pthread_spinlock_t queue_lock; + STAILQ_HEAD(, spdk_jsonrpc_request) send_queue; + + struct spdk_jsonrpc_request *send_request; + + TAILQ_ENTRY(spdk_jsonrpc_server_conn) link; +}; + +struct spdk_jsonrpc_server { + int sockfd; + spdk_jsonrpc_handle_request_fn handle_request; + + TAILQ_HEAD(, spdk_jsonrpc_server_conn) free_conns; + TAILQ_HEAD(, spdk_jsonrpc_server_conn) conns; + + struct spdk_jsonrpc_server_conn conns_array[SPDK_JSONRPC_MAX_CONNS]; +}; + +struct spdk_jsonrpc_client_request { + /* Total space allocated for send_buf */ + size_t send_buf_size; + + /* Number of bytes used in send_buf (<= send_buf_size) */ + size_t send_len; + + size_t send_offset; + + uint8_t *send_buf; +}; + +struct spdk_jsonrpc_client { + int sockfd; + + struct spdk_json_val values[SPDK_JSONRPC_MAX_VALUES]; + size_t recv_buf_size; + uint8_t *recv_buf; + + spdk_jsonrpc_client_response_parser parser_fn; + void *parser_ctx; +}; + +/* jsonrpc_server_tcp */ +void spdk_jsonrpc_server_handle_request(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, + const struct spdk_json_val *params); +void spdk_jsonrpc_server_handle_error(struct spdk_jsonrpc_request *request, int error); + +/* Might 
be called from any thread */ +void spdk_jsonrpc_server_send_response(struct spdk_jsonrpc_request *request); + +/* jsonrpc_server */ +int spdk_jsonrpc_parse_request(struct spdk_jsonrpc_server_conn *conn, void *json, size_t size); + +/* Must be called only from server poll thread */ +void spdk_jsonrpc_free_request(struct spdk_jsonrpc_request *request); + +/* + * Parse JSON data as RPC command response. + * + * \param client structure pointer of jsonrpc client + * \param json Raw JSON data; must be encoded in UTF-8. + * \param size Size of data in bytes. + * + * \return 0 On success + * SPDK_JSON_PARSE_INCOMPLETE If the provided data is not a complete JSON value + * SPDK_JSON_PARSE_INVALID if the provided data has invalid JSON syntax. + */ +int spdk_jsonrpc_parse_response(struct spdk_jsonrpc_client *client, void *json, + size_t size); + +#endif diff --git a/src/spdk/lib/jsonrpc/jsonrpc_server.c b/src/spdk/lib/jsonrpc/jsonrpc_server.c new file mode 100644 index 00000000..6e2a5b2c --- /dev/null +++ b/src/spdk/lib/jsonrpc/jsonrpc_server.c @@ -0,0 +1,360 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "jsonrpc_internal.h" + +#include "spdk/util.h" + +struct jsonrpc_request { + const struct spdk_json_val *version; + const struct spdk_json_val *method; + const struct spdk_json_val *params; + const struct spdk_json_val *id; +}; + +static int +capture_val(const struct spdk_json_val *val, void *out) +{ + const struct spdk_json_val **vptr = out; + + *vptr = val; + return 0; +} + +static const struct spdk_json_object_decoder jsonrpc_request_decoders[] = { + {"jsonrpc", offsetof(struct jsonrpc_request, version), capture_val, true}, + {"method", offsetof(struct jsonrpc_request, method), capture_val}, + {"params", offsetof(struct jsonrpc_request, params), capture_val, true}, + {"id", offsetof(struct jsonrpc_request, id), capture_val, true}, +}; + +static void +parse_single_request(struct spdk_jsonrpc_request *request, struct spdk_json_val *values) +{ + bool invalid = false; + struct jsonrpc_request req = {}; + + if (spdk_json_decode_object(values, jsonrpc_request_decoders, + SPDK_COUNTOF(jsonrpc_request_decoders), + &req)) { + invalid = true; + goto done; + } + + if (req.version && (req.version->type != SPDK_JSON_VAL_STRING || + !spdk_json_strequal(req.version, "2.0"))) { + invalid = true; + } + + if (!req.method || req.method->type != SPDK_JSON_VAL_STRING) { + req.method = NULL; + invalid = true; + } + + if (req.id) { + if (req.id->type == SPDK_JSON_VAL_STRING || + req.id->type == SPDK_JSON_VAL_NUMBER) { + /* Copy value into request */ + if (req.id->len <= SPDK_JSONRPC_ID_MAX_LEN) { + request->id.type = req.id->type; + request->id.len = req.id->len; + memcpy(request->id.start, req.id->start, req.id->len); + } else { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "JSON-RPC request id too long (%u)\n", + req.id->len); + invalid = true; + } + } else if (req.id->type == SPDK_JSON_VAL_NULL) { + request->id.type = SPDK_JSON_VAL_NULL; + } else { + invalid = true; + } + } + + if (req.params) { + if (req.params->type != SPDK_JSON_VAL_ARRAY_BEGIN && + req.params->type != SPDK_JSON_VAL_OBJECT_BEGIN) { + req.params = NULL; + invalid = true; + } + } + +done: + if (invalid) { + spdk_jsonrpc_server_handle_error(request, SPDK_JSONRPC_ERROR_INVALID_REQUEST); + } else { + spdk_jsonrpc_server_handle_request(request, req.method, req.params); + } +} + +int +spdk_jsonrpc_parse_request(struct spdk_jsonrpc_server_conn *conn, void *json, size_t size) +{ + struct spdk_jsonrpc_request *request; + ssize_t rc; + void *end = NULL; + + /* Check to see if we have received a full JSON value. 
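 * The first pass below passes a NULL values array, so it only locates the end of the value without storing any tokens.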
*/ + rc = spdk_json_parse(json, size, NULL, 0, &end, 0); + if (rc == SPDK_JSON_PARSE_INCOMPLETE) { + return 0; + } + + request = calloc(1, sizeof(*request)); + if (request == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "Out of memory allocating request\n"); + return -1; + } + + conn->outstanding_requests++; + + request->conn = conn; + request->id.start = request->id_data; + request->id.len = 0; + request->id.type = SPDK_JSON_VAL_INVALID; + request->send_offset = 0; + request->send_len = 0; + request->send_buf_size = SPDK_JSONRPC_SEND_BUF_SIZE_INIT; + request->send_buf = malloc(request->send_buf_size); + if (request->send_buf == NULL) { + SPDK_ERRLOG("Failed to allocate send_buf (%zu bytes)\n", request->send_buf_size); + conn->outstanding_requests--; + free(request); + return -1; + } + + if (rc < 0 || rc > SPDK_JSONRPC_MAX_VALUES) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "JSON parse error\n"); + spdk_jsonrpc_server_handle_error(request, SPDK_JSONRPC_ERROR_PARSE_ERROR); + + /* + * Can't recover from parse error (no guaranteed resync point in streaming JSON). + * Return an error to indicate that the connection should be closed. + */ + return -1; + } + + /* Decode a second time now that there is a full JSON value available. */ + rc = spdk_json_parse(json, size, conn->values, SPDK_JSONRPC_MAX_VALUES, &end, + SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE); + if (rc < 0 || rc > SPDK_JSONRPC_MAX_VALUES) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "JSON parse error on second pass\n"); + spdk_jsonrpc_server_handle_error(request, SPDK_JSONRPC_ERROR_PARSE_ERROR); + return -1; + } + + assert(end != NULL); + + if (conn->values[0].type == SPDK_JSON_VAL_OBJECT_BEGIN) { + parse_single_request(request, conn->values); + } else if (conn->values[0].type == SPDK_JSON_VAL_ARRAY_BEGIN) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "Got batch array (not currently supported)\n"); + spdk_jsonrpc_server_handle_error(request, SPDK_JSONRPC_ERROR_INVALID_REQUEST); + } else { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "top-level JSON value was not array or object\n"); + spdk_jsonrpc_server_handle_error(request, SPDK_JSONRPC_ERROR_INVALID_REQUEST); + } + + return end - json; +} + +static int +spdk_jsonrpc_server_write_cb(void *cb_ctx, const void *data, size_t size) +{ + struct spdk_jsonrpc_request *request = cb_ctx; + size_t new_size = request->send_buf_size; + + while (new_size - request->send_len < size) { + if (new_size >= SPDK_JSONRPC_SEND_BUF_SIZE_MAX) { + SPDK_ERRLOG("Send buf exceeded maximum size (%zu)\n", + (size_t)SPDK_JSONRPC_SEND_BUF_SIZE_MAX); + return -1; + } + + new_size *= 2; + } + + if (new_size != request->send_buf_size) { + uint8_t *new_buf; + + new_buf = realloc(request->send_buf, new_size); + if (new_buf == NULL) { + SPDK_ERRLOG("Resizing send_buf failed (current size %zu, new size %zu)\n", + request->send_buf_size, new_size); + return -1; + } + + request->send_buf = new_buf; + request->send_buf_size = new_size; + } + + memcpy(request->send_buf + request->send_len, data, size); + request->send_len += size; + + return 0; +} + +static struct spdk_json_write_ctx * +begin_response(struct spdk_jsonrpc_request *request) +{ + struct spdk_json_write_ctx *w; + + w = spdk_json_write_begin(spdk_jsonrpc_server_write_cb, request, 0); + if (w == NULL) { + return NULL; + } + + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "jsonrpc"); + spdk_json_write_string(w, "2.0"); + + spdk_json_write_name(w, "id"); + spdk_json_write_val(w, &request->id); + + return w; +} + +static void +skip_response(struct spdk_jsonrpc_request *request) +{ + request->send_len = 0; + 
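/* With send_len reset to 0 nothing is written for this request; queueing it anyway lets the poll loop release it through the normal send path. */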
spdk_jsonrpc_server_send_response(request); +} + +static void +end_response(struct spdk_jsonrpc_request *request, struct spdk_json_write_ctx *w) +{ + spdk_json_write_object_end(w); + spdk_json_write_end(w); + spdk_jsonrpc_server_write_cb(request, "\n", 1); + spdk_jsonrpc_server_send_response(request); +} + +void +spdk_jsonrpc_free_request(struct spdk_jsonrpc_request *request) +{ + request->conn->outstanding_requests--; + free(request->send_buf); + free(request); +} + +struct spdk_json_write_ctx * +spdk_jsonrpc_begin_result(struct spdk_jsonrpc_request *request) +{ + struct spdk_json_write_ctx *w; + + if (request->id.type == SPDK_JSON_VAL_INVALID) { + /* Notification - no response required */ + skip_response(request); + return NULL; + } + + w = begin_response(request); + if (w == NULL) { + skip_response(request); + return NULL; + } + + spdk_json_write_name(w, "result"); + + return w; +} + +void +spdk_jsonrpc_end_result(struct spdk_jsonrpc_request *request, struct spdk_json_write_ctx *w) +{ + assert(w != NULL); + + end_response(request, w); +} + +void +spdk_jsonrpc_send_error_response(struct spdk_jsonrpc_request *request, + int error_code, const char *msg) +{ + struct spdk_json_write_ctx *w; + + if (request->id.type == SPDK_JSON_VAL_INVALID) { + /* For error responses, if id is missing, explicitly respond with "id": null. */ + request->id.type = SPDK_JSON_VAL_NULL; + } + + w = begin_response(request); + if (w == NULL) { + skip_response(request); + return; + } + + spdk_json_write_name(w, "error"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "code"); + spdk_json_write_int32(w, error_code); + spdk_json_write_name(w, "message"); + spdk_json_write_string(w, msg); + spdk_json_write_object_end(w); + + end_response(request, w); +} + +void +spdk_jsonrpc_send_error_response_fmt(struct spdk_jsonrpc_request *request, + int error_code, const char *fmt, ...) +{ + struct spdk_json_write_ctx *w; + va_list args; + + if (request->id.type == SPDK_JSON_VAL_INVALID) { + /* For error responses, if id is missing, explicitly respond with "id": null. */ + request->id.type = SPDK_JSON_VAL_NULL; + } + + w = begin_response(request); + if (w == NULL) { + skip_response(request); + return; + } + + spdk_json_write_name(w, "error"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "code"); + spdk_json_write_int32(w, error_code); + spdk_json_write_name(w, "message"); + va_start(args, fmt); + spdk_json_write_string_fmt_v(w, fmt, args); + va_end(args); + spdk_json_write_object_end(w); + + end_response(request, w); +} + +SPDK_LOG_REGISTER_COMPONENT("rpc", SPDK_LOG_RPC) diff --git a/src/spdk/lib/jsonrpc/jsonrpc_server_tcp.c b/src/spdk/lib/jsonrpc/jsonrpc_server_tcp.c new file mode 100644 index 00000000..c69d7483 --- /dev/null +++ b/src/spdk/lib/jsonrpc/jsonrpc_server_tcp.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "jsonrpc_internal.h" +#include "spdk/string.h" + +struct spdk_jsonrpc_server * +spdk_jsonrpc_server_listen(int domain, int protocol, + struct sockaddr *listen_addr, socklen_t addrlen, + spdk_jsonrpc_handle_request_fn handle_request) +{ + struct spdk_jsonrpc_server *server; + int rc, val, flag, i; + + server = calloc(1, sizeof(struct spdk_jsonrpc_server)); + if (server == NULL) { + return NULL; + } + + TAILQ_INIT(&server->free_conns); + TAILQ_INIT(&server->conns); + + for (i = 0; i < SPDK_JSONRPC_MAX_CONNS; i++) { + TAILQ_INSERT_TAIL(&server->free_conns, &server->conns_array[i], link); + } + + server->handle_request = handle_request; + + server->sockfd = socket(domain, SOCK_STREAM, protocol); + if (server->sockfd < 0) { + SPDK_ERRLOG("socket() failed\n"); + free(server); + return NULL; + } + + val = 1; + setsockopt(server->sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + if (protocol == IPPROTO_TCP) { + setsockopt(server->sockfd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); + } + + flag = fcntl(server->sockfd, F_GETFL); + if (fcntl(server->sockfd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", + server->sockfd, spdk_strerror(errno)); + close(server->sockfd); + free(server); + return NULL; + } + + rc = bind(server->sockfd, listen_addr, addrlen); + if (rc != 0) { + SPDK_ERRLOG("could not bind JSON-RPC server: %s\n", spdk_strerror(errno)); + close(server->sockfd); + free(server); + return NULL; + } + + rc = listen(server->sockfd, 512); + if (rc != 0) { + SPDK_ERRLOG("listen() failed, errno = %d\n", errno); + close(server->sockfd); + free(server); + return NULL; + } + + return server; +} + +void +spdk_jsonrpc_server_shutdown(struct spdk_jsonrpc_server *server) +{ + struct spdk_jsonrpc_server_conn *conn; + + close(server->sockfd); + + TAILQ_FOREACH(conn, &server->conns, link) { + close(conn->sockfd); + } + + free(server); +} + +static void +spdk_jsonrpc_server_conn_close(struct spdk_jsonrpc_server_conn *conn) +{ + conn->closed = true; + + if (conn->sockfd >= 0) { + close(conn->sockfd); + conn->sockfd = -1; + } +} + +static void +spdk_jsonrpc_server_conn_remove(struct spdk_jsonrpc_server_conn *conn) +{ + struct spdk_jsonrpc_server *server = conn->server; + + spdk_jsonrpc_server_conn_close(conn); + + pthread_spin_destroy(&conn->queue_lock); + assert(STAILQ_EMPTY(&conn->send_queue)); + + TAILQ_REMOVE(&server->conns, conn, link); + TAILQ_INSERT_HEAD(&server->free_conns, conn, link); +} + +static int 
+spdk_jsonrpc_server_accept(struct spdk_jsonrpc_server *server) +{ + struct spdk_jsonrpc_server_conn *conn; + int rc, flag; + + rc = accept(server->sockfd, NULL, NULL); + if (rc >= 0) { + conn = TAILQ_FIRST(&server->free_conns); + assert(conn != NULL); + + conn->server = server; + conn->sockfd = rc; + conn->closed = false; + conn->recv_len = 0; + conn->outstanding_requests = 0; + pthread_spin_init(&conn->queue_lock, PTHREAD_PROCESS_PRIVATE); + STAILQ_INIT(&conn->send_queue); + conn->send_request = NULL; + + flag = fcntl(conn->sockfd, F_GETFL); + if (fcntl(conn->sockfd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", + conn->sockfd, spdk_strerror(errno)); + close(conn->sockfd); + return -1; + } + + TAILQ_REMOVE(&server->free_conns, conn, link); + TAILQ_INSERT_TAIL(&server->conns, conn, link); + return 0; + } + + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + return 0; + } + + return -1; +} + +void +spdk_jsonrpc_server_handle_request(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, const struct spdk_json_val *params) +{ + request->conn->server->handle_request(request, method, params); +} + +void +spdk_jsonrpc_server_handle_error(struct spdk_jsonrpc_request *request, int error) +{ + const char *msg; + + switch (error) { + case SPDK_JSONRPC_ERROR_PARSE_ERROR: + msg = "Parse error"; + break; + + case SPDK_JSONRPC_ERROR_INVALID_REQUEST: + msg = "Invalid request"; + break; + + case SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND: + msg = "Method not found"; + break; + + case SPDK_JSONRPC_ERROR_INVALID_PARAMS: + msg = "Invalid parameters"; + break; + + case SPDK_JSONRPC_ERROR_INTERNAL_ERROR: + msg = "Internal error"; + break; + + default: + msg = "Error"; + break; + } + + spdk_jsonrpc_send_error_response(request, error, msg); +} + +static int +spdk_jsonrpc_server_conn_recv(struct spdk_jsonrpc_server_conn *conn) +{ + ssize_t rc; + size_t recv_avail = SPDK_JSONRPC_RECV_BUF_SIZE - conn->recv_len; + + rc = recv(conn->sockfd, conn->recv_buf + conn->recv_len, recv_avail, 0); + if (rc == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + return 0; + } + SPDK_DEBUGLOG(SPDK_LOG_RPC, "recv() failed: %s\n", spdk_strerror(errno)); + return -1; + } + + if (rc == 0) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "remote closed connection\n"); + return -1; + } + + conn->recv_len += rc; + + rc = spdk_jsonrpc_parse_request(conn, conn->recv_buf, conn->recv_len); + if (rc < 0) { + SPDK_ERRLOG("jsonrpc parse request failed\n"); + return -1; + } + + if (rc > 0) { + /* + * Successfully parsed a request - move any data past the end of the + * parsed request down to the beginning. 
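 * Any leftover bytes belong to the next (possibly still incomplete) request and are parsed again after a later recv().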
+ */ + assert((size_t)rc <= conn->recv_len); + memmove(conn->recv_buf, conn->recv_buf + rc, conn->recv_len - rc); + conn->recv_len -= rc; + } + + return 0; +} + +void +spdk_jsonrpc_server_send_response(struct spdk_jsonrpc_request *request) +{ + struct spdk_jsonrpc_server_conn *conn = request->conn; + + /* Queue the response to be sent */ + pthread_spin_lock(&conn->queue_lock); + STAILQ_INSERT_TAIL(&conn->send_queue, request, link); + pthread_spin_unlock(&conn->queue_lock); +} + +static struct spdk_jsonrpc_request * +spdk_jsonrpc_server_dequeue_request(struct spdk_jsonrpc_server_conn *conn) +{ + struct spdk_jsonrpc_request *request = NULL; + + pthread_spin_lock(&conn->queue_lock); + request = STAILQ_FIRST(&conn->send_queue); + if (request) { + STAILQ_REMOVE_HEAD(&conn->send_queue, link); + } + pthread_spin_unlock(&conn->queue_lock); + return request; +} + + +static int +spdk_jsonrpc_server_conn_send(struct spdk_jsonrpc_server_conn *conn) +{ + struct spdk_jsonrpc_request *request; + ssize_t rc; + +more: + if (conn->outstanding_requests == 0) { + return 0; + } + + if (conn->send_request == NULL) { + conn->send_request = spdk_jsonrpc_server_dequeue_request(conn); + } + + request = conn->send_request; + if (request == NULL) { + /* Nothing to send right now */ + return 0; + } + + if (request->send_len > 0) { + rc = send(conn->sockfd, request->send_buf + request->send_offset, + request->send_len, 0); + if (rc < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + return 0; + } + + SPDK_DEBUGLOG(SPDK_LOG_RPC, "send() failed: %s\n", spdk_strerror(errno)); + return -1; + } + + request->send_offset += rc; + request->send_len -= rc; + } + + if (request->send_len == 0) { + /* + * Full response has been sent. + * Free it and set send_request to NULL to move on to the next queued response. + */ + conn->send_request = NULL; + spdk_jsonrpc_free_request(request); + goto more; + } + + return 0; +} + +int +spdk_jsonrpc_server_poll(struct spdk_jsonrpc_server *server) +{ + int rc; + struct spdk_jsonrpc_server_conn *conn, *conn_tmp; + + TAILQ_FOREACH_SAFE(conn, &server->conns, link, conn_tmp) { + if (conn->closed) { + struct spdk_jsonrpc_request *request; + + /* + * The client closed the connection, but there may still be requests + * outstanding; we have no way to cancel outstanding requests, so wait until + * each outstanding request sends a response (which will be discarded, since + * the connection is closed). + */ + + if (conn->send_request) { + spdk_jsonrpc_free_request(conn->send_request); + conn->send_request = NULL; + } + + while ((request = spdk_jsonrpc_server_dequeue_request(conn)) != NULL) { + spdk_jsonrpc_free_request(request); + } + + if (conn->outstanding_requests == 0) { + SPDK_DEBUGLOG(SPDK_LOG_RPC, "all outstanding requests completed\n"); + spdk_jsonrpc_server_conn_remove(conn); + } + } + } + + /* Check listen socket */ + if (!TAILQ_EMPTY(&server->free_conns)) { + spdk_jsonrpc_server_accept(server); + } + + TAILQ_FOREACH(conn, &server->conns, link) { + if (conn->closed) { + continue; + } + + rc = spdk_jsonrpc_server_conn_send(conn); + if (rc != 0) { + spdk_jsonrpc_server_conn_close(conn); + continue; + } + + rc = spdk_jsonrpc_server_conn_recv(conn); + if (rc != 0) { + spdk_jsonrpc_server_conn_close(conn); + continue; + } + } + + return 0; +} diff --git a/src/spdk/lib/log/Makefile b/src/spdk/lib/log/Makefile new file mode 100644 index 00000000..8125ebb1 --- /dev/null +++ b/src/spdk/lib/log/Makefile @@ -0,0 +1,45 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = log.c log_flags.c +LIBNAME = log +ifeq ($(CONFIG_LOG_BACKTRACE),y) +LOCAL_SYS_LIBS += -lunwind +endif + +DIRS-y = rpc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/log/log.c b/src/spdk/lib/log/log.c new file mode 100644 index 00000000..9f4546ce --- /dev/null +++ b/src/spdk/lib/log/log.c @@ -0,0 +1,189 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk_internal/log.h" + +#ifdef SPDK_LOG_BACKTRACE_LVL +#define UNW_LOCAL_ONLY +#include <libunwind.h> +#endif + +static const char *const spdk_level_names[] = { + [SPDK_LOG_ERROR] = "ERROR", + [SPDK_LOG_WARN] = "WARNING", + [SPDK_LOG_NOTICE] = "NOTICE", + [SPDK_LOG_INFO] = "INFO", + [SPDK_LOG_DEBUG] = "DEBUG", +}; + +#define MAX_TMPBUF 1024 + +void +spdk_log_open(void) +{ + openlog("spdk", LOG_PID, LOG_LOCAL7); +} + +void +spdk_log_close(void) +{ + closelog(); +} + +#ifdef SPDK_LOG_BACKTRACE_LVL +static void +spdk_log_unwind_stack(FILE *fp, enum spdk_log_level level) +{ + unw_error_t err; + unw_cursor_t cursor; + unw_context_t uc; + unw_word_t ip; + unw_word_t offp; + char f_name[64]; + int frame; + + if (level > g_spdk_log_backtrace_level) { + return; + } + + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + fprintf(fp, "*%s*: === BACKTRACE START ===\n", spdk_level_names[level]); + + unw_step(&cursor); + for (frame = 1; unw_step(&cursor) > 0; frame++) { + unw_get_reg(&cursor, UNW_REG_IP, &ip); + err = unw_get_proc_name(&cursor, f_name, sizeof(f_name), &offp); + if (err || strcmp(f_name, "main") == 0) { + break; + } + + fprintf(fp, "*%s*: %3d: %*s%s() at %#lx\n", spdk_level_names[level], frame, frame - 1, "", f_name, + (unsigned long)ip); + } + fprintf(fp, "*%s*: === BACKTRACE END ===\n", spdk_level_names[level]); +} + +#else +#define spdk_log_unwind_stack(fp, lvl) +#endif + +void +spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func, + const char *format, ...)
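/*
 * Routing mirrors the checks below: a message is emitted to stderr when
 * level <= g_spdk_log_print_level and sent to syslog when level <= g_spdk_log_level,
 * while a level of SPDK_LOG_DISABLED is dropped outright. A direct call looks like
 * the sketch below; the SPDK_ERRLOG()/SPDK_DEBUGLOG() macros used throughout this
 * patch are the usual entry points to it:
 *
 *   spdk_log(SPDK_LOG_NOTICE, __FILE__, __LINE__, __func__, "hello %d\n", 42);
 */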
+{ + int severity = LOG_INFO; + char buf[MAX_TMPBUF]; + va_list ap; + + switch (level) { + case SPDK_LOG_ERROR: + severity = LOG_ERR; + break; + case SPDK_LOG_WARN: + severity = LOG_WARNING; + break; + case SPDK_LOG_NOTICE: + severity = LOG_NOTICE; + break; + case SPDK_LOG_INFO: + case SPDK_LOG_DEBUG: + severity = LOG_INFO; + break; + case SPDK_LOG_DISABLED: + return; + } + + va_start(ap, format); + + vsnprintf(buf, sizeof(buf), format, ap); + + if (level <= g_spdk_log_print_level) { + fprintf(stderr, "%s:%4d:%s: *%s*: %s", file, line, func, spdk_level_names[level], buf); + spdk_log_unwind_stack(stderr, level); + } + + if (level <= g_spdk_log_level) { + syslog(severity, "%s:%4d:%s: *%s*: %s", file, line, func, spdk_level_names[level], buf); + } + + va_end(ap); +} + +static void +fdump(FILE *fp, const char *label, const uint8_t *buf, size_t len) +{ + char tmpbuf[MAX_TMPBUF]; + char buf16[16 + 1]; + size_t total; + unsigned int idx; + + fprintf(fp, "%s\n", label); + + memset(buf16, 0, sizeof buf16); + total = 0; + for (idx = 0; idx < len; idx++) { + if (idx != 0 && idx % 16 == 0) { + snprintf(tmpbuf + total, sizeof tmpbuf - total, + " %s", buf16); + fprintf(fp, "%s\n", tmpbuf); + total = 0; + } + if (idx % 16 == 0) { + total += snprintf(tmpbuf + total, sizeof tmpbuf - total, + "%08x ", idx); + } + if (idx % 8 == 0) { + total += snprintf(tmpbuf + total, sizeof tmpbuf - total, + "%s", " "); + } + total += snprintf(tmpbuf + total, sizeof tmpbuf - total, + "%2.2x ", buf[idx] & 0xff); + buf16[idx % 16] = isprint(buf[idx]) ? buf[idx] : '.'; + } + for (; idx % 16 != 0; idx++) { + total += snprintf(tmpbuf + total, sizeof tmpbuf - total, " "); + buf16[idx % 16] = ' '; + } + snprintf(tmpbuf + total, sizeof tmpbuf - total, " %s", buf16); + fprintf(fp, "%s\n", tmpbuf); + fflush(fp); +} + +void +spdk_trace_dump(FILE *fp, const char *label, const void *buf, size_t len) +{ + fdump(fp, label, buf, len); +} diff --git a/src/spdk/lib/log/log_flags.c b/src/spdk/lib/log/log_flags.c new file mode 100644 index 00000000..1b766c44 --- /dev/null +++ b/src/spdk/lib/log/log_flags.c @@ -0,0 +1,196 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk_internal/log.h" + +static TAILQ_HEAD(, spdk_trace_flag) g_trace_flags = TAILQ_HEAD_INITIALIZER(g_trace_flags); + +enum spdk_log_level g_spdk_log_level = SPDK_LOG_NOTICE; +enum spdk_log_level g_spdk_log_print_level = SPDK_LOG_NOTICE; +enum spdk_log_level g_spdk_log_backtrace_level = SPDK_LOG_DISABLED; + +SPDK_LOG_REGISTER_COMPONENT("log", SPDK_LOG_LOG) + +#define MAX_TMPBUF 1024 + +void +spdk_log_set_level(enum spdk_log_level level) +{ + g_spdk_log_level = level; +} + +enum spdk_log_level +spdk_log_get_level(void) { + return g_spdk_log_level; +} + +void +spdk_log_set_print_level(enum spdk_log_level level) +{ + g_spdk_log_print_level = level; +} + +enum spdk_log_level +spdk_log_get_print_level(void) { + return g_spdk_log_print_level; +} + +void +spdk_log_set_backtrace_level(enum spdk_log_level level) +{ + g_spdk_log_backtrace_level = level; +} + +enum spdk_log_level +spdk_log_get_backtrace_level(void) { + return g_spdk_log_backtrace_level; +} + +static struct spdk_trace_flag * +get_trace_flag(const char *name) +{ + struct spdk_trace_flag *flag; + + TAILQ_FOREACH(flag, &g_trace_flags, tailq) { + if (strcasecmp(name, flag->name) == 0) { + return flag; + } + } + + return NULL; +} + +void +spdk_log_register_trace_flag(const char *name, struct spdk_trace_flag *flag) +{ + struct spdk_trace_flag *iter; + + if (name == NULL || flag == NULL) { + SPDK_ERRLOG("missing spdk_trace_flag parameters\n"); + assert(false); + return; + } + + if (get_trace_flag(name)) { + SPDK_ERRLOG("duplicate spdk_trace_flag '%s'\n", name); + assert(false); + return; + } + + TAILQ_FOREACH(iter, &g_trace_flags, tailq) { + if (strcasecmp(iter->name, flag->name) > 0) { + TAILQ_INSERT_BEFORE(iter, flag, tailq); + return; + } + } + + TAILQ_INSERT_TAIL(&g_trace_flags, flag, tailq); +} + +bool +spdk_log_get_trace_flag(const char *name) +{ + struct spdk_trace_flag *flag = get_trace_flag(name); + + if (flag && flag->enabled) { + return true; + } + + return false; +} + +static int +set_trace_flag(const char *name, bool value) +{ + struct spdk_trace_flag *flag; + + if (strcasecmp(name, "all") == 0) { + TAILQ_FOREACH(flag, &g_trace_flags, tailq) { + flag->enabled = value; + } + return 0; + } + + flag = get_trace_flag(name); + if (flag == NULL) { + return -1; + } + + flag->enabled = value; + + return 0; +} + +int +spdk_log_set_trace_flag(const char *name) +{ + return set_trace_flag(name, true); +} + +int +spdk_log_clear_trace_flag(const char *name) +{ + return set_trace_flag(name, false); +} + +struct spdk_trace_flag * +spdk_log_get_first_trace_flag(void) +{ + return TAILQ_FIRST(&g_trace_flags); +} + +struct spdk_trace_flag * +spdk_log_get_next_trace_flag(struct spdk_trace_flag *flag) +{ + return TAILQ_NEXT(flag, tailq); +} + +void +spdk_tracelog_usage(FILE *f, const char *trace_arg) +{ +#ifdef DEBUG + struct spdk_trace_flag *flag; + fprintf(f, " %s, --traceflag enable debug log flag (all", trace_arg); + + TAILQ_FOREACH(flag, &g_trace_flags, tailq) { + 
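/* Append every registered flag name after the leading "all" entry. */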
fprintf(f, ", %s", flag->name); + } + + fprintf(f, ")\n"); +#else + fprintf(f, " %s, --traceflag enable debug log flag (not supported" + " - must rebuild with --enable-debug)\n", trace_arg); +#endif +} diff --git a/src/spdk/lib/log/rpc/Makefile b/src/spdk/lib/log/rpc/Makefile new file mode 100644 index 00000000..bf53a64c --- /dev/null +++ b/src/spdk/lib/log/rpc/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = log_rpc.c +LIBNAME = log_rpc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/log/rpc/log_rpc.c b/src/spdk/lib/log/rpc/log_rpc.c new file mode 100644 index 00000000..fea7607a --- /dev/null +++ b/src/spdk/lib/log/rpc/log_rpc.c @@ -0,0 +1,336 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/rpc.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +struct rpc_trace_flag { + char *flag; +}; + +struct rpc_log_level { + char *level; +}; + +static void +free_rpc_trace_flag(struct rpc_trace_flag *p) +{ + free(p->flag); +} + +static void +free_rpc_log_level(struct rpc_log_level *p) +{ + free(p->level); +} + +static const struct spdk_json_object_decoder rpc_trace_flag_decoders[] = { + {"flag", offsetof(struct rpc_trace_flag, flag), spdk_json_decode_string}, +}; + +static const struct spdk_json_object_decoder rpc_log_level_decoders[] = { + {"level", offsetof(struct rpc_log_level, level), spdk_json_decode_string}, +}; + +static int +_parse_log_level(char *level) +{ + if (!strcasecmp(level, "ERROR")) { + return SPDK_LOG_ERROR; + } else if (!strcasecmp(level, "WARNING")) { + return SPDK_LOG_WARN; + } else if (!strcasecmp(level, "NOTICE")) { + return SPDK_LOG_NOTICE; + } else if (!strcasecmp(level, "INFO")) { + return SPDK_LOG_INFO; + } else if (!strcasecmp(level, "DEBUG")) { + return SPDK_LOG_DEBUG; + } + return -1; +} + +static const char * +_get_log_level_name(int level) +{ + if (level == SPDK_LOG_ERROR) { + return "ERROR"; + } else if (level == SPDK_LOG_WARN) { + return "WARNING"; + } else if (level == SPDK_LOG_NOTICE) { + return "NOTICE"; + } else if (level == SPDK_LOG_INFO) { + return "INFO"; + } else if (level == SPDK_LOG_DEBUG) { + return "DEBUG"; + } + return NULL; +} + +static void +spdk_rpc_set_log_print_level(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_log_level req = {}; + int level; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_log_level_decoders, + SPDK_COUNTOF(rpc_log_level_decoders), &req)) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "spdk_json_decode_object failed\n"); + goto invalid; + } + + level = _parse_log_level(req.level); + if (level == -1) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "try to set invalid log level\n"); + goto invalid; + } + + spdk_log_set_print_level(level); + free_rpc_log_level(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_log_level(&req); +} +SPDK_RPC_REGISTER("set_log_print_level", spdk_rpc_set_log_print_level, + SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_log_print_level(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + int level; + const char *name; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_trace_flags requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + level = spdk_log_get_print_level(); + name = _get_log_level_name(level); + spdk_json_write_string(w, name); + + 
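	/*
	 * Illustrative example (not prescribed by this file): the reply
	 * carries the level name as a bare JSON string, so with the default
	 * print level (SPDK_LOG_NOTICE) an exchange looks roughly like:
	 *
	 *   -> {"jsonrpc": "2.0", "method": "get_log_print_level", "id": 1}
	 *   <- {"jsonrpc": "2.0", "id": 1, "result": "NOTICE"}
	 */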
+ spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_log_print_level", spdk_rpc_get_log_print_level, + SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_set_log_level(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_log_level req = {}; + int level; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_log_level_decoders, + SPDK_COUNTOF(rpc_log_level_decoders), &req)) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "spdk_json_decode_object failed\n"); + goto invalid; + } + + level = _parse_log_level(req.level); + if (level == -1) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "try to set invalid log level\n"); + goto invalid; + } + + + spdk_log_set_level(level); + free_rpc_log_level(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_log_level(&req); +} +SPDK_RPC_REGISTER("set_log_level", spdk_rpc_set_log_level, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_log_level(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + int level; + const char *name; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_trace_flags requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + level = spdk_log_get_level(); + name = _get_log_level_name(level); + spdk_json_write_string(w, name); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_log_level", spdk_rpc_get_log_level, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_set_trace_flag(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_trace_flag req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_trace_flag_decoders, + SPDK_COUNTOF(rpc_trace_flag_decoders), &req)) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.flag == 0) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "flag was 0\n"); + goto invalid; + } + + spdk_log_set_trace_flag(req.flag); + free_rpc_trace_flag(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_trace_flag(&req); +} +SPDK_RPC_REGISTER("set_trace_flag", spdk_rpc_set_trace_flag, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_clear_trace_flag(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_trace_flag req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_trace_flag_decoders, + SPDK_COUNTOF(rpc_trace_flag_decoders), &req)) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.flag == 0) { + SPDK_DEBUGLOG(SPDK_LOG_LOG, "flag was 0\n"); + goto invalid; + } + + spdk_log_clear_trace_flag(req.flag); + free_rpc_trace_flag(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + 
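	/*
	 * Shared failure path of the log RPCs in this file: malformed or
	 * unknown parameters are reported as a JSON-RPC "Invalid parameters"
	 * error and no state is changed.
	 */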
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_trace_flag(&req); +} +SPDK_RPC_REGISTER("clear_trace_flag", spdk_rpc_clear_trace_flag, + SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_trace_flags(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_trace_flag *flag; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_trace_flags requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_object_begin(w); + flag = spdk_log_get_first_trace_flag(); + while (flag) { + spdk_json_write_name(w, flag->name); + spdk_json_write_bool(w, flag->enabled); + flag = spdk_log_get_next_trace_flag(flag); + } + spdk_json_write_object_end(w); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_trace_flags", spdk_rpc_get_trace_flags, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/lvol/Makefile b/src/spdk/lib/lvol/Makefile new file mode 100644 index 00000000..49076fba --- /dev/null +++ b/src/spdk/lib/lvol/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = lvol.c +LIBNAME = lvol + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/lvol/lvol.c b/src/spdk/lib/lvol/lvol.c new file mode 100644 index 00000000..060cc89f --- /dev/null +++ b/src/spdk/lib/lvol/lvol.c @@ -0,0 +1,1494 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk_internal/lvolstore.h" +#include "spdk_internal/log.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/blob_bdev.h" +#include "spdk/util.h" + +/* Default blob channel opts for lvol */ +#define SPDK_LVOL_BLOB_OPTS_CHANNEL_OPS 512 + +#define LVOL_NAME "name" + +SPDK_LOG_REGISTER_COMPONENT("lvol", SPDK_LOG_LVOL) + +static TAILQ_HEAD(, spdk_lvol_store) g_lvol_stores = TAILQ_HEAD_INITIALIZER(g_lvol_stores); +static pthread_mutex_t g_lvol_stores_mutex = PTHREAD_MUTEX_INITIALIZER; + +static inline uint64_t +divide_round_up(uint64_t num, uint64_t divisor) +{ + return (num + divisor - 1) / divisor; +} + +static int +_spdk_add_lvs_to_list(struct spdk_lvol_store *lvs) +{ + struct spdk_lvol_store *tmp; + bool name_conflict = false; + + pthread_mutex_lock(&g_lvol_stores_mutex); + TAILQ_FOREACH(tmp, &g_lvol_stores, link) { + if (!strncmp(lvs->name, tmp->name, SPDK_LVS_NAME_MAX)) { + name_conflict = true; + break; + } + } + if (!name_conflict) { + lvs->on_list = true; + TAILQ_INSERT_TAIL(&g_lvol_stores, lvs, link); + } + pthread_mutex_unlock(&g_lvol_stores_mutex); + + return name_conflict ? 
-1 : 0; +} + +static void +_spdk_lvs_free(struct spdk_lvol_store *lvs) +{ + if (lvs->on_list) { + TAILQ_REMOVE(&g_lvol_stores, lvs, link); + } + free(lvs); +} + +static void +_spdk_lvol_free(struct spdk_lvol *lvol) +{ + free(lvol->unique_id); + free(lvol); +} + +static void +_spdk_lvol_open_cb(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvol_with_handle_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + + if (lvolerrno != 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Failed to open lvol %s\n", lvol->unique_id); + goto end; + } + + lvol->ref_count++; + lvol->blob = blob; +end: + req->cb_fn(req->cb_arg, lvol, lvolerrno); + free(req); +} + +void +spdk_lvol_open(struct spdk_lvol *lvol, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_with_handle_req *req; + + assert(cb_fn != NULL); + + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + cb_fn(cb_arg, NULL, -ENODEV); + return; + } + + if (lvol->action_in_progress == true) { + SPDK_ERRLOG("Cannot open lvol - operations on lvol pending\n"); + cb_fn(cb_arg, lvol, -EBUSY); + return; + } + + if (lvol->ref_count > 0) { + lvol->ref_count++; + cb_fn(cb_arg, lvol, 0); + return; + } + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + SPDK_ERRLOG("Cannot alloc memory for request structure\n"); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol = lvol; + + spdk_bs_open_blob(lvol->lvol_store->blobstore, lvol->blob_id, _spdk_lvol_open_cb, req); +} + +static void +_spdk_bs_unload_with_error_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + + req->cb_fn(req->cb_arg, NULL, req->lvserrno); + free(req); +} + +static void +_spdk_load_next_lvol(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs = lvs->blobstore; + struct spdk_lvol *lvol, *tmp; + spdk_blob_id blob_id; + char uuid[SPDK_UUID_STRING_LEN]; + const char *attr; + size_t value_len; + int rc; + + if (lvolerrno == -ENOENT) { + /* Finished iterating */ + req->cb_fn(req->cb_arg, lvs, 0); + free(req); + return; + } else if (lvolerrno < 0) { + SPDK_ERRLOG("Failed to fetch blobs list\n"); + req->lvserrno = lvolerrno; + goto invalid; + } + + blob_id = spdk_blob_get_id(blob); + + if (blob_id == lvs->super_blob_id) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "found superblob %"PRIu64"\n", (uint64_t)blob_id); + spdk_bs_iter_next(bs, blob, _spdk_load_next_lvol, req); + return; + } + + lvol = calloc(1, sizeof(*lvol)); + if (!lvol) { + SPDK_ERRLOG("Cannot alloc memory for lvol base pointer\n"); + req->lvserrno = -ENOMEM; + goto invalid; + } + + lvol->blob = blob; + lvol->blob_id = blob_id; + lvol->lvol_store = lvs; + + rc = spdk_blob_get_xattr_value(blob, "uuid", (const void **)&attr, &value_len); + if (rc != 0 || value_len != SPDK_UUID_STRING_LEN || attr[SPDK_UUID_STRING_LEN - 1] != '\0' || + spdk_uuid_parse(&lvol->uuid, attr) != 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Missing or corrupt lvol uuid\n"); + memset(&lvol->uuid, 0, sizeof(lvol->uuid)); + } + spdk_uuid_fmt_lower(lvol->uuid_str, sizeof(lvol->uuid_str), &lvol->uuid); + + if (!spdk_mem_all_zero(&lvol->uuid, sizeof(lvol->uuid))) { + lvol->unique_id = strdup(lvol->uuid_str); + } else { + spdk_uuid_fmt_lower(uuid, sizeof(uuid), &lvol->lvol_store->uuid); + lvol->unique_id = spdk_sprintf_alloc("%s_%"PRIu64, uuid, (uint64_t)blob_id); + } + if (!lvol->unique_id) { 
+ SPDK_ERRLOG("Cannot assign lvol name\n"); + free(lvol); + req->lvserrno = -ENOMEM; + goto invalid; + } + + rc = spdk_blob_get_xattr_value(blob, "name", (const void **)&attr, &value_len); + if (rc != 0 || value_len > SPDK_LVOL_NAME_MAX) { + SPDK_ERRLOG("Cannot assign lvol name\n"); + _spdk_lvol_free(lvol); + req->lvserrno = -EINVAL; + goto invalid; + } + + snprintf(lvol->name, sizeof(lvol->name), "%s", attr); + + TAILQ_INSERT_TAIL(&lvs->lvols, lvol, link); + + lvs->lvol_count++; + + SPDK_INFOLOG(SPDK_LOG_LVOL, "added lvol %s (%s)\n", lvol->unique_id, lvol->uuid_str); + + spdk_bs_iter_next(bs, blob, _spdk_load_next_lvol, req); + + return; + +invalid: + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + TAILQ_REMOVE(&lvs->lvols, lvol, link); + free(lvol->unique_id); + free(lvol); + } + + _spdk_lvs_free(lvs); + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); +} + +static void +_spdk_close_super_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs = lvs->blobstore; + + if (lvolerrno != 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Could not close super blob\n"); + _spdk_lvs_free(lvs); + req->lvserrno = -ENODEV; + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); + return; + } + + /* Start loading lvols */ + spdk_bs_iter_first(lvs->blobstore, _spdk_load_next_lvol, req); +} + +static void +_spdk_close_super_blob_with_error_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs = lvs->blobstore; + + _spdk_lvs_free(lvs); + + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); +} + +static void +_spdk_lvs_read_uuid(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs = lvs->blobstore; + const char *attr; + size_t value_len; + int rc; + + if (lvolerrno != 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Could not open super blob\n"); + _spdk_lvs_free(lvs); + req->lvserrno = -ENODEV; + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); + return; + } + + rc = spdk_blob_get_xattr_value(blob, "uuid", (const void **)&attr, &value_len); + if (rc != 0 || value_len != SPDK_UUID_STRING_LEN || attr[SPDK_UUID_STRING_LEN - 1] != '\0') { + SPDK_INFOLOG(SPDK_LOG_LVOL, "missing or incorrect UUID\n"); + req->lvserrno = -EINVAL; + spdk_blob_close(blob, _spdk_close_super_blob_with_error_cb, req); + return; + } + + if (spdk_uuid_parse(&lvs->uuid, attr)) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "incorrect UUID '%s'\n", attr); + req->lvserrno = -EINVAL; + spdk_blob_close(blob, _spdk_close_super_blob_with_error_cb, req); + return; + } + + rc = spdk_blob_get_xattr_value(blob, "name", (const void **)&attr, &value_len); + if (rc != 0 || value_len > SPDK_LVS_NAME_MAX) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "missing or invalid name\n"); + req->lvserrno = -EINVAL; + spdk_blob_close(blob, _spdk_close_super_blob_with_error_cb, req); + return; + } + + snprintf(lvs->name, sizeof(lvs->name), "%s", attr); + + rc = _spdk_add_lvs_to_list(lvs); + if (rc) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "lvolstore with name %s already exists\n", lvs->name); + req->lvserrno = -EEXIST; + spdk_blob_close(blob, _spdk_close_super_blob_with_error_cb, req); + return; + } + + lvs->super_blob_id = spdk_blob_get_id(blob); + + 
spdk_blob_close(blob, _spdk_close_super_cb, req); +} + +static void +_spdk_lvs_open_super(void *cb_arg, spdk_blob_id blobid, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs = lvs->blobstore; + + if (lvolerrno != 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Super blob not found\n"); + _spdk_lvs_free(lvs); + req->lvserrno = -ENODEV; + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); + return; + } + + spdk_bs_open_blob(bs, blobid, _spdk_lvs_read_uuid, req); +} + +static void +_spdk_lvs_load_cb(void *cb_arg, struct spdk_blob_store *bs, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)cb_arg; + struct spdk_lvol_store *lvs; + + if (lvolerrno != 0) { + req->cb_fn(req->cb_arg, NULL, lvolerrno); + free(req); + return; + } + + lvs = calloc(1, sizeof(*lvs)); + if (lvs == NULL) { + SPDK_ERRLOG("Cannot alloc memory for lvol store\n"); + spdk_bs_unload(bs, _spdk_bs_unload_with_error_cb, req); + return; + } + + lvs->blobstore = bs; + lvs->bs_dev = req->bs_dev; + TAILQ_INIT(&lvs->lvols); + TAILQ_INIT(&lvs->pending_lvols); + + req->lvol_store = lvs; + + spdk_bs_get_super(bs, _spdk_lvs_open_super, req); +} + +static void +spdk_lvs_bs_opts_init(struct spdk_bs_opts *opts) +{ + spdk_bs_opts_init(opts); + opts->max_channel_ops = SPDK_LVOL_BLOB_OPTS_CHANNEL_OPS; +} + +void +spdk_lvs_load(struct spdk_bs_dev *bs_dev, spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvs_with_handle_req *req; + struct spdk_bs_opts opts = {}; + + assert(cb_fn != NULL); + + if (bs_dev == NULL) { + SPDK_ERRLOG("Blobstore device does not exist\n"); + cb_fn(cb_arg, NULL, -ENODEV); + return; + } + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + SPDK_ERRLOG("Cannot alloc memory for request structure\n"); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->bs_dev = bs_dev; + + spdk_lvs_bs_opts_init(&opts); + snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), "LVOLSTORE"); + + spdk_bs_load(bs_dev, &opts, _spdk_lvs_load_cb, req); +} + +static void +_spdk_super_create_close_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + + if (lvolerrno < 0) { + SPDK_ERRLOG("Lvol store init failed: could not close super blob\n"); + req->cb_fn(req->cb_arg, NULL, lvolerrno); + _spdk_lvs_free(lvs); + free(req); + return; + } + + req->cb_fn(req->cb_arg, lvs, lvolerrno); + free(req); +} + +static void +_spdk_super_blob_set_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob *blob = lvs->super_blob; + + if (lvolerrno < 0) { + req->cb_fn(req->cb_arg, NULL, lvolerrno); + SPDK_ERRLOG("Lvol store init failed: could not set uuid for super blob\n"); + _spdk_lvs_free(lvs); + free(req); + return; + } + + spdk_blob_close(blob, _spdk_super_create_close_cb, req); +} + +static void +_spdk_super_blob_init_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob *blob = lvs->super_blob; + char uuid[SPDK_UUID_STRING_LEN]; + + if (lvolerrno < 0) { + req->cb_fn(req->cb_arg, NULL, lvolerrno); + SPDK_ERRLOG("Lvol store init failed: could not set super blob\n"); + _spdk_lvs_free(lvs); + free(req); + return; + } + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), 
&lvs->uuid); + + spdk_blob_set_xattr(blob, "uuid", uuid, sizeof(uuid)); + spdk_blob_set_xattr(blob, "name", lvs->name, strnlen(lvs->name, SPDK_LVS_NAME_MAX) + 1); + spdk_blob_sync_md(blob, _spdk_super_blob_set_cb, req); +} + +static void +_spdk_super_blob_create_open_cb(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + + if (lvolerrno < 0) { + req->cb_fn(req->cb_arg, NULL, lvolerrno); + SPDK_ERRLOG("Lvol store init failed: could not open super blob\n"); + _spdk_lvs_free(lvs); + free(req); + return; + } + + lvs->super_blob = blob; + lvs->super_blob_id = spdk_blob_get_id(blob); + + spdk_bs_set_super(lvs->blobstore, lvs->super_blob_id, _spdk_super_blob_init_cb, req); +} + +static void +_spdk_super_blob_create_cb(void *cb_arg, spdk_blob_id blobid, int lvolerrno) +{ + struct spdk_lvs_with_handle_req *req = cb_arg; + struct spdk_lvol_store *lvs = req->lvol_store; + struct spdk_blob_store *bs; + + if (lvolerrno < 0) { + req->cb_fn(req->cb_arg, NULL, lvolerrno); + SPDK_ERRLOG("Lvol store init failed: could not create super blob\n"); + _spdk_lvs_free(lvs); + free(req); + return; + } + + bs = req->lvol_store->blobstore; + + spdk_bs_open_blob(bs, blobid, _spdk_super_blob_create_open_cb, req); +} + +static void +_spdk_lvs_init_cb(void *cb_arg, struct spdk_blob_store *bs, int lvserrno) +{ + struct spdk_lvs_with_handle_req *lvs_req = cb_arg; + struct spdk_lvol_store *lvs = lvs_req->lvol_store; + + if (lvserrno != 0) { + assert(bs == NULL); + lvs_req->cb_fn(lvs_req->cb_arg, NULL, lvserrno); + SPDK_ERRLOG("Lvol store init failed: could not initialize blobstore\n"); + _spdk_lvs_free(lvs); + free(lvs_req); + return; + } + + assert(bs != NULL); + lvs->blobstore = bs; + TAILQ_INIT(&lvs->lvols); + TAILQ_INIT(&lvs->pending_lvols); + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol store initialized\n"); + + /* create super blob */ + spdk_bs_create_blob(lvs->blobstore, _spdk_super_blob_create_cb, lvs_req); +} + +void +spdk_lvs_opts_init(struct spdk_lvs_opts *o) +{ + o->cluster_sz = SPDK_LVS_OPTS_CLUSTER_SZ; + memset(o->name, 0, sizeof(o->name)); +} + +static void +_spdk_setup_lvs_opts(struct spdk_bs_opts *bs_opts, struct spdk_lvs_opts *o) +{ + assert(o != NULL); + spdk_lvs_bs_opts_init(bs_opts); + bs_opts->cluster_sz = o->cluster_sz; +} + +int +spdk_lvs_init(struct spdk_bs_dev *bs_dev, struct spdk_lvs_opts *o, + spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_store *lvs; + struct spdk_lvs_with_handle_req *lvs_req; + struct spdk_bs_opts opts = {}; + int rc; + + if (bs_dev == NULL) { + SPDK_ERRLOG("Blobstore device does not exist\n"); + return -ENODEV; + } + + if (o == NULL) { + SPDK_ERRLOG("spdk_lvs_opts not specified\n"); + return -EINVAL; + } + + _spdk_setup_lvs_opts(&opts, o); + + if (strnlen(o->name, SPDK_LVS_NAME_MAX) == SPDK_LVS_NAME_MAX) { + SPDK_ERRLOG("Name has no null terminator.\n"); + return -EINVAL; + } + + if (strnlen(o->name, SPDK_LVS_NAME_MAX) == 0) { + SPDK_ERRLOG("No name specified.\n"); + return -EINVAL; + } + + lvs = calloc(1, sizeof(*lvs)); + if (!lvs) { + SPDK_ERRLOG("Cannot alloc memory for lvol store base pointer\n"); + return -ENOMEM; + } + + spdk_uuid_generate(&lvs->uuid); + snprintf(lvs->name, sizeof(lvs->name), "%s", o->name); + + rc = _spdk_add_lvs_to_list(lvs); + if (rc) { + SPDK_ERRLOG("lvolstore with name %s already exists\n", lvs->name); + _spdk_lvs_free(lvs); + return -EEXIST; + } + + lvs_req = calloc(1, sizeof(*lvs_req)); + if (!lvs_req) { + 
_spdk_lvs_free(lvs); + SPDK_ERRLOG("Cannot alloc memory for lvol store request pointer\n"); + return -ENOMEM; + } + + assert(cb_fn != NULL); + lvs_req->cb_fn = cb_fn; + lvs_req->cb_arg = cb_arg; + lvs_req->lvol_store = lvs; + lvs->bs_dev = bs_dev; + lvs->destruct = false; + + snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), "LVOLSTORE"); + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Initializing lvol store\n"); + spdk_bs_init(bs_dev, &opts, _spdk_lvs_init_cb, lvs_req); + + return 0; +} + +static void +_spdk_lvs_rename_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_req *req = cb_arg; + + if (lvolerrno != 0) { + req->lvserrno = lvolerrno; + } + if (req->lvserrno != 0) { + SPDK_ERRLOG("Lvol store rename operation failed\n"); + /* Lvs renaming failed, so we should 'clear' new_name. + * Otherwise it could cause a failure on the next attepmt to change the name to 'new_name' */ + snprintf(req->lvol_store->new_name, + sizeof(req->lvol_store->new_name), + "%s", req->lvol_store->name); + } else { + /* Update lvs name with new_name */ + snprintf(req->lvol_store->name, + sizeof(req->lvol_store->name), + "%s", req->lvol_store->new_name); + } + + req->cb_fn(req->cb_arg, req->lvserrno); + free(req); +} + +static void +_spdk_lvs_rename_sync_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvs_req *req = cb_arg; + struct spdk_blob *blob = req->lvol_store->super_blob; + + if (lvolerrno < 0) { + req->lvserrno = lvolerrno; + } + + spdk_blob_close(blob, _spdk_lvs_rename_cb, req); +} + +static void +_spdk_lvs_rename_open_cb(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvs_req *req = cb_arg; + int rc; + + if (lvolerrno < 0) { + _spdk_lvs_rename_cb(cb_arg, lvolerrno); + return; + } + + rc = spdk_blob_set_xattr(blob, "name", req->lvol_store->new_name, + strlen(req->lvol_store->new_name) + 1); + if (rc < 0) { + req->lvserrno = rc; + _spdk_lvs_rename_sync_cb(req, rc); + return; + } + + req->lvol_store->super_blob = blob; + + spdk_blob_sync_md(blob, _spdk_lvs_rename_sync_cb, req); +} + +void +spdk_lvs_rename(struct spdk_lvol_store *lvs, const char *new_name, + spdk_lvs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvs_req *req; + struct spdk_lvol_store *tmp; + + /* Check if new name is current lvs name. 
+ * If so, return success immediately */ + if (strncmp(lvs->name, new_name, SPDK_LVS_NAME_MAX) == 0) { + cb_fn(cb_arg, 0); + return; + } + + /* Check if new or new_name is already used in other lvs */ + TAILQ_FOREACH(tmp, &g_lvol_stores, link) { + if (!strncmp(new_name, tmp->name, SPDK_LVS_NAME_MAX) || + !strncmp(new_name, tmp->new_name, SPDK_LVS_NAME_MAX)) { + cb_fn(cb_arg, -EEXIST); + return; + } + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + snprintf(lvs->new_name, sizeof(lvs->new_name), "%s", new_name); + req->lvol_store = lvs; + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_bs_open_blob(lvs->blobstore, lvs->super_blob_id, _spdk_lvs_rename_open_cb, req); +} + +static void +_lvs_unload_cb(void *cb_arg, int lvserrno) +{ + struct spdk_lvs_req *lvs_req = cb_arg; + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol store unloaded\n"); + assert(lvs_req->cb_fn != NULL); + lvs_req->cb_fn(lvs_req->cb_arg, lvserrno); + free(lvs_req); +} + +int +spdk_lvs_unload(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, + void *cb_arg) +{ + struct spdk_lvs_req *lvs_req; + struct spdk_lvol *lvol, *tmp; + + if (lvs == NULL) { + SPDK_ERRLOG("Lvol store is NULL\n"); + return -ENODEV; + } + + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + if (lvol->action_in_progress == true) { + SPDK_ERRLOG("Cannot unload lvol store - operations on lvols pending\n"); + cb_fn(cb_arg, -EBUSY); + return -EBUSY; + } else if (lvol->ref_count != 0) { + SPDK_ERRLOG("Lvols still open on lvol store\n"); + cb_fn(cb_arg, -EBUSY); + return -EBUSY; + } + } + + TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + TAILQ_REMOVE(&lvs->lvols, lvol, link); + _spdk_lvol_free(lvol); + } + + lvs_req = calloc(1, sizeof(*lvs_req)); + if (!lvs_req) { + SPDK_ERRLOG("Cannot alloc memory for lvol store request pointer\n"); + return -ENOMEM; + } + + lvs_req->cb_fn = cb_fn; + lvs_req->cb_arg = cb_arg; + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Unloading lvol store\n"); + spdk_bs_unload(lvs->blobstore, _lvs_unload_cb, lvs_req); + _spdk_lvs_free(lvs); + + return 0; +} + +static void +_lvs_destroy_cb(void *cb_arg, int lvserrno) +{ + struct spdk_lvs_destroy_req *lvs_req = cb_arg; + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol store destroyed\n"); + assert(lvs_req->cb_fn != NULL); + lvs_req->cb_fn(lvs_req->cb_arg, lvserrno); + free(lvs_req); +} + +static void +_lvs_destroy_super_cb(void *cb_arg, int bserrno) +{ + struct spdk_lvs_destroy_req *lvs_req = cb_arg; + struct spdk_lvol_store *lvs = lvs_req->lvs; + + assert(lvs != NULL); + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Destroying lvol store\n"); + spdk_bs_destroy(lvs->blobstore, _lvs_destroy_cb, lvs_req); + _spdk_lvs_free(lvs); +} + +int +spdk_lvs_destroy(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, + void *cb_arg) +{ + struct spdk_lvs_destroy_req *lvs_req; + struct spdk_lvol *iter_lvol, *tmp; + + if (lvs == NULL) { + SPDK_ERRLOG("Lvol store is NULL\n"); + return -ENODEV; + } + + TAILQ_FOREACH_SAFE(iter_lvol, &lvs->lvols, link, tmp) { + if (iter_lvol->action_in_progress == true) { + SPDK_ERRLOG("Cannot destroy lvol store - operations on lvols pending\n"); + cb_fn(cb_arg, -EBUSY); + return -EBUSY; + } else if (iter_lvol->ref_count != 0) { + SPDK_ERRLOG("Lvols still open on lvol store\n"); + cb_fn(cb_arg, -EBUSY); + return -EBUSY; + } + } + + TAILQ_FOREACH_SAFE(iter_lvol, &lvs->lvols, link, tmp) { + free(iter_lvol->unique_id); + free(iter_lvol); + } + + lvs_req = calloc(1, sizeof(*lvs_req)); + if (!lvs_req) { + 
SPDK_ERRLOG("Cannot alloc memory for lvol store request pointer\n"); + return -ENOMEM; + } + + lvs_req->cb_fn = cb_fn; + lvs_req->cb_arg = cb_arg; + lvs_req->lvs = lvs; + + SPDK_INFOLOG(SPDK_LOG_LVOL, "Deleting super blob\n"); + spdk_bs_delete_blob(lvs->blobstore, lvs->super_blob_id, _lvs_destroy_super_cb, lvs_req); + + return 0; +} + +static void +_spdk_lvol_close_blob_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + + if (lvolerrno < 0) { + SPDK_ERRLOG("Could not close blob on lvol\n"); + _spdk_lvol_free(lvol); + goto end; + } + + lvol->ref_count--; + lvol->action_in_progress = false; + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol %s closed\n", lvol->unique_id); + +end: + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +bool +spdk_lvol_deletable(struct spdk_lvol *lvol) +{ + size_t count; + + spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count); + return (count == 0); +} + +static void +_spdk_lvol_delete_blob_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + + if (lvolerrno < 0) { + SPDK_ERRLOG("Could not delete blob on lvol\n"); + goto end; + } + + TAILQ_REMOVE(&lvol->lvol_store->lvols, lvol, link); + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol %s deleted\n", lvol->unique_id); + +end: + _spdk_lvol_free(lvol); + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +static void +_spdk_lvol_destroy_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + struct spdk_blob_store *bs = lvol->lvol_store->blobstore; + + if (lvolerrno < 0) { + SPDK_ERRLOG("Could not close blob on lvol\n"); + _spdk_lvol_free(lvol); + req->cb_fn(req->cb_arg, lvolerrno); + free(req); + return; + } + SPDK_INFOLOG(SPDK_LOG_LVOL, "Blob closed on lvol %s\n", lvol->unique_id); + + spdk_bs_delete_blob(bs, lvol->blob_id, _spdk_lvol_delete_blob_cb, req); +} + +static void +_spdk_lvol_create_open_cb(void *cb_arg, struct spdk_blob *blob, int lvolerrno) +{ + struct spdk_lvol_with_handle_req *req = cb_arg; + spdk_blob_id blob_id = spdk_blob_get_id(blob); + struct spdk_lvol *lvol = req->lvol; + + TAILQ_REMOVE(&req->lvol->lvol_store->pending_lvols, req->lvol, link); + + if (lvolerrno < 0) { + free(lvol); + req->cb_fn(req->cb_arg, NULL, lvolerrno); + free(req); + return; + } + + lvol->blob = blob; + lvol->blob_id = blob_id; + + TAILQ_INSERT_TAIL(&lvol->lvol_store->lvols, lvol, link); + + lvol->unique_id = strdup(lvol->uuid_str); + if (!lvol->unique_id) { + SPDK_ERRLOG("Cannot alloc memory for lvol name\n"); + spdk_blob_close(blob, _spdk_lvol_destroy_cb, req); + return; + } + + lvol->ref_count++; + + assert(req->cb_fn != NULL); + req->cb_fn(req->cb_arg, req->lvol, lvolerrno); + free(req); +} + +static void +_spdk_lvol_create_cb(void *cb_arg, spdk_blob_id blobid, int lvolerrno) +{ + struct spdk_lvol_with_handle_req *req = cb_arg; + struct spdk_blob_store *bs; + + if (lvolerrno < 0) { + TAILQ_REMOVE(&req->lvol->lvol_store->pending_lvols, req->lvol, link); + free(req->lvol); + assert(req->cb_fn != NULL); + req->cb_fn(req->cb_arg, NULL, lvolerrno); + free(req); + return; + } + + bs = req->lvol->lvol_store->blobstore; + + spdk_bs_open_blob(bs, blobid, _spdk_lvol_create_open_cb, req); +} + +static void +spdk_lvol_get_xattr_value(void *xattr_ctx, const char *name, + const void **value, size_t *value_len) +{ + struct spdk_lvol *lvol = xattr_ctx; + + if (!strcmp(LVOL_NAME, name)) { + *value = lvol->name; + *value_len = SPDK_LVOL_NAME_MAX; + } else 
if (!strcmp("uuid", name)) { + *value = lvol->uuid_str; + *value_len = sizeof(lvol->uuid_str); + } +} + +static int +_spdk_lvs_verify_lvol_name(struct spdk_lvol_store *lvs, const char *name) +{ + struct spdk_lvol *tmp; + + if (name == NULL || strnlen(name, SPDK_LVOL_NAME_MAX) == 0) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "lvol name not provided.\n"); + return -EINVAL; + } + + if (strnlen(name, SPDK_LVOL_NAME_MAX) == SPDK_LVOL_NAME_MAX) { + SPDK_ERRLOG("Name has no null terminator.\n"); + return -EINVAL; + } + + TAILQ_FOREACH(tmp, &lvs->lvols, link) { + if (!strncmp(name, tmp->name, SPDK_LVOL_NAME_MAX)) { + SPDK_ERRLOG("lvol with name %s already exists\n", name); + return -EEXIST; + } + } + + TAILQ_FOREACH(tmp, &lvs->pending_lvols, link) { + if (!strncmp(name, tmp->name, SPDK_LVOL_NAME_MAX)) { + SPDK_ERRLOG("lvol with name %s is being already created\n", name); + return -EEXIST; + } + } + + return 0; +} + +int +spdk_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, + bool thin_provision, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_with_handle_req *req; + struct spdk_blob_store *bs; + struct spdk_lvol *lvol; + struct spdk_blob_opts opts; + uint64_t num_clusters; + char *xattr_names[] = {LVOL_NAME, "uuid"}; + int rc; + + if (lvs == NULL) { + SPDK_ERRLOG("lvol store does not exist\n"); + return -EINVAL; + } + + rc = _spdk_lvs_verify_lvol_name(lvs, name); + if (rc < 0) { + return rc; + } + + bs = lvs->blobstore; + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + return -ENOMEM; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + lvol = calloc(1, sizeof(*lvol)); + if (!lvol) { + free(req); + SPDK_ERRLOG("Cannot alloc memory for lvol base pointer\n"); + return -ENOMEM; + } + lvol->lvol_store = lvs; + num_clusters = divide_round_up(sz, spdk_bs_get_cluster_size(bs)); + lvol->thin_provision = thin_provision; + snprintf(lvol->name, sizeof(lvol->name), "%s", name); + TAILQ_INSERT_TAIL(&lvol->lvol_store->pending_lvols, lvol, link); + spdk_uuid_generate(&lvol->uuid); + spdk_uuid_fmt_lower(lvol->uuid_str, sizeof(lvol->uuid_str), &lvol->uuid); + req->lvol = lvol; + + spdk_blob_opts_init(&opts); + opts.thin_provision = thin_provision; + opts.num_clusters = num_clusters; + opts.xattrs.count = SPDK_COUNTOF(xattr_names); + opts.xattrs.names = xattr_names; + opts.xattrs.ctx = lvol; + opts.xattrs.get_value = spdk_lvol_get_xattr_value; + + spdk_bs_create_blob_ext(lvs->blobstore, &opts, _spdk_lvol_create_cb, req); + + return 0; +} + +void +spdk_lvol_create_snapshot(struct spdk_lvol *origlvol, const char *snapshot_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_store *lvs; + struct spdk_lvol *newlvol; + struct spdk_blob *origblob; + struct spdk_lvol_with_handle_req *req; + struct spdk_blob_xattr_opts snapshot_xattrs; + char *xattr_names[] = {LVOL_NAME, "uuid"}; + int rc; + + if (origlvol == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol not provided.\n"); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + origblob = origlvol->blob; + lvs = origlvol->lvol_store; + if (lvs == NULL) { + SPDK_ERRLOG("lvol store does not exist\n"); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + rc = _spdk_lvs_verify_lvol_name(lvs, snapshot_name); + if (rc < 0) { + cb_fn(cb_arg, NULL, rc); + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + newlvol = calloc(1, 
sizeof(*newlvol)); + if (!newlvol) { + SPDK_ERRLOG("Cannot alloc memory for lvol base pointer\n"); + free(req); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + newlvol->lvol_store = origlvol->lvol_store; + snprintf(newlvol->name, sizeof(newlvol->name), "%s", snapshot_name); + TAILQ_INSERT_TAIL(&newlvol->lvol_store->pending_lvols, newlvol, link); + spdk_uuid_generate(&newlvol->uuid); + spdk_uuid_fmt_lower(newlvol->uuid_str, sizeof(newlvol->uuid_str), &newlvol->uuid); + snapshot_xattrs.count = SPDK_COUNTOF(xattr_names); + snapshot_xattrs.ctx = newlvol; + snapshot_xattrs.names = xattr_names; + snapshot_xattrs.get_value = spdk_lvol_get_xattr_value; + req->lvol = newlvol; + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_bs_create_snapshot(lvs->blobstore, spdk_blob_get_id(origblob), &snapshot_xattrs, + _spdk_lvol_create_cb, req); +} + +void +spdk_lvol_create_clone(struct spdk_lvol *origlvol, const char *clone_name, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol *newlvol; + struct spdk_lvol_with_handle_req *req; + struct spdk_lvol_store *lvs; + struct spdk_blob *origblob; + struct spdk_blob_xattr_opts clone_xattrs; + char *xattr_names[] = {LVOL_NAME, "uuid"}; + int rc; + + if (origlvol == NULL) { + SPDK_INFOLOG(SPDK_LOG_LVOL, "Lvol not provided.\n"); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + origblob = origlvol->blob; + lvs = origlvol->lvol_store; + if (lvs == NULL) { + SPDK_ERRLOG("lvol store does not exist\n"); + cb_fn(cb_arg, NULL, -EINVAL); + return; + } + + rc = _spdk_lvs_verify_lvol_name(lvs, clone_name); + if (rc < 0) { + cb_fn(cb_arg, NULL, rc); + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + newlvol = calloc(1, sizeof(*newlvol)); + if (!newlvol) { + SPDK_ERRLOG("Cannot alloc memory for lvol base pointer\n"); + free(req); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + newlvol->lvol_store = lvs; + snprintf(newlvol->name, sizeof(newlvol->name), "%s", clone_name); + TAILQ_INSERT_TAIL(&newlvol->lvol_store->pending_lvols, newlvol, link); + spdk_uuid_generate(&newlvol->uuid); + spdk_uuid_fmt_lower(newlvol->uuid_str, sizeof(newlvol->uuid_str), &newlvol->uuid); + clone_xattrs.count = SPDK_COUNTOF(xattr_names); + clone_xattrs.ctx = newlvol; + clone_xattrs.names = xattr_names; + clone_xattrs.get_value = spdk_lvol_get_xattr_value; + req->lvol = newlvol; + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + spdk_bs_create_clone(lvs->blobstore, spdk_blob_get_id(origblob), &clone_xattrs, + _spdk_lvol_create_cb, + req); +} + +static void +_spdk_lvol_resize_done(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +static void +_spdk_lvol_blob_resize_cb(void *cb_arg, int bserrno) +{ + struct spdk_lvol_req *req = cb_arg; + struct spdk_lvol *lvol = req->lvol; + + if (bserrno != 0) { + req->cb_fn(req->cb_arg, bserrno); + free(req); + return; + } + + spdk_blob_sync_md(lvol->blob, _spdk_lvol_resize_done, req); +} + +void +spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, + spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob = lvol->blob; + struct spdk_lvol_store *lvs = lvol->lvol_store; + struct spdk_lvol_req *req; + uint64_t new_clusters = divide_round_up(sz, spdk_bs_get_cluster_size(lvs->blobstore)); + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); 
+ return; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol = lvol; + + spdk_blob_resize(blob, new_clusters, _spdk_lvol_blob_resize_cb, req); +} + +static void +_spdk_lvol_rename_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + + if (lvolerrno != 0) { + SPDK_ERRLOG("Lvol rename operation failed\n"); + } else { + snprintf(req->lvol->name, sizeof(req->lvol->name), "%s", req->name); + } + + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +void +spdk_lvol_rename(struct spdk_lvol *lvol, const char *new_name, + spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol *tmp; + struct spdk_blob *blob = lvol->blob; + struct spdk_lvol_req *req; + int rc; + + /* Check if new name is current lvol name. + * If so, return success immediately */ + if (strncmp(lvol->name, new_name, SPDK_LVOL_NAME_MAX) == 0) { + cb_fn(cb_arg, 0); + return; + } + + /* Check if lvol with 'new_name' already exists in lvolstore */ + TAILQ_FOREACH(tmp, &lvol->lvol_store->lvols, link) { + if (strncmp(tmp->name, new_name, SPDK_LVOL_NAME_MAX) == 0) { + SPDK_ERRLOG("Lvol %s already exists in lvol store %s\n", new_name, lvol->lvol_store->name); + cb_fn(cb_arg, -EEXIST); + return; + } + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol = lvol; + snprintf(req->name, sizeof(req->name), "%s", new_name); + + rc = spdk_blob_set_xattr(blob, "name", new_name, strlen(new_name) + 1); + if (rc < 0) { + free(req); + cb_fn(cb_arg, rc); + return; + } + + spdk_blob_sync_md(blob, _spdk_lvol_rename_cb, req); +} + +void +spdk_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + struct spdk_blob_store *bs = lvol->lvol_store->blobstore; + + assert(cb_fn != NULL); + + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + cb_fn(cb_arg, -ENODEV); + return; + } + + if (lvol->ref_count != 0) { + SPDK_ERRLOG("Cannot destroy lvol %s because it is still open\n", lvol->unique_id); + cb_fn(cb_arg, -EBUSY); + return; + } + + lvol->action_in_progress = true; + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol = lvol; + + spdk_bs_delete_blob(bs, lvol->blob_id, _spdk_lvol_delete_blob_cb, req); +} + +void +spdk_lvol_close(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + + assert(cb_fn != NULL); + + if (lvol == NULL) { + SPDK_ERRLOG("lvol does not exist\n"); + cb_fn(cb_arg, -ENODEV); + return; + } + + if (lvol->ref_count > 1) { + lvol->ref_count--; + cb_fn(cb_arg, 0); + return; + } else if (lvol->ref_count == 0) { + cb_fn(cb_arg, -EINVAL); + return; + } + + lvol->action_in_progress = true; + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->lvol = lvol; + + spdk_blob_close(lvol->blob, _spdk_lvol_close_blob_cb, req); +} + +struct spdk_io_channel * +spdk_lvol_get_io_channel(struct spdk_lvol *lvol) +{ + return spdk_bs_alloc_io_channel(lvol->lvol_store->blobstore); +} + +static void +_spdk_lvol_inflate_cb(void *cb_arg, int lvolerrno) +{ + struct spdk_lvol_req *req = cb_arg; + + spdk_bs_free_io_channel(req->channel); + + if 
(lvolerrno < 0) { + SPDK_ERRLOG("Could not inflate lvol\n"); + } + + req->cb_fn(req->cb_arg, lvolerrno); + free(req); +} + +void +spdk_lvol_inflate(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + struct spdk_blob *blob = lvol->blob; + spdk_blob_id blob_id = spdk_blob_get_id(blob); + + assert(cb_fn != NULL); + + if (lvol == NULL) { + SPDK_ERRLOG("Lvol does not exist\n"); + cb_fn(cb_arg, -ENODEV); + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->channel = spdk_bs_alloc_io_channel(lvol->lvol_store->blobstore); + if (req->channel == NULL) { + SPDK_ERRLOG("Cannot alloc io channel for lvol inflate request\n"); + free(req); + cb_fn(cb_arg, -ENOMEM); + return; + } + + spdk_bs_inflate_blob(lvol->lvol_store->blobstore, req->channel, blob_id, _spdk_lvol_inflate_cb, + req); +} + +void +spdk_lvol_decouple_parent(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct spdk_lvol_req *req; + struct spdk_blob *blob = lvol->blob; + spdk_blob_id blob_id = spdk_blob_get_id(blob); + + assert(cb_fn != NULL); + + if (lvol == NULL) { + SPDK_ERRLOG("Lvol does not exist\n"); + cb_fn(cb_arg, -ENODEV); + return; + } + + req = calloc(1, sizeof(*req)); + if (!req) { + SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n"); + cb_fn(cb_arg, -ENOMEM); + return; + } + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->channel = spdk_bs_alloc_io_channel(lvol->lvol_store->blobstore); + if (req->channel == NULL) { + SPDK_ERRLOG("Cannot alloc io channel for lvol inflate request\n"); + free(req); + cb_fn(cb_arg, -ENOMEM); + return; + } + + spdk_bs_blob_decouple_parent(lvol->lvol_store->blobstore, req->channel, blob_id, + _spdk_lvol_inflate_cb, req); +} diff --git a/src/spdk/lib/nbd/Makefile b/src/spdk/lib/nbd/Makefile new file mode 100644 index 00000000..419a2158 --- /dev/null +++ b/src/spdk/lib/nbd/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = nbd +C_SRCS = nbd.c nbd_rpc.c + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/nbd/nbd.c b/src/spdk/lib/nbd/nbd.c new file mode 100644 index 00000000..639f122c --- /dev/null +++ b/src/spdk/lib/nbd/nbd.c @@ -0,0 +1,969 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/string.h" + +#include + +#include "spdk/nbd.h" +#include "nbd_internal.h" +#include "spdk/bdev.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/util.h" +#include "spdk/thread.h" +#include "spdk/event.h" + +#include "spdk_internal/log.h" +#include "spdk/queue.h" + +#define GET_IO_LOOP_COUNT 16 + +enum nbd_io_state_t { + /* Receiving or ready to receive nbd request header */ + NBD_IO_RECV_REQ = 0, + /* Receiving write payload */ + NBD_IO_RECV_PAYLOAD, + /* Transmitting or ready to transmit nbd response header */ + NBD_IO_XMIT_RESP, + /* Transmitting read payload */ + NBD_IO_XMIT_PAYLOAD, +}; + +struct nbd_io { + struct spdk_nbd_disk *nbd; + enum nbd_io_state_t state; + + void *payload; + uint32_t payload_size; + + struct nbd_request req; + struct nbd_reply resp; + + /* + * Tracks current progress on reading/writing a request, + * response, or payload from the nbd socket. 
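	 * The offset is reset to zero each time a complete header or payload
	 * has been transferred and the io advances to the next
	 * nbd_io_state_t stage.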
+ */ + uint32_t offset; + + /* for bdev io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; + + TAILQ_ENTRY(nbd_io) tailq; +}; + +enum nbd_disk_state_t { + NBD_DISK_STATE_RUNNING = 0, + /* soft disconnection caused by receiving nbd_cmd_disc */ + NBD_DISK_STATE_SOFTDISC, + /* hard disconnection caused by mandatory conditions */ + NBD_DISK_STATE_HARDDISC, +}; + +struct spdk_nbd_disk { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *ch; + int dev_fd; + char *nbd_path; + int kernel_sp_fd; + int spdk_sp_fd; + struct spdk_poller *nbd_poller; + uint32_t buf_align; + + struct nbd_io *io_in_recv; + TAILQ_HEAD(, nbd_io) received_io_list; + TAILQ_HEAD(, nbd_io) executed_io_list; + + enum nbd_disk_state_t state; + /* count of nbd_io in spdk_nbd_disk */ + int io_count; + + TAILQ_ENTRY(spdk_nbd_disk) tailq; +}; + +struct spdk_nbd_disk_globals { + TAILQ_HEAD(, spdk_nbd_disk) disk_head; +}; + +static struct spdk_nbd_disk_globals g_spdk_nbd; + +static int +nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io); + +int +spdk_nbd_init(void) +{ + TAILQ_INIT(&g_spdk_nbd.disk_head); + + return 0; +} + +void +spdk_nbd_fini(void) +{ + struct spdk_nbd_disk *nbd_idx, *nbd_tmp; + + /* + * Stop running spdk_nbd_disk. + * Here, nbd removing are unnecessary, but _SAFE variant + * is needed, since internal spdk_nbd_disk_unregister will + * remove nbd from TAILQ. + */ + TAILQ_FOREACH_SAFE(nbd_idx, &g_spdk_nbd.disk_head, tailq, nbd_tmp) { + spdk_nbd_stop(nbd_idx); + } +} + +static int +spdk_nbd_disk_register(struct spdk_nbd_disk *nbd) +{ + if (spdk_nbd_disk_find_by_nbd_path(nbd->nbd_path)) { + SPDK_NOTICELOG("%s is already exported\n", nbd->nbd_path); + return -1; + } + + TAILQ_INSERT_TAIL(&g_spdk_nbd.disk_head, nbd, tailq); + + return 0; +} + +static void +spdk_nbd_disk_unregister(struct spdk_nbd_disk *nbd) +{ + struct spdk_nbd_disk *nbd_idx, *nbd_tmp; + + /* + * nbd disk may be stopped before registered. + * check whether it was registered. + */ + TAILQ_FOREACH_SAFE(nbd_idx, &g_spdk_nbd.disk_head, tailq, nbd_tmp) { + if (nbd == nbd_idx) { + TAILQ_REMOVE(&g_spdk_nbd.disk_head, nbd_idx, tailq); + break; + } + } +} + +struct spdk_nbd_disk * +spdk_nbd_disk_find_by_nbd_path(const char *nbd_path) +{ + struct spdk_nbd_disk *nbd; + + /* + * check whether nbd has already been registered by nbd path. 
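	 * The lookup key is the kernel nbd device node associated with the
	 * disk (e.g. "/dev/nbd0"), the same string that
	 * spdk_nbd_write_config_json() emits as the "nbd_device" parameter.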
+ */ + TAILQ_FOREACH(nbd, &g_spdk_nbd.disk_head, tailq) { + if (!strcmp(nbd->nbd_path, nbd_path)) { + return nbd; + } + } + + return NULL; +} + +struct spdk_nbd_disk *spdk_nbd_disk_first(void) +{ + return TAILQ_FIRST(&g_spdk_nbd.disk_head); +} + +struct spdk_nbd_disk *spdk_nbd_disk_next(struct spdk_nbd_disk *prev) +{ + return TAILQ_NEXT(prev, tailq); +} + +const char * +spdk_nbd_disk_get_nbd_path(struct spdk_nbd_disk *nbd) +{ + return nbd->nbd_path; +} + +const char * +spdk_nbd_disk_get_bdev_name(struct spdk_nbd_disk *nbd) +{ + return spdk_bdev_get_name(nbd->bdev); +} + +void +spdk_nbd_write_config_json(struct spdk_json_write_ctx *w) +{ + struct spdk_nbd_disk *nbd; + + spdk_json_write_array_begin(w); + + TAILQ_FOREACH(nbd, &g_spdk_nbd.disk_head, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "start_nbd_disk"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "nbd_device", spdk_nbd_disk_get_nbd_path(nbd)); + spdk_json_write_named_string(w, "bdev_name", spdk_nbd_disk_get_bdev_name(nbd)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + spdk_json_write_array_end(w); +} + +void +nbd_disconnect(struct spdk_nbd_disk *nbd) +{ + /* + * nbd soft-disconnection to terminate transmission phase. + * After receiving this ioctl command, nbd kernel module will send + * a NBD_CMD_DISC type io to nbd server in order to inform server. + */ + ioctl(nbd->dev_fd, NBD_DISCONNECT); +} + +static struct nbd_io * +spdk_get_nbd_io(struct spdk_nbd_disk *nbd) +{ + struct nbd_io *io; + + io = calloc(1, sizeof(*io)); + if (!io) { + return NULL; + } + + io->nbd = nbd; + to_be32(&io->resp.magic, NBD_REPLY_MAGIC); + + nbd->io_count++; + + return io; +} + +static void +spdk_put_nbd_io(struct spdk_nbd_disk *nbd, struct nbd_io *io) +{ + if (io->payload) { + spdk_dma_free(io->payload); + } + free(io); + + nbd->io_count--; +} + +/* + * Check whether received nbd_io are all transmitted. + * + * \return 1 there is still some nbd_io not transmitted. + * 0 all nbd_io received are transmitted. + */ +static int +spdk_nbd_io_xmit_check(struct spdk_nbd_disk *nbd) +{ + if (nbd->io_count == 0) { + return 0; + } else if (nbd->io_count == 1 && nbd->io_in_recv != NULL) { + return 0; + } + + return 1; +} + +/* + * Check whether received nbd_io are all executed, + * and put back executed nbd_io instead of transmitting them + * + * \return 1 there is still some nbd_io under executing + * 0 all nbd_io gotten are freed. + */ +static int +spdk_nbd_cleanup_io(struct spdk_nbd_disk *nbd) +{ + struct nbd_io *io, *io_tmp; + + /* free io_in_recv */ + if (nbd->io_in_recv != NULL) { + spdk_put_nbd_io(nbd, nbd->io_in_recv); + nbd->io_in_recv = NULL; + } + + /* free io in received_io_list */ + if (!TAILQ_EMPTY(&nbd->received_io_list)) { + TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) { + TAILQ_REMOVE(&nbd->received_io_list, io, tailq); + spdk_put_nbd_io(nbd, io); + } + } + + /* free io in executed_io_list */ + if (!TAILQ_EMPTY(&nbd->executed_io_list)) { + TAILQ_FOREACH_SAFE(io, &nbd->executed_io_list, tailq, io_tmp) { + TAILQ_REMOVE(&nbd->executed_io_list, io, tailq); + spdk_put_nbd_io(nbd, io); + } + } + + /* + * Some nbd_io may be under executing in bdev. + * Wait for their done operation. 
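	 * When those outstanding I/Os complete, nbd_io_done() re-evaluates
	 * this condition and, for a hard disconnect, finishes the teardown
	 * via _nbd_stop().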
+ */ + if (nbd->io_count != 0) { + return 1; + } + + return 0; +} + +static void +_nbd_stop(struct spdk_nbd_disk *nbd) +{ + if (nbd->ch) { + spdk_put_io_channel(nbd->ch); + } + + if (nbd->bdev_desc) { + spdk_bdev_close(nbd->bdev_desc); + } + + if (nbd->nbd_path) { + free(nbd->nbd_path); + } + + if (nbd->spdk_sp_fd >= 0) { + close(nbd->spdk_sp_fd); + } + + if (nbd->kernel_sp_fd >= 0) { + close(nbd->kernel_sp_fd); + } + + if (nbd->dev_fd >= 0) { + ioctl(nbd->dev_fd, NBD_CLEAR_QUE); + ioctl(nbd->dev_fd, NBD_CLEAR_SOCK); + close(nbd->dev_fd); + } + + if (nbd->nbd_poller) { + spdk_poller_unregister(&nbd->nbd_poller); + } + + spdk_nbd_disk_unregister(nbd); + + free(nbd); +} + +void +spdk_nbd_stop(struct spdk_nbd_disk *nbd) +{ + if (nbd == NULL) { + return; + } + + nbd->state = NBD_DISK_STATE_HARDDISC; + + /* + * Stop action should be called only after all nbd_io are executed. + */ + if (!spdk_nbd_cleanup_io(nbd)) { + _nbd_stop(nbd); + } +} + +static int64_t +read_from_socket(int fd, void *buf, size_t length) +{ + ssize_t bytes_read; + + bytes_read = read(fd, buf, length); + if (bytes_read == 0) { + return -EIO; + } else if (bytes_read == -1) { + if (errno != EAGAIN) { + return -errno; + } + return 0; + } else { + return bytes_read; + } +} + +static int64_t +write_to_socket(int fd, void *buf, size_t length) +{ + ssize_t bytes_written; + + bytes_written = write(fd, buf, length); + if (bytes_written == 0) { + return -EIO; + } else if (bytes_written == -1) { + if (errno != EAGAIN) { + return -errno; + } + return 0; + } else { + return bytes_written; + } +} + +static void +nbd_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct nbd_io *io = cb_arg; + struct spdk_nbd_disk *nbd = io->nbd; + + if (success) { + io->resp.error = 0; + } else { + to_be32(&io->resp.error, EIO); + } + + memcpy(&io->resp.handle, &io->req.handle, sizeof(io->resp.handle)); + TAILQ_INSERT_TAIL(&nbd->executed_io_list, io, tailq); + + if (bdev_io != NULL) { + spdk_bdev_free_io(bdev_io); + } + + if (nbd->state == NBD_DISK_STATE_HARDDISC && !spdk_nbd_cleanup_io(nbd)) { + _nbd_stop(nbd); + } +} + +static void +nbd_resubmit_io(void *arg) +{ + struct nbd_io *io = (struct nbd_io *)arg; + struct spdk_nbd_disk *nbd = io->nbd; + int rc = 0; + + rc = nbd_submit_bdev_io(nbd, io); + if (rc) { + SPDK_INFOLOG(SPDK_LOG_NBD, "nbd: io resubmit for dev %s , io_type %d, returned %d.\n", + spdk_nbd_disk_get_bdev_name(nbd), from_be32(&io->req.type), rc); + } +} + +static void +nbd_queue_io(struct nbd_io *io) +{ + int rc; + struct spdk_bdev *bdev = io->nbd->bdev; + + io->bdev_io_wait.bdev = bdev; + io->bdev_io_wait.cb_fn = nbd_resubmit_io; + io->bdev_io_wait.cb_arg = io; + + rc = spdk_bdev_queue_io_wait(bdev, io->nbd->ch, &io->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in nbd_queue_io, rc=%d.\n", rc); + nbd_io_done(NULL, false, io); + } +} + +static int +nbd_submit_bdev_io(struct spdk_nbd_disk *nbd, struct nbd_io *io) +{ + struct spdk_bdev_desc *desc = nbd->bdev_desc; + struct spdk_io_channel *ch = nbd->ch; + int rc = 0; + + switch (from_be32(&io->req.type)) { + case NBD_CMD_READ: + rc = spdk_bdev_read(desc, ch, io->payload, from_be64(&io->req.from), + io->payload_size, nbd_io_done, io); + break; + case NBD_CMD_WRITE: + rc = spdk_bdev_write(desc, ch, io->payload, from_be64(&io->req.from), + io->payload_size, nbd_io_done, io); + break; +#ifdef NBD_FLAG_SEND_FLUSH + case NBD_CMD_FLUSH: + rc = spdk_bdev_flush(desc, ch, 0, + spdk_bdev_get_num_blocks(nbd->bdev) * spdk_bdev_get_block_size(nbd->bdev), + 
nbd_io_done, io); + break; +#endif +#ifdef NBD_FLAG_SEND_TRIM + case NBD_CMD_TRIM: + rc = spdk_bdev_unmap(desc, ch, from_be64(&io->req.from), + from_be32(&io->req.len), nbd_io_done, io); + break; +#endif + case NBD_CMD_DISC: + spdk_put_nbd_io(nbd, io); + nbd->state = NBD_DISK_STATE_SOFTDISC; + break; + default: + rc = -1; + } + + if (rc < 0) { + if (rc == -ENOMEM) { + SPDK_INFOLOG(SPDK_LOG_NBD, "No memory, start to queue io.\n"); + nbd_queue_io(io); + } else { + SPDK_ERRLOG("nbd io failed in nbd_queue_io, rc=%d.\n", rc); + nbd_io_done(NULL, false, io); + } + } + + return 0; +} + +static int +spdk_nbd_io_exec(struct spdk_nbd_disk *nbd) +{ + struct nbd_io *io, *io_tmp; + int ret = 0; + + /* + * For soft disconnection, nbd server must handle all outstanding + * request before closing connection. + */ + if (nbd->state == NBD_DISK_STATE_HARDDISC) { + return 0; + } + + if (!TAILQ_EMPTY(&nbd->received_io_list)) { + TAILQ_FOREACH_SAFE(io, &nbd->received_io_list, tailq, io_tmp) { + TAILQ_REMOVE(&nbd->received_io_list, io, tailq); + ret = nbd_submit_bdev_io(nbd, io); + if (ret < 0) { + break; + } + } + } + + return ret; +} + +static int +spdk_nbd_io_recv_internal(struct spdk_nbd_disk *nbd) +{ + struct nbd_io *io; + int ret = 0; + + if (nbd->io_in_recv == NULL) { + nbd->io_in_recv = spdk_get_nbd_io(nbd); + if (!nbd->io_in_recv) { + return -ENOMEM; + } + } + + io = nbd->io_in_recv; + + if (io->state == NBD_IO_RECV_REQ) { + ret = read_from_socket(nbd->spdk_sp_fd, (char *)&io->req + io->offset, + sizeof(io->req) - io->offset); + if (ret < 0) { + spdk_put_nbd_io(nbd, io); + nbd->io_in_recv = NULL; + return ret; + } + + io->offset += ret; + + /* request is fully received */ + if (io->offset == sizeof(io->req)) { + io->offset = 0; + + /* req magic check */ + if (from_be32(&io->req.magic) != NBD_REQUEST_MAGIC) { + SPDK_ERRLOG("invalid request magic\n"); + spdk_put_nbd_io(nbd, io); + nbd->io_in_recv = NULL; + return -EINVAL; + } + + /* io except read/write should ignore payload */ + if (from_be32(&io->req.type) == NBD_CMD_WRITE || + from_be32(&io->req.type) == NBD_CMD_READ) { + io->payload_size = from_be32(&io->req.len); + } else { + io->payload_size = 0; + } + + /* io payload allocate */ + if (io->payload_size) { + io->payload = spdk_dma_malloc(io->payload_size, nbd->buf_align, NULL); + if (io->payload == NULL) { + SPDK_ERRLOG("could not allocate io->payload of size %d\n", io->payload_size); + spdk_put_nbd_io(nbd, io); + nbd->io_in_recv = NULL; + return -ENOMEM; + } + } else { + io->payload = NULL; + } + + /* next io step */ + if (from_be32(&io->req.type) == NBD_CMD_WRITE) { + io->state = NBD_IO_RECV_PAYLOAD; + } else { + io->state = NBD_IO_XMIT_RESP; + nbd->io_in_recv = NULL; + TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq); + } + } + } + + if (io->state == NBD_IO_RECV_PAYLOAD) { + ret = read_from_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset); + if (ret < 0) { + spdk_put_nbd_io(nbd, io); + nbd->io_in_recv = NULL; + return ret; + } + + io->offset += ret; + + /* request payload is fully received */ + if (io->offset == io->payload_size) { + io->offset = 0; + io->state = NBD_IO_XMIT_RESP; + nbd->io_in_recv = NULL; + TAILQ_INSERT_TAIL(&nbd->received_io_list, io, tailq); + } + + } + + return 0; +} + +static int +spdk_nbd_io_recv(struct spdk_nbd_disk *nbd) +{ + int i, ret = 0; + + /* + * nbd server should not accept request in both soft and hard + * disconnect states. 
+ */ + if (nbd->state != NBD_DISK_STATE_RUNNING) { + return 0; + } + + for (i = 0; i < GET_IO_LOOP_COUNT; i++) { + ret = spdk_nbd_io_recv_internal(nbd); + if (ret != 0) { + return ret; + } + } + + return 0; +} + +static int +spdk_nbd_io_xmit_internal(struct spdk_nbd_disk *nbd) +{ + struct nbd_io *io; + int ret = 0; + + io = TAILQ_FIRST(&nbd->executed_io_list); + if (io == NULL) { + return 0; + } + + /* Remove IO from list now assuming it will be completed. It will be inserted + * back to the head if it cannot be completed. This approach is specifically + * taken to work around a scan-build use-after-free mischaracterization. + */ + TAILQ_REMOVE(&nbd->executed_io_list, io, tailq); + + /* resp error and handler are already set in io_done */ + + if (io->state == NBD_IO_XMIT_RESP) { + ret = write_to_socket(nbd->spdk_sp_fd, (char *)&io->resp + io->offset, + sizeof(io->resp) - io->offset); + if (ret <= 0) { + goto reinsert; + } + + io->offset += ret; + + /* response is fully transmitted */ + if (io->offset == sizeof(io->resp)) { + io->offset = 0; + + /* transmit payload only when NBD_CMD_READ with no resp error */ + if (from_be32(&io->req.type) != NBD_CMD_READ || io->resp.error != 0) { + spdk_put_nbd_io(nbd, io); + return 0; + } else { + io->state = NBD_IO_XMIT_PAYLOAD; + } + } + } + + if (io->state == NBD_IO_XMIT_PAYLOAD) { + ret = write_to_socket(nbd->spdk_sp_fd, io->payload + io->offset, io->payload_size - io->offset); + if (ret <= 0) { + goto reinsert; + } + + io->offset += ret; + + /* read payload is fully transmitted */ + if (io->offset == io->payload_size) { + spdk_put_nbd_io(nbd, io); + return 0; + } + } + +reinsert: + TAILQ_INSERT_HEAD(&nbd->executed_io_list, io, tailq); + return ret; +} + +static int +spdk_nbd_io_xmit(struct spdk_nbd_disk *nbd) +{ + int ret = 0; + + /* + * For soft disconnection, nbd server must handle all outstanding + * request before closing connection. + */ + if (nbd->state == NBD_DISK_STATE_HARDDISC) { + return 0; + } + + while (!TAILQ_EMPTY(&nbd->executed_io_list)) { + ret = spdk_nbd_io_xmit_internal(nbd); + if (ret != 0) { + return ret; + } + } + + /* + * For soft disconnection, nbd server can close connection after all + * outstanding request are transmitted. + */ + if (nbd->state == NBD_DISK_STATE_SOFTDISC && !spdk_nbd_io_xmit_check(nbd)) { + return -1; + } + + return 0; +} + +/** + * Poll an NBD instance. + * + * \return 0 on success or negated errno values on error (e.g. connection closed). + */ +static int +_spdk_nbd_poll(struct spdk_nbd_disk *nbd) +{ + int rc; + + /* transmit executed io first */ + rc = spdk_nbd_io_xmit(nbd); + if (rc < 0) { + return rc; + } + + rc = spdk_nbd_io_recv(nbd); + if (rc < 0) { + return rc; + } + + rc = spdk_nbd_io_exec(nbd); + + return rc; +} + +static int +spdk_nbd_poll(void *arg) +{ + struct spdk_nbd_disk *nbd = arg; + int rc; + + rc = _spdk_nbd_poll(nbd); + if (rc < 0) { + SPDK_INFOLOG(SPDK_LOG_NBD, "spdk_nbd_poll() returned %s (%d); closing connection\n", + spdk_strerror(-rc), rc); + spdk_nbd_stop(nbd); + } + + return -1; +} + +static void * +nbd_start_kernel(void *arg) +{ + int dev_fd = (int)(intptr_t)arg; + + spdk_unaffinitize_thread(); + + /* This will block in the kernel until we close the spdk_sp_fd. 
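+ * The NBD_DO_IT ioctl below runs the kernel's transmission loop on the
+ * socket set earlier with NBD_SET_SOCK and does not return until the
+ * device is disconnected, which is why it gets its own detached thread.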
*/ + ioctl(dev_fd, NBD_DO_IT); + + pthread_exit(NULL); +} + +static void +spdk_nbd_bdev_hot_remove(void *remove_ctx) +{ + struct spdk_nbd_disk *nbd = remove_ctx; + + spdk_nbd_stop(nbd); +} + +struct spdk_nbd_disk * +spdk_nbd_start(const char *bdev_name, const char *nbd_path) +{ + struct spdk_nbd_disk *nbd; + struct spdk_bdev *bdev; + pthread_t tid; + int rc; + int sp[2]; + int flag; + + bdev = spdk_bdev_get_by_name(bdev_name); + if (bdev == NULL) { + SPDK_ERRLOG("no bdev %s exists\n", bdev_name); + return NULL; + } + + nbd = calloc(1, sizeof(*nbd)); + if (nbd == NULL) { + return NULL; + } + + nbd->dev_fd = -1; + nbd->spdk_sp_fd = -1; + nbd->kernel_sp_fd = -1; + + rc = spdk_bdev_open(bdev, true, spdk_nbd_bdev_hot_remove, nbd, &nbd->bdev_desc); + if (rc != 0) { + SPDK_ERRLOG("could not open bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc); + goto err; + } + + nbd->bdev = bdev; + + nbd->ch = spdk_bdev_get_io_channel(nbd->bdev_desc); + nbd->buf_align = spdk_max(spdk_bdev_get_buf_align(bdev), 64); + + rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sp); + if (rc != 0) { + SPDK_ERRLOG("socketpair failed\n"); + goto err; + } + + nbd->spdk_sp_fd = sp[0]; + nbd->kernel_sp_fd = sp[1]; + nbd->nbd_path = strdup(nbd_path); + if (!nbd->nbd_path) { + SPDK_ERRLOG("strdup allocation failure\n"); + goto err; + } + + TAILQ_INIT(&nbd->received_io_list); + TAILQ_INIT(&nbd->executed_io_list); + + /* Add nbd_disk to the end of disk list */ + rc = spdk_nbd_disk_register(nbd); + if (rc != 0) { + goto err; + } + + nbd->dev_fd = open(nbd_path, O_RDWR); + if (nbd->dev_fd == -1) { + SPDK_ERRLOG("open(\"%s\") failed: %s\n", nbd_path, spdk_strerror(errno)); + goto err; + } + + rc = ioctl(nbd->dev_fd, NBD_SET_BLKSIZE, spdk_bdev_get_block_size(bdev)); + if (rc == -1) { + SPDK_ERRLOG("ioctl(NBD_SET_BLKSIZE) failed: %s\n", spdk_strerror(errno)); + goto err; + } + + rc = ioctl(nbd->dev_fd, NBD_SET_SIZE_BLOCKS, spdk_bdev_get_num_blocks(bdev)); + if (rc == -1) { + SPDK_ERRLOG("ioctl(NBD_SET_SIZE_BLOCKS) failed: %s\n", spdk_strerror(errno)); + goto err; + } + + rc = ioctl(nbd->dev_fd, NBD_CLEAR_SOCK); + if (rc == -1) { + SPDK_ERRLOG("ioctl(NBD_CLEAR_SOCK) failed: %s\n", spdk_strerror(errno)); + goto err; + } + + SPDK_INFOLOG(SPDK_LOG_NBD, "Enabling kernel access to bdev %s via %s\n", + spdk_bdev_get_name(bdev), nbd_path); + + rc = ioctl(nbd->dev_fd, NBD_SET_SOCK, nbd->kernel_sp_fd); + if (rc == -1) { + SPDK_ERRLOG("ioctl(NBD_SET_SOCK) failed: %s\n", spdk_strerror(errno)); + goto err; + } + +#ifdef NBD_FLAG_SEND_TRIM + rc = ioctl(nbd->dev_fd, NBD_SET_FLAGS, NBD_FLAG_SEND_TRIM); + if (rc == -1) { + SPDK_ERRLOG("ioctl(NBD_SET_FLAGS) failed: %s\n", spdk_strerror(errno)); + goto err; + } +#endif + + rc = pthread_create(&tid, NULL, nbd_start_kernel, (void *)(intptr_t)nbd->dev_fd); + if (rc != 0) { + SPDK_ERRLOG("could not create thread: %s\n", spdk_strerror(rc)); + goto err; + } + + rc = pthread_detach(tid); + if (rc != 0) { + SPDK_ERRLOG("could not detach thread for nbd kernel: %s\n", spdk_strerror(rc)); + goto err; + } + + flag = fcntl(nbd->spdk_sp_fd, F_GETFL); + if (fcntl(nbd->spdk_sp_fd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", + nbd->spdk_sp_fd, spdk_strerror(errno)); + goto err; + } + + nbd->nbd_poller = spdk_poller_register(spdk_nbd_poll, nbd, 0); + + return nbd; + +err: + spdk_nbd_stop(nbd); + + return NULL; +} + +SPDK_LOG_REGISTER_COMPONENT("nbd", SPDK_LOG_NBD) diff --git a/src/spdk/lib/nbd/nbd_internal.h b/src/spdk/lib/nbd/nbd_internal.h new file mode 100644 
index 00000000..adf1cb21 --- /dev/null +++ b/src/spdk/lib/nbd/nbd_internal.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_NBD_INTERNAL_H +#define SPDK_NBD_INTERNAL_H + +#include "spdk/stdinc.h" +#include "spdk/nbd.h" + +struct spdk_nbd_disk *spdk_nbd_disk_find_by_nbd_path(const char *nbd_path); + +struct spdk_nbd_disk *spdk_nbd_disk_first(void); + +struct spdk_nbd_disk *spdk_nbd_disk_next(struct spdk_nbd_disk *prev); + +const char *spdk_nbd_disk_get_nbd_path(struct spdk_nbd_disk *nbd); + +const char *spdk_nbd_disk_get_bdev_name(struct spdk_nbd_disk *nbd); + +void nbd_disconnect(struct spdk_nbd_disk *nbd); + +#endif /* SPDK_NBD_INTERNAL_H */ diff --git a/src/spdk/lib/nbd/nbd_rpc.c b/src/spdk/lib/nbd/nbd_rpc.c new file mode 100644 index 00000000..bec64a13 --- /dev/null +++ b/src/spdk/lib/nbd/nbd_rpc.c @@ -0,0 +1,304 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/string.h" +#include "spdk/env.h" +#include "spdk/rpc.h" +#include "spdk/util.h" + +#include + +#include "nbd_internal.h" +#include "spdk_internal/log.h" + +struct rpc_start_nbd_disk { + char *bdev_name; + char *nbd_device; +}; + +static void +free_rpc_start_nbd_disk(struct rpc_start_nbd_disk *req) +{ + free(req->bdev_name); + free(req->nbd_device); +} + +static const struct spdk_json_object_decoder rpc_start_nbd_disk_decoders[] = { + {"bdev_name", offsetof(struct rpc_start_nbd_disk, bdev_name), spdk_json_decode_string}, + {"nbd_device", offsetof(struct rpc_start_nbd_disk, nbd_device), spdk_json_decode_string}, +}; + +static void +spdk_rpc_start_nbd_disk(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_start_nbd_disk req = {}; + struct spdk_json_write_ctx *w; + struct spdk_nbd_disk *nbd; + + if (spdk_json_decode_object(params, rpc_start_nbd_disk_decoders, + SPDK_COUNTOF(rpc_start_nbd_disk_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.nbd_device == NULL || req.bdev_name == NULL) { + goto invalid; + } + + /* make sure nbd_device is not registered */ + nbd = spdk_nbd_disk_find_by_nbd_path(req.nbd_device); + if (nbd) { + goto invalid; + } + + nbd = spdk_nbd_start(req.bdev_name, req.nbd_device); + if (!nbd) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_start_nbd_disk(&req); + return; + } + + spdk_json_write_string(w, req.nbd_device); + spdk_jsonrpc_end_result(request, w); + free_rpc_start_nbd_disk(&req); + return; + +invalid: + free_rpc_start_nbd_disk(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); +} + +SPDK_RPC_REGISTER("start_nbd_disk", spdk_rpc_start_nbd_disk, SPDK_RPC_RUNTIME) + +struct rpc_stop_nbd_disk { + char *nbd_device; +}; + +static void +free_rpc_stop_nbd_disk(struct rpc_stop_nbd_disk *req) +{ + free(req->nbd_device); +} + +static const struct spdk_json_object_decoder rpc_stop_nbd_disk_decoders[] = { + {"nbd_device", offsetof(struct rpc_stop_nbd_disk, nbd_device), spdk_json_decode_string}, +}; + +struct nbd_disconnect_arg { + struct spdk_jsonrpc_request *request; + struct spdk_nbd_disk *nbd; +}; + +static void * +nbd_disconnect_thread(void *arg) +{ + struct nbd_disconnect_arg *thd_arg = arg; + struct spdk_json_write_ctx *w; + + spdk_unaffinitize_thread(); + + nbd_disconnect(thd_arg->nbd); + + w = spdk_jsonrpc_begin_result(thd_arg->request); + if (w == NULL) { + goto out; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(thd_arg->request, w); + +out: + free(thd_arg); + pthread_exit(NULL); +} + +static void +spdk_rpc_stop_nbd_disk(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_stop_nbd_disk req = {}; + struct spdk_nbd_disk *nbd; + pthread_t tid; + struct nbd_disconnect_arg *thd_arg = NULL; + int rc; + + if (spdk_json_decode_object(params, 
rpc_stop_nbd_disk_decoders, + SPDK_COUNTOF(rpc_stop_nbd_disk_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + goto out; + } + + if (req.nbd_device == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + goto out; + } + + /* make sure nbd_device is registered */ + nbd = spdk_nbd_disk_find_by_nbd_path(req.nbd_device); + if (!nbd) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + goto out; + } + + /* + * thd_arg should be freed by created thread + * if thread is created successfully. + */ + thd_arg = malloc(sizeof(*thd_arg)); + if (!thd_arg) { + SPDK_ERRLOG("could not allocate nbd disconnect thread arg\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Out of memory"); + goto out; + } + + thd_arg->request = request; + thd_arg->nbd = nbd; + + /* + * NBD ioctl of disconnect will block until data are flushed. + * Create separate thread to execute it. + */ + rc = pthread_create(&tid, NULL, nbd_disconnect_thread, (void *)thd_arg); + if (rc != 0) { + SPDK_ERRLOG("could not create nbd disconnect thread: %s\n", spdk_strerror(rc)); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(rc)); + free(thd_arg); + goto out; + } + + rc = pthread_detach(tid); + if (rc != 0) { + SPDK_ERRLOG("could not detach nbd disconnect thread: %s\n", spdk_strerror(rc)); + goto out; + } + +out: + free_rpc_stop_nbd_disk(&req); +} + +SPDK_RPC_REGISTER("stop_nbd_disk", spdk_rpc_stop_nbd_disk, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_dump_nbd_info(struct spdk_json_write_ctx *w, + struct spdk_nbd_disk *nbd) +{ + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "nbd_device"); + spdk_json_write_string(w, spdk_nbd_disk_get_nbd_path(nbd)); + + spdk_json_write_name(w, "bdev_name"); + spdk_json_write_string(w, spdk_nbd_disk_get_bdev_name(nbd)); + + spdk_json_write_object_end(w); +} + +struct rpc_get_nbd_disks { + char *nbd_device; +}; + +static void +free_rpc_get_nbd_disks(struct rpc_get_nbd_disks *r) +{ + free(r->nbd_device); +} + +static const struct spdk_json_object_decoder rpc_get_nbd_disks_decoders[] = { + {"nbd_device", offsetof(struct rpc_get_nbd_disks, nbd_device), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_nbd_disks(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_nbd_disks req = {}; + struct spdk_json_write_ctx *w; + struct spdk_nbd_disk *nbd = NULL; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_get_nbd_disks_decoders, + SPDK_COUNTOF(rpc_get_nbd_disks_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.nbd_device) { + nbd = spdk_nbd_disk_find_by_nbd_path(req.nbd_device); + if (nbd == NULL) { + SPDK_ERRLOG("nbd device '%s' does not exist\n", req.nbd_device); + goto invalid; + } + + free_rpc_get_nbd_disks(&req); + } + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + if (nbd != NULL) { + spdk_rpc_dump_nbd_info(w, nbd); + } else { + for (nbd = spdk_nbd_disk_first(); nbd != NULL; nbd = spdk_nbd_disk_next(nbd)) { + spdk_rpc_dump_nbd_info(w, nbd); + } + } + + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); + + return; + +invalid: + 
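+ /* Reached when JSON decoding fails or the named nbd device is not registered. */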
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + free_rpc_get_nbd_disks(&req); +} +SPDK_RPC_REGISTER("get_nbd_disks", spdk_rpc_get_nbd_disks, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/net/Makefile b/src/spdk/lib/net/Makefile new file mode 100644 index 00000000..6431e7be --- /dev/null +++ b/src/spdk/lib/net/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = interface.c net_rpc.c + +LIBNAME = net + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/net/interface.c b/src/spdk/lib/net/interface.c new file mode 100644 index 00000000..5102695b --- /dev/null +++ b/src/spdk/lib/net/interface.c @@ -0,0 +1,505 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "net_internal.h"
+
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+
+#include "spdk/log.h"
+#include "spdk/net.h"
+
+#ifdef __linux__ /* Interface management is Linux-specific */
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+static TAILQ_HEAD(, spdk_interface) g_interface_head;
+
+static pthread_mutex_t interface_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int spdk_get_ifc_ipv4(void)
+{
+ int ret;
+ int rtattrlen;
+ int netlink_fd;
+ uint32_t ipv4_addr;
+
+ struct {
+ struct nlmsghdr n;
+ struct ifaddrmsg r;
+ struct rtattr rta;
+ } req;
+ char buf[16384];
+ struct nlmsghdr *nlmp;
+ struct ifaddrmsg *rtmp;
+ struct rtattr *rtatp;
+ struct spdk_interface *ifc;
+
+ netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (netlink_fd < 0) {
+ SPDK_ERRLOG("socket failed!\n");
+ return 1;
+ }
+
+ /*
+ * Prepare a message structure
+ */
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
+ req.n.nlmsg_type = RTM_GETADDR;
+
+ /* IPv4 only */
+ req.r.ifa_family = AF_INET;
+
+ /*
+ * Fill up all the attributes for the rtnetlink header.
+ */
+ assert(&req.rta == (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.n.nlmsg_len)));
+ req.rta.rta_len = RTA_LENGTH(16);
+
+ /* Send and recv the message from kernel */
+ ret = send(netlink_fd, &req, req.n.nlmsg_len, 0);
+ if (ret < 0) {
+ SPDK_ERRLOG("netlink send failed: %s\n", spdk_strerror(errno));
+ ret = 1;
+ goto exit;
+ }
+
+ ret = recv(netlink_fd, buf, sizeof(buf), 0);
+ if (ret <= 0) {
+ SPDK_ERRLOG("netlink recv failed: %s\n", spdk_strerror(errno));
+ ret = 1;
+ goto exit;
+ }
+
+ for (nlmp = (struct nlmsghdr *)buf; ret > (int)sizeof(*nlmp);) {
+ int len = nlmp->nlmsg_len;
+ int req_len = len - sizeof(*nlmp);
+
+ if (req_len < 0 || len > ret) {
+ SPDK_ERRLOG("error\n");
+ ret = 1;
+ goto exit;
+ }
+
+ if (!NLMSG_OK(nlmp, (uint32_t)ret)) {
+ SPDK_ERRLOG("NLMSG not OK\n");
+ ret = 1;
+ goto exit;
+ }
+
+ rtmp = (struct ifaddrmsg *)NLMSG_DATA(nlmp);
+ rtatp = (struct rtattr *)IFA_RTA(rtmp);
+
+ rtattrlen = IFA_PAYLOAD(nlmp);
+
+ for (; RTA_OK(rtatp, rtattrlen); rtatp = RTA_NEXT(rtatp, rtattrlen)) {
+ if (rtatp->rta_type == IFA_LOCAL) {
+ memcpy(&ipv4_addr, (struct in_addr *)RTA_DATA(rtatp),
+ sizeof(struct in_addr));
+ TAILQ_FOREACH(ifc, &g_interface_head, tailq) {
+ if (ifc->index == rtmp->ifa_index) {
+ /* add a new IP address to interface */
+ if (ifc->num_ip_addresses >= SPDK_MAX_IP_PER_IFC) {
+ SPDK_ERRLOG("SPDK: number of IP addresses supported for %s exceeded.
limit=%d\n", + ifc->name, + SPDK_MAX_IP_PER_IFC); + break; + } + ifc->ip_address[ifc->num_ip_addresses] = ipv4_addr; + ifc->num_ip_addresses++; + break; + } + } + } + } + ret -= NLMSG_ALIGN(len); + nlmp = (struct nlmsghdr *)((char *)nlmp + NLMSG_ALIGN(len)); + } + ret = 0; + +exit: + close(netlink_fd); + return ret; +} + + +static int spdk_process_new_interface_msg(struct nlmsghdr *h) +{ + int len; + struct spdk_interface *ifc; + struct ifinfomsg *iface; + struct rtattr *attribute; + + iface = (struct ifinfomsg *)NLMSG_DATA(h); + + ifc = (struct spdk_interface *) malloc(sizeof(*ifc)); + if (ifc == NULL) { + SPDK_ERRLOG("%s: Malloc failed\n", __func__); + return 1; + } + + memset(ifc, 0, sizeof(*ifc)); + + /* Set interface index */ + ifc->index = iface->ifi_index; + + len = h->nlmsg_len - NLMSG_LENGTH(sizeof(*iface)); + + /* Loop over all attributes for the NEWLINK message */ + for (attribute = IFLA_RTA(iface); RTA_OK(attribute, len); attribute = RTA_NEXT(attribute, len)) { + switch (attribute->rta_type) { + case IFLA_IFNAME: + if (if_indextoname(iface->ifi_index, ifc->name) == NULL) { + SPDK_ERRLOG("Indextoname failed!\n"); + free(ifc); + return 2; + } + break; + default: + break; + } + } + TAILQ_INSERT_TAIL(&g_interface_head, ifc, tailq); + return 0; +} + +static int spdk_prepare_ifc_list(void) +{ + int ret = 0; + struct nl_req_s { + struct nlmsghdr hdr; + struct rtgenmsg gen; + struct ifinfomsg ifi; + }; + int netlink_fd; + struct sockaddr_nl local; /* Our local (user space) side of the communication */ + struct sockaddr_nl kernel; /* The remote (kernel space) side of the communication */ + + struct msghdr rtnl_msg; /* Generic msghdr struct for use with sendmsg */ + struct iovec io; /* IO vector for sendmsg */ + + struct nl_req_s req; /* Structure that describes the rtnetlink packet itself */ + char reply[16384]; /* a large buffer to receive lots of link information */ + + pid_t pid = getpid(); /* Our process ID to build the correct netlink address */ + int end = 0; /* some flag to end loop parsing */ + + /* + * Prepare netlink socket for kernel/user space communication + */ + netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (netlink_fd < 0) { + SPDK_ERRLOG("socket failed!\n"); + return 1; + } + + memset(&local, 0, sizeof(local)); /* Fill-in local address information */ + local.nl_family = AF_NETLINK; + local.nl_pid = pid; + local.nl_groups = 0; + + /* RTNL socket is ready to use, prepare and send L2 request. 
*/ + memset(&rtnl_msg, 0, sizeof(rtnl_msg)); + memset(&kernel, 0, sizeof(kernel)); + memset(&req, 0, sizeof(req)); + + kernel.nl_family = AF_NETLINK; /* Fill-in kernel address (destination of our message) */ + + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = 1; + req.hdr.nlmsg_pid = pid; + + req.ifi.ifi_family = AF_UNSPEC; + req.ifi.ifi_type = 1; + + io.iov_base = &req; + io.iov_len = req.hdr.nlmsg_len; + rtnl_msg.msg_iov = &io; + rtnl_msg.msg_iovlen = 1; + rtnl_msg.msg_name = &kernel; + rtnl_msg.msg_namelen = sizeof(kernel); + + if (sendmsg(netlink_fd, &rtnl_msg, 0) == -1) { + SPDK_ERRLOG("Sendmsg failed!\n"); + ret = 1; + goto exit; + } + + /* Parse reply */ + while (!end) { + int len; + struct nlmsghdr *msg_ptr; /* Pointer to current message part */ + + struct msghdr rtnl_reply; /* Generic msghdr structure for use with recvmsg */ + struct iovec io_reply; + + memset(&io_reply, 0, sizeof(io_reply)); + memset(&rtnl_reply, 0, sizeof(rtnl_reply)); + + io.iov_base = reply; + io.iov_len = 8192; + rtnl_reply.msg_iov = &io; + rtnl_reply.msg_iovlen = 1; + rtnl_reply.msg_name = &kernel; + rtnl_reply.msg_namelen = sizeof(kernel); + + /* Read as much data as fits in the receive buffer */ + len = recvmsg(netlink_fd, &rtnl_reply, 0); + if (len) { + for (msg_ptr = (struct nlmsghdr *) reply; NLMSG_OK(msg_ptr, (uint32_t)len); + msg_ptr = NLMSG_NEXT(msg_ptr, len)) { + switch (msg_ptr->nlmsg_type) { + case NLMSG_DONE: /* This is the special meaning NLMSG_DONE message we asked for by using NLM_F_DUMP flag */ + end++; + break; + case RTM_NEWLINK: /* This is a RTM_NEWLINK message, which contains lots of information about a link */ + ret = spdk_process_new_interface_msg(msg_ptr); + if (ret != 0) { + goto exit; + } + break; + default: + break; + } + } + } + } +exit: + close(netlink_fd); + return ret; +} + +static int spdk_interface_available(uint32_t ifc_index) +{ + struct spdk_interface *ifc_entry; + + pthread_mutex_lock(&interface_lock); + TAILQ_FOREACH(ifc_entry, &g_interface_head, tailq) { + if (ifc_entry->index == ifc_index) { + pthread_mutex_unlock(&interface_lock); + return 0; + } + } + pthread_mutex_unlock(&interface_lock); + + return -1; +} + +static int netlink_addr_msg(uint32_t ifc_idx, uint32_t ip_address, uint32_t create) +{ + int fd; + struct sockaddr_nl la; + struct sockaddr_nl pa; + struct msghdr msg; + struct iovec iov; + int ifal; + struct { + struct nlmsghdr n; + struct ifaddrmsg r; + char buf[16384]; + } req; + struct rtattr *rta; + + if (spdk_interface_available(ifc_idx)) { + return -1; + } + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + SPDK_ERRLOG("socket failed!\n"); + return -1; + } + + /* setup local address & bind using this address. */ + bzero(&la, sizeof(la)); + la.nl_family = AF_NETLINK; + la.nl_pid = getpid(); + bind(fd, (struct sockaddr *) &la, sizeof(la)); + + /* initialize RTNETLINK request buffer. */ + bzero(&req, sizeof(req)); + + /* compute the initial length of the service request. */ + ifal = sizeof(struct ifaddrmsg); + + /* add first attrib: set IP addr and RTNETLINK buffer size. */ + rta = (struct rtattr *) req.buf; + rta->rta_type = IFA_ADDRESS; + rta->rta_len = sizeof(struct rtattr) + 4; + memcpy(((char *)rta) + sizeof(struct rtattr), &ip_address, sizeof(ip_address)); + ifal += rta->rta_len; + + /* add second attrib. 
*/ + rta = (struct rtattr *)(((char *)rta) + rta->rta_len); + rta->rta_type = IFA_LOCAL; + rta->rta_len = sizeof(struct rtattr) + 4; + memcpy(((char *)rta) + sizeof(struct rtattr), &ip_address, sizeof(ip_address)); + ifal += rta->rta_len; + + /* setup the NETLINK header. */ + req.n.nlmsg_len = NLMSG_LENGTH(ifal); + if (create) { + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_APPEND; + req.n.nlmsg_type = RTM_NEWADDR; + } else { + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELADDR; + } + + /* setup the service header (struct rtmsg). */ + req.r.ifa_family = AF_INET; + req.r.ifa_prefixlen = 32; /* hardcoded */ + req.r.ifa_flags = IFA_F_PERMANENT | IFA_F_SECONDARY; + req.r.ifa_index = ifc_idx; + req.r.ifa_scope = 0; + + /* create the remote address to communicate. */ + bzero(&pa, sizeof(pa)); + pa.nl_family = AF_NETLINK; + + /* initialize & create the struct msghdr supplied to the sendmsg() function. */ + bzero(&msg, sizeof(msg)); + msg.msg_name = (void *) &pa; + msg.msg_namelen = sizeof(pa); + + /* place the pointer & size of the RTNETLINK message in the struct msghdr. */ + iov.iov_base = (void *) &req.n; + iov.iov_len = req.n.nlmsg_len; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + /* send the RTNETLINK message to kernel. */ + sendmsg(fd, &msg, 0); + close(fd); + return 0; +} + +static void spdk_interface_ip_update(void) +{ + struct spdk_interface *ifc_entry; + + pthread_mutex_lock(&interface_lock); + TAILQ_FOREACH(ifc_entry, &g_interface_head, tailq) { + ifc_entry->num_ip_addresses = 0; + memset(ifc_entry->ip_address, 0, sizeof(ifc_entry->ip_address)); + } + spdk_get_ifc_ipv4(); + pthread_mutex_unlock(&interface_lock); +} + +int +spdk_interface_init(void) +{ + int rc = 0; + + TAILQ_INIT(&g_interface_head); + rc = spdk_prepare_ifc_list(); + if (!rc) { + rc = spdk_get_ifc_ipv4(); + } + + return rc; +} + +void +spdk_interface_destroy(void) +{ + struct spdk_interface *ifc_entry; + + while (!TAILQ_EMPTY(&g_interface_head)) { + ifc_entry = TAILQ_FIRST(&g_interface_head); + TAILQ_REMOVE(&g_interface_head, ifc_entry, tailq); + free(ifc_entry); + } +} + +int +spdk_interface_add_ip_address(int ifc_index, char *ip_addr) +{ + uint32_t addr; + + addr = inet_addr(ip_addr); + return netlink_addr_msg(ifc_index, addr, 1); +} + +int +spdk_interface_delete_ip_address(int ifc_index, char *ip_addr) +{ + uint32_t addr; + + addr = inet_addr(ip_addr); + return netlink_addr_msg(ifc_index, addr, 0); +} + +void *spdk_interface_get_list(void) +{ + spdk_interface_ip_update(); + return &g_interface_head; +} + +#else /* Not Linux */ + +int +spdk_interface_init(void) +{ + return 0; +} + +void +spdk_interface_destroy(void) +{ +} + +int +spdk_interface_add_ip_address(int ifc_index, char *ip_addr) +{ + return -1; +} + +int +spdk_interface_delete_ip_address(int ifc_index, char *ip_addr) +{ + return -1; +} + +void * +spdk_interface_get_list(void) +{ + return NULL; +} + +#endif diff --git a/src/spdk/lib/net/net_internal.h b/src/spdk/lib/net/net_internal.h new file mode 100644 index 00000000..8dbaf633 --- /dev/null +++ b/src/spdk/lib/net/net_internal.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_NET_INTERNAL_H +#define SPDK_NET_INTERNAL_H + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" + +#define SPDK_IFNAMSIZE 32 +#define SPDK_MAX_IP_PER_IFC 32 + +struct spdk_interface { + char name[SPDK_IFNAMSIZE]; + uint32_t index; + uint32_t num_ip_addresses; /* number of IP addresses defined */ + uint32_t ip_address[SPDK_MAX_IP_PER_IFC]; + TAILQ_ENTRY(spdk_interface) tailq; +}; + +/** + * Add an ip address to the network interface. + * + * \param ifc_index Index of the network interface. + * \param ip_addr Ip address to add. + * + * \return 0 on success, -1 on failure. + */ +int spdk_interface_add_ip_address(int ifc_index, char *ip_addr); + +/** + * Delete an ip address from the network interface. + * + * \param ifc_index Index of the network interface. + * \param ip_addr Ip address to delete. + * + * \return 0 on success, -1 on failure. + */ +int spdk_interface_delete_ip_address(int ifc_index, char *ip_addr); + +/** + * Get the list of all the network interfaces. + * + * \return a pointer to the head of the linked list of all the network interfaces. + */ +void *spdk_interface_get_list(void); + +#endif /* SPDK_NET_INTERNAL_H */ diff --git a/src/spdk/lib/net/net_rpc.c b/src/spdk/lib/net/net_rpc.c new file mode 100644 index 00000000..aaaf6865 --- /dev/null +++ b/src/spdk/lib/net/net_rpc.c @@ -0,0 +1,180 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "net_internal.h" + +#include "spdk/stdinc.h" + +#include "spdk/rpc.h" +#include "spdk/net.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +struct rpc_ip_address { + int32_t ifc_index; + char *ip_address; +}; + +static void +free_rpc_ip_address(struct rpc_ip_address *req) +{ + free(req->ip_address); +} + +static const struct spdk_json_object_decoder rpc_ip_address_decoders[] = { + {"ifc_index", offsetof(struct rpc_ip_address, ifc_index), spdk_json_decode_int32}, + {"ip_address", offsetof(struct rpc_ip_address, ip_address), spdk_json_decode_string}, +}; + +static void +spdk_rpc_add_ip_address(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_ip_address req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_ip_address_decoders, + SPDK_COUNTOF(rpc_ip_address_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_NET, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (spdk_interface_add_ip_address(req.ifc_index, req.ip_address)) { + goto invalid; + } + + free_rpc_ip_address(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_ip_address(&req); +} +SPDK_RPC_REGISTER("add_ip_address", spdk_rpc_add_ip_address, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_delete_ip_address(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_ip_address req = {}; + struct spdk_json_write_ctx *w; + + if (spdk_json_decode_object(params, rpc_ip_address_decoders, + SPDK_COUNTOF(rpc_ip_address_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_NET, "spdk_json_decode_object failed\n"); + goto invalid; + } + + if (spdk_interface_delete_ip_address(req.ifc_index, req.ip_address)) { + goto invalid; + } + + free_rpc_ip_address(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_ip_address(&req); +} +SPDK_RPC_REGISTER("delete_ip_address", spdk_rpc_delete_ip_address, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_get_interfaces(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + TAILQ_HEAD(, spdk_interface) *interface_head = spdk_interface_get_list(); + struct spdk_interface *ifc; + char *ip_address; + struct in_addr inaddr; + uint32_t i; + + if (params != NULL) { + 
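+ /* get_interfaces takes no parameters, so any supplied params object is an error. */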
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_interfaces requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + TAILQ_FOREACH(ifc, interface_head, tailq) { + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, ifc->name); + + spdk_json_write_name(w, "ifc_index"); + spdk_json_write_int32(w, ifc->index); + + spdk_json_write_name(w, "ip_addr"); + spdk_json_write_array_begin(w); + for (i = 0; i < ifc->num_ip_addresses; i++) { + memcpy(&inaddr, &ifc->ip_address[i], sizeof(uint32_t)); + ip_address = inet_ntoa(inaddr); + spdk_json_write_string(w, ip_address); + } + spdk_json_write_array_end(w); + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_interfaces", spdk_rpc_get_interfaces, SPDK_RPC_RUNTIME) + +SPDK_LOG_REGISTER_COMPONENT("net", SPDK_LOG_NET) diff --git a/src/spdk/lib/nvme/Makefile b/src/spdk/lib/nvme/Makefile new file mode 100644 index 00000000..3351c87c --- /dev/null +++ b/src/spdk/lib/nvme/Makefile @@ -0,0 +1,61 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c nvme_ctrlr_ocssd_cmd.c \ + nvme_ns_ocssd_cmd.c +C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c +LIBNAME = nvme +LOCAL_SYS_LIBS = -luuid +ifeq ($(CONFIG_RDMA),y) +LOCAL_SYS_LIBS += -libverbs -lrdmacm +#Attach only if FreeBSD and RDMA is specified with configure +ifeq ($(OS),FreeBSD) +# Mellanox - MLX4 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx4.*)","") +LOCAL_SYS_LIBS += -lmlx4 +endif +# Mellanox - MLX5 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx5.*)","") +LOCAL_SYS_LIBS += -lmlx5 +endif +# Chelsio HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libcxgb4.*)","") +LOCAL_SYS_LIBS += -lcxgb4 +endif +endif +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/nvme/nvme.c b/src/spdk/lib/nvme/nvme.c new file mode 100644 index 00000000..dc657966 --- /dev/null +++ b/src/spdk/lib/nvme/nvme.c @@ -0,0 +1,862 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/nvmf_spec.h" +#include "nvme_internal.h" + +#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver" + +struct nvme_driver *g_spdk_nvme_driver; +pid_t g_spdk_nvme_pid; + +int32_t spdk_nvme_retry_count; + +/* gross timeout of 180 seconds in milliseconds */ +static int g_nvme_driver_timeout_ms = 3 * 60 * 1000; + +static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_init_ctrlrs = + TAILQ_HEAD_INITIALIZER(g_nvme_init_ctrlrs); + +/* Per-process attached controller list */ +static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs = + TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs); + +/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */ +static bool +nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE; +} + +/* Caller must hold g_spdk_nvme_driver->lock */ +void +nvme_ctrlr_connected(struct spdk_nvme_ctrlr *ctrlr) +{ + TAILQ_INSERT_TAIL(&g_nvme_init_ctrlrs, ctrlr, tailq); +} + +int +spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr) +{ + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + nvme_ctrlr_proc_put_ref(ctrlr); + + if (nvme_ctrlr_get_ref_count(ctrlr) == 0) { + if (nvme_ctrlr_shared(ctrlr)) { + TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq); + } else { + TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq); + } + nvme_ctrlr_destruct(ctrlr); + } + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + return 0; +} + +void +nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_completion_poll_status *status = arg; + + /* + * Copy status into the argument passed by the caller, so that + * the caller can check the status to determine if the + * the request passed or failed. + */ + memcpy(&status->cpl, cpl, sizeof(*cpl)); + status->done = true; +} + +/** + * Poll qpair for completions until a command completes. + * + * \param qpair queue to poll + * \param status completion status + * \param robust_mutex optional robust mutex to lock while polling qpair + * + * \return 0 if command completed without error, negative errno on failure + * + * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback + * and status as the callback argument. + */ +int +spdk_nvme_wait_for_completion_robust_lock( + struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status, + pthread_mutex_t *robust_mutex) +{ + memset(&status->cpl, 0, sizeof(status->cpl)); + status->done = false; + + while (status->done == false) { + if (robust_mutex) { + nvme_robust_mutex_lock(robust_mutex); + } + + spdk_nvme_qpair_process_completions(qpair, 0); + + if (robust_mutex) { + nvme_robust_mutex_unlock(robust_mutex); + } + } + + return spdk_nvme_cpl_is_error(&status->cpl) ? 
-EIO : 0; +} + +int +spdk_nvme_wait_for_completion(struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status) +{ + return spdk_nvme_wait_for_completion_robust_lock(qpair, status, NULL); +} + +static void +nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_request *req = arg; + enum spdk_nvme_data_transfer xfer; + + if (req->user_buffer && req->payload_size) { + /* Copy back to the user buffer and free the contig buffer */ + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); + xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); + if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || + xfer == SPDK_NVME_DATA_BIDIRECTIONAL) { + assert(req->pid == getpid()); + memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size); + } + + spdk_dma_free(req->payload.contig_or_cb_arg); + } + + /* Call the user's original callback now that the buffer has been copied */ + req->user_cb_fn(req->user_cb_arg, cpl); +} + +/** + * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer. + * + * This is intended for use in non-fast-path functions (admin commands, reservations, etc.) + * where the overhead of a copy is not a problem. + */ +struct nvme_request * +nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair, + void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, bool host_to_controller) +{ + struct nvme_request *req; + void *dma_buffer = NULL; + uint64_t phys_addr; + + if (buffer && payload_size) { + dma_buffer = spdk_zmalloc(payload_size, 4096, &phys_addr, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (!dma_buffer) { + return NULL; + } + + if (host_to_controller) { + memcpy(dma_buffer, buffer, payload_size); + } + } + + req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete, + NULL); + if (!req) { + spdk_free(dma_buffer); + return NULL; + } + + req->user_cb_fn = cb_fn; + req->user_cb_arg = cb_arg; + req->user_buffer = buffer; + req->cb_arg = req; + + return req; +} + +/** + * Check if a request has exceeded the controller timeout. + * + * \param req request to check for timeout. + * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn) + * \param active_proc per-process data for the controller associated with req + * \param now_tick current time from spdk_get_ticks() + * \return 0 if requests submitted more recently than req should still be checked for timeouts, or + * 1 if requests newer than req need not be checked. + * + * The request's timeout callback will be called if needed; the caller is only responsible for + * calling this function on each outstanding request. + */ +int +nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + struct spdk_nvme_ctrlr_process *active_proc, + uint64_t now_tick) +{ + struct spdk_nvme_qpair *qpair = req->qpair; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + assert(active_proc->timeout_cb_fn != NULL); + + if (req->timed_out || req->submit_tick == 0) { + return 0; + } + + if (req->pid != g_spdk_nvme_pid) { + return 0; + } + + if (nvme_qpair_is_admin_queue(qpair) && + req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { + return 0; + } + + if (req->submit_tick + active_proc->timeout_ticks > now_tick) { + return 1; + } + + req->timed_out = true; + + /* + * We don't want to expose the admin queue to the user, + * so when we're timing out admin commands set the + * qpair to NULL. 
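+ * The timeout callback can then recognize an admin command timeout by the
+ * NULL qpair argument.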
+ */ + active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, + nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, + cid); + return 0; +} + +int +nvme_robust_mutex_init_shared(pthread_mutex_t *mtx) +{ + int rc = 0; + +#ifdef __FreeBSD__ + pthread_mutex_init(mtx, NULL); +#else + pthread_mutexattr_t attr; + + if (pthread_mutexattr_init(&attr)) { + return -1; + } + if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || + pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || + pthread_mutex_init(mtx, &attr)) { + rc = -1; + } + pthread_mutexattr_destroy(&attr); +#endif + + return rc; +} + +int +nvme_driver_init(void) +{ + int ret = 0; + /* Any socket ID */ + int socket_id = -1; + + /* Each process needs its own pid. */ + g_spdk_nvme_pid = getpid(); + + /* + * Only one thread from one process will do this driver init work. + * The primary process will reserve the shared memory and do the + * initialization. + * The secondary process will lookup the existing reserved memory. + */ + if (spdk_process_is_primary()) { + /* The unique named memzone already reserved. */ + if (g_spdk_nvme_driver != NULL) { + return 0; + } else { + g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME, + sizeof(struct nvme_driver), socket_id, + SPDK_MEMZONE_NO_IOVA_CONTIG); + } + + if (g_spdk_nvme_driver == NULL) { + SPDK_ERRLOG("primary process failed to reserve memory\n"); + + return -1; + } + } else { + g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME); + + /* The unique named memzone already reserved by the primary process. */ + if (g_spdk_nvme_driver != NULL) { + int ms_waited = 0; + + /* Wait the nvme driver to get initialized. */ + while ((g_spdk_nvme_driver->initialized == false) && + (ms_waited < g_nvme_driver_timeout_ms)) { + ms_waited++; + nvme_delay(1000); /* delay 1ms */ + } + if (g_spdk_nvme_driver->initialized == false) { + SPDK_ERRLOG("timeout waiting for primary process to init\n"); + + return -1; + } + } else { + SPDK_ERRLOG("primary process is not started yet\n"); + + return -1; + } + + return 0; + } + + /* + * At this moment, only one thread from the primary process will do + * the g_spdk_nvme_driver initialization + */ + assert(spdk_process_is_primary()); + + ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock); + if (ret != 0) { + SPDK_ERRLOG("failed to initialize mutex\n"); + spdk_memzone_free(SPDK_NVME_DRIVER_NAME); + return ret; + } + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + g_spdk_nvme_driver->initialized = false; + + TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs); + + spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id); + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + return ret; +} + +int +nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid, void *devhandle, + spdk_nvme_probe_cb probe_cb, void *cb_ctx) +{ + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ctrlr_opts opts; + + assert(trid != NULL); + + spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); + + if (!probe_cb || probe_cb(cb_ctx, trid, &opts)) { + ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle); + if (ctrlr == NULL) { + SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr); + return -1; + } + + TAILQ_INSERT_TAIL(&g_nvme_init_ctrlrs, ctrlr, tailq); + return 0; + } + + return 1; +} + +static int +nvme_init_controllers(void *cb_ctx, spdk_nvme_attach_cb attach_cb) +{ + int rc = 0; + int start_rc; + struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp; + + 
nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + /* Initialize all new controllers in the g_nvme_init_ctrlrs list in parallel. */ + while (!TAILQ_EMPTY(&g_nvme_init_ctrlrs)) { + TAILQ_FOREACH_SAFE(ctrlr, &g_nvme_init_ctrlrs, tailq, ctrlr_tmp) { + /* Drop the driver lock while calling nvme_ctrlr_process_init() + * since it needs to acquire the driver lock internally when initializing + * controller. + * + * TODO: Rethink the locking - maybe reset should take the lock so that start() and + * the functions it calls (in particular nvme_ctrlr_set_num_qpairs()) + * can assume it is held. + */ + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + start_rc = nvme_ctrlr_process_init(ctrlr); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + if (start_rc) { + /* Controller failed to initialize. */ + TAILQ_REMOVE(&g_nvme_init_ctrlrs, ctrlr, tailq); + SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr); + nvme_ctrlr_destruct(ctrlr); + rc = -1; + break; + } + + if (ctrlr->state == NVME_CTRLR_STATE_READY) { + /* + * Controller has been initialized. + * Move it to the attached_ctrlrs list. + */ + TAILQ_REMOVE(&g_nvme_init_ctrlrs, ctrlr, tailq); + if (nvme_ctrlr_shared(ctrlr)) { + TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq); + } else { + TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq); + } + + /* + * Increase the ref count before calling attach_cb() as the user may + * call nvme_detach() immediately. + */ + nvme_ctrlr_proc_get_ref(ctrlr); + + /* + * Unlock while calling attach_cb() so the user can call other functions + * that may take the driver lock, like nvme_detach(). + */ + if (attach_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + attach_cb(cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + + break; + } + } + } + + g_spdk_nvme_driver->initialized = true; + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + return rc; +} + +/* This function must not be called while holding g_spdk_nvme_driver->lock */ +static struct spdk_nvme_ctrlr * +spdk_nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvme_ctrlr *ctrlr; + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(trid); + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + return ctrlr; +} + +/* This function must be called while holding g_spdk_nvme_driver->lock */ +struct spdk_nvme_ctrlr * +spdk_nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvme_ctrlr *ctrlr; + + /* Search per-process list */ + TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) { + return ctrlr; + } + } + + /* Search multi-process shared list */ + TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) { + return ctrlr; + } + } + + return NULL; +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +static int +spdk_nvme_probe_internal(const struct spdk_nvme_transport_id *trid, void *cb_ctx, + spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb, struct spdk_nvme_ctrlr **connected_ctrlr) +{ + int rc; + struct spdk_nvme_ctrlr *ctrlr; + bool direct_connect = (connected_ctrlr != NULL); + + if (!spdk_nvme_transport_available(trid->trtype)) { + SPDK_ERRLOG("NVMe trtype %u not available\n", trid->trtype); + 
return -1; + } + + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + nvme_transport_ctrlr_scan(trid, cb_ctx, probe_cb, remove_cb, direct_connect); + + /* + * Probe controllers on the shared_attached_ctrlrs list + */ + if (!spdk_process_is_primary() && (trid->trtype == SPDK_NVME_TRANSPORT_PCIE)) { + TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) { + /* Do not attach other ctrlrs if user specify a valid trid */ + if ((strlen(trid->traddr) != 0) && + (spdk_nvme_transport_id_compare(trid, &ctrlr->trid))) { + continue; + } + + nvme_ctrlr_proc_get_ref(ctrlr); + + /* + * Unlock while calling attach_cb() so the user can call other functions + * that may take the driver lock, like nvme_detach(). + */ + if (attach_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + attach_cb(cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + } + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + + rc = 0; + + goto exit; + } + + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + /* + * Keep going even if one or more nvme_attach() calls failed, + * but maintain the value of rc to signal errors when we return. + */ + + rc = nvme_init_controllers(cb_ctx, attach_cb); + +exit: + if (connected_ctrlr) { + *connected_ctrlr = spdk_nvme_get_ctrlr_by_trid(trid); + } + + return rc; +} + +int +spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx, + spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + int rc; + struct spdk_nvme_transport_id trid_pcie; + + rc = nvme_driver_init(); + if (rc != 0) { + return rc; + } + + if (trid == NULL) { + memset(&trid_pcie, 0, sizeof(trid_pcie)); + trid_pcie.trtype = SPDK_NVME_TRANSPORT_PCIE; + trid = &trid_pcie; + } + + return spdk_nvme_probe_internal(trid, cb_ctx, probe_cb, attach_cb, remove_cb, NULL); +} + +static bool +spdk_nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvme_ctrlr_connect_opts *requested_opts = cb_ctx; + + assert(requested_opts->opts); + + assert(requested_opts->opts_size != 0); + + memcpy(opts, requested_opts->opts, spdk_min(sizeof(*opts), requested_opts->opts_size)); + + return true; +} + +struct spdk_nvme_ctrlr * +spdk_nvme_connect(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) +{ + int rc; + struct spdk_nvme_ctrlr_connect_opts connect_opts = {}; + struct spdk_nvme_ctrlr_connect_opts *user_connect_opts = NULL; + struct spdk_nvme_ctrlr *ctrlr = NULL; + spdk_nvme_probe_cb probe_cb = NULL; + + if (trid == NULL) { + SPDK_ERRLOG("No transport ID specified\n"); + return NULL; + } + + rc = nvme_driver_init(); + if (rc != 0) { + return NULL; + } + + if (opts && opts_size > 0) { + connect_opts.opts = opts; + connect_opts.opts_size = opts_size; + user_connect_opts = &connect_opts; + probe_cb = spdk_nvme_connect_probe_cb; + } + + spdk_nvme_probe_internal(trid, user_connect_opts, probe_cb, NULL, NULL, &ctrlr); + + return ctrlr; +} + +int +spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str) +{ + if (trtype == NULL || str == NULL) { + return -EINVAL; + } + + if (strcasecmp(str, "PCIe") == 0) { + *trtype = SPDK_NVME_TRANSPORT_PCIE; + } else if (strcasecmp(str, "RDMA") == 0) { + *trtype = SPDK_NVME_TRANSPORT_RDMA; + } else if (strcasecmp(str, "FC") == 0) { + *trtype = SPDK_NVME_TRANSPORT_FC; + } else { + return -ENOENT; + } + return 0; +} + 
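For context, the probe/attach flow implemented above (spdk_nvme_probe() with a probe callback that accepts or tunes controller options, and an attach callback that receives the initialized controller) is normally driven from application code roughly as follows. This is a minimal sketch against the public spdk/nvme.h and spdk/env.h APIs visible in this file; the callback names, the application name string, and the error handling are illustrative, and the environment setup assumes the DPDK-based env layer.

    #include "spdk/stdinc.h"
    #include "spdk/env.h"
    #include "spdk/nvme.h"

    /* Return true to let the driver attach to every controller that is found. */
    static bool
    probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
             struct spdk_nvme_ctrlr_opts *opts)
    {
            printf("probing %s\n", trid->traddr);
            return true;
    }

    /* Called once a controller has finished initialization and reached READY. */
    static void
    attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
              struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    {
            struct spdk_nvme_ctrlr **out = cb_ctx;

            /* The per-process reference count was already taken before this callback. */
            *out = ctrlr;
    }

    int
    main(void)
    {
            struct spdk_env_opts env_opts;
            struct spdk_nvme_ctrlr *ctrlr = NULL;

            spdk_env_opts_init(&env_opts);
            env_opts.name = "nvme_probe_example";
            if (spdk_env_init(&env_opts) < 0) {
                    fprintf(stderr, "failed to initialize SPDK env\n");
                    return 1;
            }

            /* A NULL transport ID defaults to scanning the PCIe bus, as shown above. */
            if (spdk_nvme_probe(NULL, &ctrlr, probe_cb, attach_cb, NULL) != 0) {
                    fprintf(stderr, "spdk_nvme_probe() failed\n");
                    return 1;
            }

            if (ctrlr != NULL) {
                    spdk_nvme_detach(ctrlr);
            }
            return 0;
    }

When a single, known controller is wanted, spdk_nvme_connect() above gives the same result for one transport ID without enumerating every device.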
+const char * +spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype) +{ + switch (trtype) { + case SPDK_NVME_TRANSPORT_PCIE: + return "PCIe"; + case SPDK_NVME_TRANSPORT_RDMA: + return "RDMA"; + case SPDK_NVME_TRANSPORT_FC: + return "FC"; + default: + return NULL; + } +} + +int +spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str) +{ + if (adrfam == NULL || str == NULL) { + return -EINVAL; + } + + if (strcasecmp(str, "IPv4") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IPV4; + } else if (strcasecmp(str, "IPv6") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IPV6; + } else if (strcasecmp(str, "IB") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_IB; + } else if (strcasecmp(str, "FC") == 0) { + *adrfam = SPDK_NVMF_ADRFAM_FC; + } else { + return -ENOENT; + } + return 0; +} + +const char * +spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam) +{ + switch (adrfam) { + case SPDK_NVMF_ADRFAM_IPV4: + return "IPv4"; + case SPDK_NVMF_ADRFAM_IPV6: + return "IPv6"; + case SPDK_NVMF_ADRFAM_IB: + return "IB"; + case SPDK_NVMF_ADRFAM_FC: + return "FC"; + default: + return NULL; + } +} + +int +spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str) +{ + const char *sep, *sep1; + const char *whitespace = " \t\n"; + size_t key_len, val_len; + char key[32]; + char val[1024]; + + if (trid == NULL || str == NULL) { + return -EINVAL; + } + + while (*str != '\0') { + str += strspn(str, whitespace); + + sep = strchr(str, ':'); + if (!sep) { + sep = strchr(str, '='); + if (!sep) { + SPDK_ERRLOG("Key without ':' or '=' separator\n"); + return -EINVAL; + } + } else { + sep1 = strchr(str, '='); + if ((sep1 != NULL) && (sep1 < sep)) { + sep = sep1; + } + } + + key_len = sep - str; + if (key_len >= sizeof(key)) { + SPDK_ERRLOG("Transport key length %zu greater than maximum allowed %zu\n", + key_len, sizeof(key) - 1); + return -EINVAL; + } + + memcpy(key, str, key_len); + key[key_len] = '\0'; + + str += key_len + 1; /* Skip key: */ + val_len = strcspn(str, whitespace); + if (val_len == 0) { + SPDK_ERRLOG("Key without value\n"); + return -EINVAL; + } + + if (val_len >= sizeof(val)) { + SPDK_ERRLOG("Transport value length %zu greater than maximum allowed %zu\n", + val_len, sizeof(val) - 1); + return -EINVAL; + } + + memcpy(val, str, val_len); + val[val_len] = '\0'; + + str += val_len; + + if (strcasecmp(key, "trtype") == 0) { + if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) { + SPDK_ERRLOG("Unknown trtype '%s'\n", val); + return -EINVAL; + } + } else if (strcasecmp(key, "adrfam") == 0) { + if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) { + SPDK_ERRLOG("Unknown adrfam '%s'\n", val); + return -EINVAL; + } + } else if (strcasecmp(key, "traddr") == 0) { + if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) { + SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRADDR_MAX_LEN); + return -EINVAL; + } + memcpy(trid->traddr, val, val_len + 1); + } else if (strcasecmp(key, "trsvcid") == 0) { + if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) { + SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_TRSVCID_MAX_LEN); + return -EINVAL; + } + memcpy(trid->trsvcid, val, val_len + 1); + } else if (strcasecmp(key, "subnqn") == 0) { + if (val_len > SPDK_NVMF_NQN_MAX_LEN) { + SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n", + val_len, SPDK_NVMF_NQN_MAX_LEN); + return -EINVAL; + } + memcpy(trid->subnqn, val, val_len + 1); + } else { + SPDK_ERRLOG("Unknown transport ID key 
'%s'\n", key); + } + } + + return 0; +} + +static int +cmp_int(int a, int b) +{ + return a - b; +} + +int +spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1, + const struct spdk_nvme_transport_id *trid2) +{ + int cmp; + + cmp = cmp_int(trid1->trtype, trid2->trtype); + if (cmp) { + return cmp; + } + + if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_addr pci_addr1; + struct spdk_pci_addr pci_addr2; + + /* Normalize PCI addresses before comparing */ + if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 || + spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) { + return -1; + } + + /* PCIe transport ID only uses trtype and traddr */ + return spdk_pci_addr_compare(&pci_addr1, &pci_addr2); + } + + cmp = strcasecmp(trid1->traddr, trid2->traddr); + if (cmp) { + return cmp; + } + + cmp = cmp_int(trid1->adrfam, trid2->adrfam); + if (cmp) { + return cmp; + } + + cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid); + if (cmp) { + return cmp; + } + + cmp = strcmp(trid1->subnqn, trid2->subnqn); + if (cmp) { + return cmp; + } + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("nvme", SPDK_LOG_NVME) diff --git a/src/spdk/lib/nvme/nvme_ctrlr.c b/src/spdk/lib/nvme/nvme_ctrlr.c new file mode 100644 index 00000000..69ae0878 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ctrlr.c @@ -0,0 +1,2678 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "nvme_internal.h" + +#include "spdk/env.h" +#include "spdk/string.h" + +static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_async_event_request *aer); +static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns); +static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns); + +static int +nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc) +{ + return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), + &cc->raw); +} + +static int +nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts) +{ + return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw), + &csts->raw); +} + +int +nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap) +{ + return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw), + &cap->raw); +} + +int +nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs) +{ + return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw), + &vs->raw); +} + +static int +nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc) +{ + return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), + cc->raw); +} + +void +spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size) +{ + char host_id_str[SPDK_UUID_STRING_LEN]; + + assert(opts); + + memset(opts, 0, opts_size); + +#define FIELD_OK(field) \ + offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size + + if (FIELD_OK(num_io_queues)) { + opts->num_io_queues = DEFAULT_MAX_IO_QUEUES; + } + + if (FIELD_OK(use_cmb_sqs)) { + opts->use_cmb_sqs = true; + } + + if (FIELD_OK(arb_mechanism)) { + opts->arb_mechanism = SPDK_NVME_CC_AMS_RR; + } + + if (FIELD_OK(keep_alive_timeout_ms)) { + opts->keep_alive_timeout_ms = 10 * 1000; + } + + if (FIELD_OK(io_queue_size)) { + opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE; + } + + if (FIELD_OK(io_queue_requests)) { + opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS; + } + + if (FIELD_OK(host_id)) { + memset(opts->host_id, 0, sizeof(opts->host_id)); + } + + if (nvme_driver_init() == 0) { + if (FIELD_OK(extended_host_id)) { + memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id, + sizeof(opts->extended_host_id)); + } + + if (FIELD_OK(hostnqn)) { + spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str), + &g_spdk_nvme_driver->default_extended_host_id); + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "2014-08.org.nvmexpress:uuid:%s", host_id_str); + } + } + + if (FIELD_OK(src_addr)) { + memset(opts->src_addr, 0, sizeof(opts->src_addr)); + } + + if (FIELD_OK(src_svcid)) { + memset(opts->src_svcid, 0, sizeof(opts->src_svcid)); + } + + if (FIELD_OK(command_set)) { + opts->command_set = SPDK_NVME_CC_CSS_NVM; + } +#undef FIELD_OK +} + +/** + * This function will be called when the process allocates the IO qpair. + * Note: the ctrlr_lock must be held when calling this function. 
+ */ +static void +nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr_process *active_proc; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq); + qpair->active_proc = active_proc; + } +} + +/** + * This function will be called when the process frees the IO qpair. + * Note: the ctrlr_lock must be held when calling this function. + */ +static void +nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr_process *active_proc; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvme_qpair *active_qpair, *tmp_qpair; + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (!active_proc) { + return; + } + + TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs, + per_process_tailq, tmp_qpair) { + if (active_qpair == qpair) { + TAILQ_REMOVE(&active_proc->allocated_io_qpairs, + active_qpair, per_process_tailq); + + break; + } + } +} + +void +spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_io_qpair_opts *opts, + size_t opts_size) +{ + assert(ctrlr); + + assert(opts); + + memset(opts, 0, opts_size); + +#define FIELD_OK(field) \ + offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size + + if (FIELD_OK(qprio)) { + opts->qprio = SPDK_NVME_QPRIO_URGENT; + } + + if (FIELD_OK(io_queue_size)) { + opts->io_queue_size = ctrlr->opts.io_queue_size; + } + + if (FIELD_OK(io_queue_requests)) { + opts->io_queue_requests = ctrlr->opts.io_queue_requests; + } + +#undef FIELD_OK +} + +struct spdk_nvme_qpair * +spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_io_qpair_opts *user_opts, + size_t opts_size) +{ + uint32_t qid; + struct spdk_nvme_qpair *qpair; + union spdk_nvme_cc_register cc; + struct spdk_nvme_io_qpair_opts opts; + + if (!ctrlr) { + return NULL; + } + + /* + * Get the default options, then overwrite them with the user-provided options + * up to opts_size. + * + * This allows for extensions of the opts structure without breaking + * ABI compatibility. + */ + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + if (user_opts) { + memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); + } + + if (nvme_ctrlr_get_cc(ctrlr, &cc)) { + SPDK_ERRLOG("get_cc failed\n"); + return NULL; + } + + /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */ + if ((opts.qprio & 3) != opts.qprio) { + return NULL; + } + + /* + * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the + * default round robin arbitration method. + */ + if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) { + SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n"); + return NULL; + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + /* + * Get the first available I/O queue ID. 
+ */ + qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1); + if (qid > ctrlr->opts.num_io_queues) { + SPDK_ERRLOG("No free I/O queue IDs\n"); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return NULL; + } + + qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts); + if (qpair == NULL) { + SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n"); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return NULL; + } + spdk_bit_array_clear(ctrlr->free_io_qids, qid); + TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq); + + nvme_ctrlr_proc_add_io_qpair(qpair); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) { + spdk_delay_us(100); + } + + return qpair; +} + +int +spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr; + + if (qpair == NULL) { + return 0; + } + + ctrlr = qpair->ctrlr; + + if (qpair->in_completion_context) { + /* + * There are many cases where it is convenient to delete an io qpair in the context + * of that qpair's completion routine. To handle this properly, set a flag here + * so that the completion routine will perform an actual delete after the context + * unwinds. + */ + qpair->delete_after_completion_context = 1; + return 0; + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + nvme_ctrlr_proc_remove_io_qpair(qpair); + + TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq); + spdk_bit_array_set(ctrlr->free_io_qids, qpair->id); + + if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -1; + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return 0; +} + +static void +nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_intel_log_page_directory *log_page_directory) +{ + if (log_page_directory == NULL) { + return; + } + + if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) { + return; + } + + ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true; + + if (log_page_directory->read_latency_log_len || + (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) { + ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true; + } + if (log_page_directory->write_latency_log_len || + (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) { + ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true; + } + if (log_page_directory->temperature_statistics_log_len) { + ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true; + } + if (log_page_directory->smart_log_len) { + ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true; + } + if (log_page_directory->marketing_description_log_len) { + ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true; + } +} + +static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc = 0; + uint64_t phys_addr = 0; + struct nvme_completion_poll_status status; + struct spdk_nvme_intel_log_page_directory *log_page_directory; + + log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory), + 64, &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (log_page_directory == NULL) { + SPDK_ERRLOG("could not allocate log_page_directory\n"); + return -ENXIO; + } + + rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, + SPDK_NVME_GLOBAL_NS_TAG, log_page_directory, + sizeof(struct spdk_nvme_intel_log_page_directory), + 0, nvme_completion_poll_cb, &status); + if (rc != 0) { + 
spdk_free(log_page_directory); + return rc; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + spdk_free(log_page_directory); + SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n"); + return -ENXIO; + } + + nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory); + spdk_free(log_page_directory); + return 0; +} + +static int +nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc = 0; + + memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported)); + /* Mandatory pages */ + ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true; + ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true; + ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true; + if (ctrlr->cdata.lpa.celp) { + ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true; + } + if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) { + rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr); + } + + return rc; +} + +static void +nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr) +{ + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true; + ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true; +} + +static void +nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr) +{ + memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported)); + /* Mandatory features */ + ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true; + ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true; + /* Optional features */ + if (ctrlr->cdata.vwc.present) { + ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true; + } + if (ctrlr->cdata.apsta.supported) { + ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true; + } + if (ctrlr->cdata.hmpre) { + ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true; + } + if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) { + nvme_ctrlr_set_intel_supported_features(ctrlr); + } +} + +void +nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove) +{ + /* + * Set the flag here and leave the work failure of qpairs to + * spdk_nvme_qpair_process_completions(). 
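+ * (i.e. each qpair is failed lazily the next time its completions are processed)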
+ */ + if (hot_remove) { + ctrlr->is_removed = true; + } + ctrlr->is_failed = true; + SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr); +} + +static void +nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr) +{ + union spdk_nvme_cc_register cc; + union spdk_nvme_csts_register csts; + uint32_t ms_waited = 0; + uint32_t shutdown_timeout_ms; + + if (ctrlr->is_removed) { + return; + } + + if (nvme_ctrlr_get_cc(ctrlr, &cc)) { + SPDK_ERRLOG("get_cc() failed\n"); + return; + } + + cc.bits.shn = SPDK_NVME_SHN_NORMAL; + + if (nvme_ctrlr_set_cc(ctrlr, &cc)) { + SPDK_ERRLOG("set_cc() failed\n"); + return; + } + + /* + * The NVMe specification defines RTD3E to be the time between + * setting SHN = 1 until the controller will set SHST = 10b. + * If the device doesn't report RTD3 entry latency, or if it + * reports RTD3 entry latency less than 10 seconds, pick + * 10 seconds as a reasonable amount of time to + * wait before proceeding. + */ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e); + shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000; + shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000); + SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms); + + do { + if (nvme_ctrlr_get_csts(ctrlr, &csts)) { + SPDK_ERRLOG("get_csts() failed\n"); + return; + } + + if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n", + ms_waited); + return; + } + + nvme_delay(1000); + ms_waited++; + } while (ms_waited < shutdown_timeout_ms); + + SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms); +} + +static int +nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) +{ + union spdk_nvme_cc_register cc; + int rc; + + rc = nvme_transport_ctrlr_enable(ctrlr); + if (rc != 0) { + SPDK_ERRLOG("transport ctrlr_enable failed\n"); + return rc; + } + + if (nvme_ctrlr_get_cc(ctrlr, &cc)) { + SPDK_ERRLOG("get_cc() failed\n"); + return -EIO; + } + + if (cc.bits.en != 0) { + SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__); + return -EINVAL; + } + + cc.bits.en = 1; + cc.bits.css = 0; + cc.bits.shn = 0; + cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ + cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ + + /* Page size is 2 ^ (12 + mps). */ + cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12; + + if (ctrlr->cap.bits.css == 0) { + SPDK_INFOLOG(SPDK_LOG_NVME, + "Drive reports no command sets supported. 
Assuming NVM is supported.\n"); + ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; + } + + if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n", + ctrlr->opts.command_set, ctrlr->cap.bits.css); + return -EINVAL; + } + + cc.bits.css = ctrlr->opts.command_set; + + switch (ctrlr->opts.arb_mechanism) { + case SPDK_NVME_CC_AMS_RR: + break; + case SPDK_NVME_CC_AMS_WRR: + if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) { + break; + } + return -EINVAL; + case SPDK_NVME_CC_AMS_VS: + if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) { + break; + } + return -EINVAL; + default: + return -EINVAL; + } + + cc.bits.ams = ctrlr->opts.arb_mechanism; + + if (nvme_ctrlr_set_cc(ctrlr, &cc)) { + SPDK_ERRLOG("set_cc() failed\n"); + return -EIO; + } + + return 0; +} + +#ifdef DEBUG +static const char * +nvme_ctrlr_state_string(enum nvme_ctrlr_state state) +{ + switch (state) { + case NVME_CTRLR_STATE_INIT_DELAY: + return "delay init"; + case NVME_CTRLR_STATE_INIT: + return "init"; + case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: + return "disable and wait for CSTS.RDY = 1"; + case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: + return "disable and wait for CSTS.RDY = 0"; + case NVME_CTRLR_STATE_ENABLE: + return "enable controller by writing CC.EN = 1"; + case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: + return "wait for CSTS.RDY = 1"; + case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE: + return "enable admin queue"; + case NVME_CTRLR_STATE_IDENTIFY: + return "identify controller"; + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: + return "wait for identify controller"; + case NVME_CTRLR_STATE_SET_NUM_QUEUES: + return "set number of queues"; + case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: + return "wait for set number of queues"; + case NVME_CTRLR_STATE_GET_NUM_QUEUES: + return "get number of queues"; + case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES: + return "wait for get number of queues"; + case NVME_CTRLR_STATE_CONSTRUCT_NS: + return "construct namespaces"; + case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: + return "identify active ns"; + case NVME_CTRLR_STATE_IDENTIFY_NS: + return "identify ns"; + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: + return "wait for identify ns"; + case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: + return "identify namespace id descriptors"; + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: + return "wait for identify namespace id descriptors"; + case NVME_CTRLR_STATE_CONFIGURE_AER: + return "configure AER"; + case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: + return "wait for configure aer"; + case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: + return "set supported log pages"; + case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: + return "set supported features"; + case NVME_CTRLR_STATE_SET_DB_BUF_CFG: + return "set doorbell buffer config"; + case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: + return "wait for doorbell buffer config"; + case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: + return "set keep alive timeout"; + case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: + return "wait for set keep alive timeout"; + case NVME_CTRLR_STATE_SET_HOST_ID: + return "set host ID"; + case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: + return "wait for set host ID"; + case NVME_CTRLR_STATE_READY: + return "ready"; + case NVME_CTRLR_STATE_ERROR: + return "error"; + } + return "unknown"; +}; +#endif /* DEBUG */ + +static void +nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state, + uint64_t timeout_in_ms) +{ + ctrlr->state = 
state; + if (timeout_in_ms == NVME_TIMEOUT_INFINITE) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n", + nvme_ctrlr_state_string(ctrlr->state)); + ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE; + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n", + nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms); + ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000; + } +} + +static void +nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr) +{ + if (ctrlr->shadow_doorbell) { + spdk_dma_free(ctrlr->shadow_doorbell); + ctrlr->shadow_doorbell = NULL; + } + + if (ctrlr->eventidx) { + spdk_dma_free(ctrlr->eventidx); + ctrlr->eventidx = NULL; + } +} + +static void +nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("Doorbell buffer config failed\n"); + } else { + SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n", + ctrlr->trid.traddr); + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc = 0; + uint64_t prp1, prp2; + + if (!ctrlr->cdata.oacs.doorbell_buffer_config) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE); + return 0; + } + + if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE); + return 0; + } + + /* only 1 page size for doorbell buffer */ + ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, + &prp1); + if (ctrlr->shadow_doorbell == NULL) { + rc = -ENOMEM; + goto error; + } + + ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, &prp2); + if (ctrlr->eventidx == NULL) { + rc = -ENOMEM; + goto error; + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, NVME_TIMEOUT_INFINITE); + + rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2, + nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr); + if (rc != 0) { + goto error; + } + + return 0; + +error: + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + nvme_ctrlr_free_doorbell_buffer(ctrlr); + return rc; +} + +int +spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc = 0; + struct spdk_nvme_qpair *qpair; + struct nvme_request *req, *tmp; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + if (ctrlr->is_resetting || ctrlr->is_failed) { + /* + * Controller is already resetting or has failed. Return + * immediately since there is no need to kick off another + * reset in these cases. + */ + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return 0; + } + + ctrlr->is_resetting = true; + + SPDK_NOTICELOG("resetting controller\n"); + + /* Free all of the queued abort requests */ + STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) { + STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); + nvme_free_request(req); + ctrlr->outstanding_aborts--; + } + + /* Disable all queues before disabling the controller hardware. 
*/ + nvme_qpair_disable(ctrlr->adminq); + TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { + nvme_qpair_disable(qpair); + } + + /* Doorbell buffer config is invalid during reset */ + nvme_ctrlr_free_doorbell_buffer(ctrlr); + + /* Set the state back to INIT to cause a full hardware reset. */ + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); + + while (ctrlr->state != NVME_CTRLR_STATE_READY) { + if (nvme_ctrlr_process_init(ctrlr) != 0) { + SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__); + nvme_ctrlr_fail(ctrlr, false); + rc = -1; + break; + } + } + + if (!ctrlr->is_failed) { + /* Reinitialize qpairs */ + TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { + if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) { + nvme_ctrlr_fail(ctrlr, false); + rc = -1; + } + } + } + + ctrlr->is_resetting = false; + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +static void +nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_ERRLOG("nvme_identify_controller failed!\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return; + } + + /* + * Use MDTS to ensure our default max_xfer_size doesn't exceed what the + * controller supports. + */ + ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr); + SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size); + if (ctrlr->cdata.mdts > 0) { + ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size, + ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts))); + SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size); + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid); + if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + ctrlr->cntlid = ctrlr->cdata.cntlid; + } else { + /* + * Fabrics controllers should already have CNTLID from the Connect command. + * + * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data, + * trust the one from Connect. 
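+ * (The check below only logs the mismatch at debug level; ctrlr->cntlid keeps the value from the Connect command.)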
+ */ + if (ctrlr->cntlid != ctrlr->cdata.cntlid) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, + "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n", + ctrlr->cdata.cntlid, ctrlr->cntlid); + } + } + + if (ctrlr->cdata.sgls.supported) { + ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED; + ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr); + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, NVME_TIMEOUT_INFINITE); + + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, + &ctrlr->cdata, sizeof(ctrlr->cdata), + nvme_ctrlr_identify_done, ctrlr); + if (rc != 0) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +int +nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_completion_poll_status status; + int rc; + uint32_t i; + uint32_t num_pages; + uint32_t next_nsid = 0; + uint32_t *new_ns_list = NULL; + + + /* + * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list) + */ + num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1; + new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size, + NULL); + if (!new_ns_list) { + SPDK_ERRLOG("Failed to allocate active_ns_list!\n"); + return -ENOMEM; + } + + if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { + /* + * Iterate through the pages and fetch each chunk of 1024 namespaces until + * there are no more active namespaces + */ + for (i = 0; i < num_pages; i++) { + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid, + &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list), + nvme_completion_poll_cb, &status); + if (rc != 0) { + goto fail; + } + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n"); + rc = -ENXIO; + goto fail; + } + next_nsid = new_ns_list[1024 * i + 1023]; + if (next_nsid == 0) { + /* + * No more active namespaces found, no need to fetch additional chunks + */ + break; + } + } + + } else { + /* + * Controller doesn't support active ns list CNS 0x02 so dummy up + * an active ns list + */ + for (i = 0; i < ctrlr->num_ns; i++) { + new_ns_list[i] = i + 1; + } + } + + /* + * Now that that the list is properly setup, we can swap it in to the ctrlr and + * free up the previous one. 
+ */ + spdk_dma_free(ctrlr->active_ns_list); + ctrlr->active_ns_list = new_ns_list; + + return 0; +fail: + spdk_dma_free(new_ns_list); + return rc; +} + +static void +nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; + struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; + uint32_t nsid; + int rc; + + if (spdk_nvme_cpl_is_error(cpl)) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return; + } else { + nvme_ns_set_identify_data(ns); + } + + /* move on to the next active NS */ + nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, NVME_TIMEOUT_INFINITE); + return; + } + ns->ctrlr = ctrlr; + ns->id = nsid; + + rc = nvme_ctrlr_identify_ns_async(ns); + if (rc) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + } +} + +static int +nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; + struct spdk_nvme_ns_data *nsdata; + + nsdata = &ctrlr->nsdata[ns->id - 1]; + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, NVME_TIMEOUT_INFINITE); + return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, + nsdata, sizeof(*nsdata), + nvme_ctrlr_identify_ns_async_done, ns); +} + +static int +nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t nsid; + struct spdk_nvme_ns *ns; + int rc; + + nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + /* No active NS, move on to the next state */ + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + return 0; + } + + ns->ctrlr = ctrlr; + ns->id = nsid; + + rc = nvme_ctrlr_identify_ns_async(ns); + if (rc) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + } + + return rc; +} + +static void +nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg; + struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; + uint32_t nsid; + int rc; + + if (spdk_nvme_cpl_is_error(cpl)) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + return; + } + + /* move on to the next active NS */ + nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id); + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + return; + } + + rc = nvme_ctrlr_identify_id_desc_async(ns); + if (rc) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + } +} + +static int +nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; + + memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, NVME_TIMEOUT_INFINITE); + return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, + 0, ns->id, ns->id_desc_list, sizeof(ns->id_desc_list), + nvme_ctrlr_identify_id_desc_async_done, ns); +} + +static int +nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t nsid; + struct spdk_nvme_ns *ns; + int rc; + + if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) || + (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { + 
SPDK_DEBUGLOG(SPDK_LOG_NVME, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + return 0; + } + + nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + /* No active NS, move on to the next state */ + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + return 0; + } + + rc = nvme_ctrlr_identify_id_desc_async(ns); + if (rc) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + } + + return rc; +} + +static void +nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_ERRLOG("Set Features - Number of Queues failed!\n"); + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_NUM_QUEUES, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) { + SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n", + ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES); + ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES; + } else if (ctrlr->opts.num_io_queues < 1) { + SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n"); + ctrlr->opts.num_io_queues = 1; + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, NVME_TIMEOUT_INFINITE); + + rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues, + nvme_ctrlr_set_num_queues_done, ctrlr); + if (rc != 0) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +static void +nvme_ctrlr_get_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + uint32_t cq_allocated, sq_allocated, min_allocated, i; + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_ERRLOG("Get Features - Number of Queues failed!\n"); + ctrlr->opts.num_io_queues = 0; + } else { + /* + * Data in cdw0 is 0-based. + * Lower 16-bits indicate number of submission queues allocated. + * Upper 16-bits indicate number of completion queues allocated. + */ + sq_allocated = (cpl->cdw0 & 0xFFFF) + 1; + cq_allocated = (cpl->cdw0 >> 16) + 1; + + /* + * For 1:1 queue mapping, set number of allocated queues to be minimum of + * submission and completion queues. + */ + min_allocated = spdk_min(sq_allocated, cq_allocated); + + /* Set number of queues to be minimum of requested and actually allocated. */ + ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues); + } + + ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1); + if (ctrlr->free_io_qids == NULL) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return; + } + + /* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */ + spdk_bit_array_clear(ctrlr->free_io_qids, 0); + for (i = 1; i <= ctrlr->opts.num_io_queues; i++) { + spdk_bit_array_set(ctrlr->free_io_qids, i); + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_get_num_queues(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES, NVME_TIMEOUT_INFINITE); + + /* Obtain the number of queues allocated using Get Features. 
*/ + rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_ctrlr_get_num_queues_done, ctrlr); + if (rc != 0) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +static void +nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + uint32_t keep_alive_interval_ms; + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n", + cpl->status.sc, cpl->status.sct); + ctrlr->opts.keep_alive_timeout_ms = 0; + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return; + } + + if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n", + cpl->cdw0); + } + + ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0; + + keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2; + if (keep_alive_interval_ms == 0) { + keep_alive_interval_ms = 1; + } + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms); + + ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000); + + /* Schedule the first Keep Alive to be sent as soon as possible. */ + ctrlr->next_keep_alive_tick = spdk_get_ticks(); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + if (ctrlr->opts.keep_alive_timeout_ms == 0) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE); + return 0; + } + + if (ctrlr->cdata.kas == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n"); + ctrlr->opts.keep_alive_timeout_ms = 0; + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE); + return 0; + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE); + + /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */ + rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0, + nvme_ctrlr_set_keep_alive_timeout_done, ctrlr); + if (rc != 0) { + SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc); + ctrlr->opts.keep_alive_timeout_ms = 0; + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +static void +nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + /* + * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature + * is optional. + */ + SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n", + cpl->status.sc, cpl->status.sct); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n"); + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr) +{ + uint8_t *host_id; + uint32_t host_id_size; + int rc; + + if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + /* + * NVMe-oF sends the host ID during Connect and doesn't allow + * Set Features - Host Identifier after Connect, so we don't need to do anything here. 
+ */ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); + return 0; + } + + if (ctrlr->cdata.ctratt.host_id_exhid_supported) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n"); + host_id = ctrlr->opts.extended_host_id; + host_id_size = sizeof(ctrlr->opts.extended_host_id); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n"); + host_id = ctrlr->opts.host_id; + host_id_size = sizeof(ctrlr->opts.host_id); + } + + /* If the user specified an all-zeroes host identifier, don't send the command. */ + if (spdk_mem_all_zero(host_id, host_id_size)) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, + "User did not specify host ID - not sending Set Features - Host ID\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE); + return 0; + } + + SPDK_TRACEDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size); + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, NVME_TIMEOUT_INFINITE); + + rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr); + if (rc != 0) { + SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +static void +nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr) +{ + if (ctrlr->ns) { + uint32_t i, num_ns = ctrlr->num_ns; + + for (i = 0; i < num_ns; i++) { + nvme_ns_destruct(&ctrlr->ns[i]); + } + + spdk_free(ctrlr->ns); + ctrlr->ns = NULL; + ctrlr->num_ns = 0; + } + + if (ctrlr->nsdata) { + spdk_free(ctrlr->nsdata); + ctrlr->nsdata = NULL; + } + + spdk_dma_free(ctrlr->active_ns_list); + ctrlr->active_ns_list = NULL; +} + +static void +nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t i, nn = ctrlr->cdata.nn; + struct spdk_nvme_ns_data *nsdata; + + for (i = 0; i < nn; i++) { + struct spdk_nvme_ns *ns = &ctrlr->ns[i]; + uint32_t nsid = i + 1; + nsdata = &ctrlr->nsdata[nsid - 1]; + + if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + if (nvme_ns_construct(ns, nsid, ctrlr) != 0) { + continue; + } + } + + if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + nvme_ns_destruct(ns); + } + } +} + +static int +nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc = 0; + uint32_t nn = ctrlr->cdata.nn; + uint64_t phys_addr = 0; + + /* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset), + * so check if we need to reallocate. 
+ */ + if (nn != ctrlr->num_ns) { + nvme_ctrlr_destruct_namespaces(ctrlr); + + if (nn == 0) { + SPDK_WARNLOG("controller has 0 namespaces\n"); + return 0; + } + + ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, + &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (ctrlr->ns == NULL) { + rc = -ENOMEM; + goto fail; + } + + ctrlr->nsdata = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64, + &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA); + if (ctrlr->nsdata == NULL) { + rc = -ENOMEM; + goto fail; + } + + ctrlr->num_ns = nn; + } + + return 0; + +fail: + nvme_ctrlr_destruct_namespaces(ctrlr); + return rc; +} + +static void +nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_async_event_request *aer = arg; + struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr; + struct spdk_nvme_ctrlr_process *active_proc; + union spdk_nvme_async_event_completion event; + int rc; + + if (cpl->status.sct == SPDK_NVME_SCT_GENERIC && + cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) { + /* + * This is simulated when controller is being shut down, to + * effectively abort outstanding asynchronous event requests + * and make sure all memory is freed. Do not repost the + * request in this case. + */ + return; + } + + if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && + cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) { + /* + * SPDK will only send as many AERs as the device says it supports, + * so this status code indicates an out-of-spec device. Do not repost + * the request in this case. + */ + SPDK_ERRLOG("Controller appears out-of-spec for asynchronous event request\n" + "handling. Do not repost this AER.\n"); + return; + } + + event.raw = cpl->cdw0; + if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && + (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { + rc = nvme_ctrlr_identify_active_ns(ctrlr); + if (rc) { + return; + } + nvme_ctrlr_update_namespaces(ctrlr); + } + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc && active_proc->aer_cb_fn) { + active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl); + } + + /* + * Repost another asynchronous event request to replace the one + * that just completed. + */ + if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) { + /* + * We can't do anything to recover from a failure here, + * so just print a warning message and leave the AER unsubmitted. + */ + SPDK_ERRLOG("resubmitting AER failed!\n"); + } +} + +static int +nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_async_event_request *aer) +{ + struct nvme_request *req; + + aer->ctrlr = ctrlr; + req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer); + aer->req = req; + if (req == NULL) { + return -1; + } + + req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST; + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static void +nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_async_event_request *aer; + int rc; + uint32_t i; + struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, NVME_TIMEOUT_INFINITE); + return; + } + + /* aerl is a zero-based value, so we need to add 1 here. 
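+ * For example, aerl == 0 means the controller supports a single outstanding
+ * Asynchronous Event Request and aerl == 3 means four; the result is further
+ * capped at NVME_MAX_ASYNC_EVENTS by the spdk_min() below.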
*/ + ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1)); + + for (i = 0; i < ctrlr->num_aers; i++) { + aer = &ctrlr->aer[i]; + rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); + if (rc) { + SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return; + } + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, NVME_TIMEOUT_INFINITE); +} + +static int +nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) +{ + union spdk_nvme_feat_async_event_configuration config; + int rc; + + config.raw = 0; + config.bits.crit_warn.bits.available_spare = 1; + config.bits.crit_warn.bits.temperature = 1; + config.bits.crit_warn.bits.device_reliability = 1; + config.bits.crit_warn.bits.read_only = 1; + config.bits.crit_warn.bits.volatile_memory_backup = 1; + + if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) { + if (ctrlr->cdata.oaes.ns_attribute_notices) { + config.bits.ns_attr_notice = 1; + } + if (ctrlr->cdata.oaes.fw_activation_notices) { + config.bits.fw_activation_notice = 1; + } + } + if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) { + config.bits.telemetry_log_notice = 1; + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + + rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, + nvme_ctrlr_configure_aer_done, + ctrlr); + if (rc != 0) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE); + return rc; + } + + return 0; +} + +struct spdk_nvme_ctrlr_process * +spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid) +{ + struct spdk_nvme_ctrlr_process *active_proc; + + TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { + if (active_proc->pid == pid) { + return active_proc; + } + } + + return NULL; +} + +struct spdk_nvme_ctrlr_process * +spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr) +{ + return spdk_nvme_ctrlr_get_process(ctrlr, getpid()); +} + +/** + * This function will be called when a process is using the controller. + * 1. For the primary process, it is called when constructing the controller. + * 2. For the secondary process, it is called at probing the controller. + * Note: will check whether the process is already added for the same process. + */ +int +nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) +{ + struct spdk_nvme_ctrlr_process *ctrlr_proc; + pid_t pid = getpid(); + + /* Check whether the process is already added or not */ + if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) { + return 0; + } + + /* Initialize the per process properties for this ctrlr */ + ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), + 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (ctrlr_proc == NULL) { + SPDK_ERRLOG("failed to allocate memory to track the process props\n"); + + return -1; + } + + ctrlr_proc->is_primary = spdk_process_is_primary(); + ctrlr_proc->pid = pid; + STAILQ_INIT(&ctrlr_proc->active_reqs); + ctrlr_proc->devhandle = devhandle; + ctrlr_proc->ref = 0; + TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs); + + TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); + + return 0; +} + +/** + * This function will be called when the process detaches the controller. + * Note: the ctrlr_lock must be held when calling this function. 
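+ * A hypothetical caller therefore looks like:
+ *
+ *   nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
+ *   nvme_ctrlr_remove_process(ctrlr, proc);
+ *   nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
+ *
+ * Any IO qpairs still owned by the departing process are freed here before
+ * the process entry itself is released.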
+ */ +static void +nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_ctrlr_process *proc) +{ + struct spdk_nvme_qpair *qpair, *tmp_qpair; + + assert(STAILQ_EMPTY(&proc->active_reqs)); + + TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { + spdk_nvme_ctrlr_free_io_qpair(qpair); + } + + TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq); + + spdk_dma_free(proc); +} + +/** + * This function will be called when the process exited unexpectedly + * in order to free any incomplete nvme request, allocated IO qpairs + * and allocated memory. + * Note: the ctrlr_lock must be held when calling this function. + */ +static void +nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc) +{ + struct nvme_request *req, *tmp_req; + struct spdk_nvme_qpair *qpair, *tmp_qpair; + + STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { + STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); + + assert(req->pid == proc->pid); + + nvme_free_request(req); + } + + TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) { + TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq); + + /* + * The process may have been killed while some qpairs were in their + * completion context. Clear that flag here to allow these IO + * qpairs to be deleted. + */ + qpair->in_completion_context = 0; + + qpair->no_deletion_notification_needed = 1; + + spdk_nvme_ctrlr_free_io_qpair(qpair); + } + + spdk_dma_free(proc); +} + +/** + * This function will be called when destructing the controller. + * 1. There is no more admin request on this controller. + * 2. Clean up any left resource allocation when its associated process is gone. + */ +void +nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc, *tmp; + + /* Free all the processes' properties and make sure no pending admin IOs */ + TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { + TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); + + assert(STAILQ_EMPTY(&active_proc->active_reqs)); + + spdk_free(active_proc); + } +} + +/** + * This function will be called when any other process attaches or + * detaches the controller in order to cleanup those unexpectedly + * terminated processes. + * Note: the ctrlr_lock must be held when calling this function. 
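+ * Liveness is probed with kill(pid, 0): an ESRCH error means the pid no
+ * longer exists, so that process's leftover resources are reclaimed via
+ * nvme_ctrlr_cleanup_process(); live processes are simply counted and the
+ * count is returned to the caller.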
+ */ +static int +nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc, *tmp; + int active_proc_count = 0; + + TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { + if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) { + SPDK_ERRLOG("process %d terminated unexpected\n", active_proc->pid); + + TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); + + nvme_ctrlr_cleanup_process(active_proc); + } else { + active_proc_count++; + } + } + + return active_proc_count; +} + +void +nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + nvme_ctrlr_remove_inactive_proc(ctrlr); + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + active_proc->ref++; + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); +} + +void +nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc; + int proc_count; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr); + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + active_proc->ref--; + assert(active_proc->ref >= 0); + + /* + * The last active process will be removed at the end of + * the destruction of the controller. + */ + if (active_proc->ref == 0 && proc_count != 1) { + nvme_ctrlr_remove_process(ctrlr, active_proc); + } + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); +} + +int +nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc; + int ref = 0; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + nvme_ctrlr_remove_inactive_proc(ctrlr); + + TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) { + ref += active_proc->ref; + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return ref; +} + +/** + * Get the PCI device handle which is only visible to its associated process. + */ +struct spdk_pci_device * +nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_ctrlr_process *active_proc; + struct spdk_pci_device *devhandle = NULL; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + devhandle = active_proc->devhandle; + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return devhandle; +} + +static void +nvme_ctrlr_enable_admin_queue(struct spdk_nvme_ctrlr *ctrlr) +{ + nvme_transport_qpair_reset(ctrlr->adminq); + nvme_qpair_enable(ctrlr->adminq); +} + +/** + * This function will be called repeatedly during initialization until the controller is ready. + */ +int +nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) +{ + union spdk_nvme_cc_register cc; + union spdk_nvme_csts_register csts; + uint32_t ready_timeout_in_ms; + int rc = 0; + + /* + * May need to avoid accessing any register on the target controller + * for a while. Return early without touching the FSM. + * Check sleep_timeout_tsc > 0 for unit test. + */ + if ((ctrlr->sleep_timeout_tsc > 0) && + (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) { + return 0; + } + ctrlr->sleep_timeout_tsc = 0; + + if (nvme_ctrlr_get_cc(ctrlr, &cc) || + nvme_ctrlr_get_csts(ctrlr, &csts)) { + if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) { + /* While a device is resetting, it may be unable to service MMIO reads + * temporarily. Allow for this case. 
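+ * Treat the failed register read as transient here and jump to the
+ * init_timeout check below; the controller is only failed if the state
+ * timeout (state_timeout_tsc) eventually expires.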
+ */ + SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n"); + goto init_timeout; + } + SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state); + nvme_ctrlr_fail(ctrlr, false); + return -EIO; + } + + ready_timeout_in_ms = 500 * ctrlr->cap.bits.to; + + /* + * Check if the current initialization step is done or has timed out. + */ + switch (ctrlr->state) { + case NVME_CTRLR_STATE_INIT_DELAY: + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms); + /* + * Controller may need some delay before it's enabled. + * + * This is a workaround for an issue where the PCIe-attached NVMe controller + * is not ready after VFIO reset. We delay the initialization rather than the + * enabling itself, because this is required only for the very first enabling + * - directly after a VFIO reset. + * + * TODO: Figure out what is actually going wrong. + */ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Adding 2 second delay before initializing the controller\n"); + ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000); + break; + + case NVME_CTRLR_STATE_INIT: + /* Begin the hardware initialization by making sure the controller is disabled. */ + if (cc.bits.en) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n"); + /* + * Controller is currently enabled. We need to disable it to cause a reset. + * + * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready. + * Wait for the ready bit to be 1 before disabling the controller. + */ + if (csts.bits.rdy == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms); + return 0; + } + + /* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n"); + cc.bits.en = 0; + if (nvme_ctrlr_set_cc(ctrlr, &cc)) { + SPDK_ERRLOG("set_cc() failed\n"); + nvme_ctrlr_fail(ctrlr, false); + return -EIO; + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); + + /* + * Wait 2.5 seconds before accessing PCI registers. + * Not using sleep() to avoid blocking other controller's initialization. + */ + if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2.5 seconds before reading registers\n"); + ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); + } + return 0; + } else { + if (csts.bits.rdy == 1) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n"); + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); + return 0; + } + break; + + case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1: + if (csts.bits.rdy == 1) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n"); + /* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. 
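+ * The overall handshake driven by this state machine is roughly:
+ * EN=1/RDY=1 -> write EN=0 -> wait for RDY=0 -> write EN=1 -> wait for
+ * RDY=1, after which the admin queue is enabled and Identify begins.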
*/ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n"); + cc.bits.en = 0; + if (nvme_ctrlr_set_cc(ctrlr, &cc)) { + SPDK_ERRLOG("set_cc() failed\n"); + nvme_ctrlr_fail(ctrlr, false); + return -EIO; + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms); + return 0; + } + break; + + case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0: + if (csts.bits.rdy == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n"); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms); + /* + * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting + * set to 1 if it is too soon after CSTS.RDY is reported as 0. + */ + spdk_delay_us(100); + return 0; + } + break; + + case NVME_CTRLR_STATE_ENABLE: + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n"); + rc = nvme_ctrlr_enable(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms); + return rc; + + case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1: + if (csts.bits.rdy == 1) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n"); + /* + * The controller has been enabled. + * Perform the rest of initialization serially. + */ + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE, NVME_TIMEOUT_INFINITE); + return 0; + } + break; + + case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE: + nvme_ctrlr_enable_admin_queue(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_IDENTIFY: + rc = nvme_ctrlr_identify(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_SET_NUM_QUEUES: + rc = nvme_ctrlr_set_num_queues(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_GET_NUM_QUEUES: + rc = nvme_ctrlr_get_num_queues(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_CONSTRUCT_NS: + rc = nvme_ctrlr_construct_namespaces(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS: + rc = nvme_ctrlr_identify_active_ns(ctrlr); + if (rc < 0) { + nvme_ctrlr_destruct_namespaces(ctrlr); + } + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_IDENTIFY_NS: + rc = nvme_ctrlr_identify_namespaces(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS: + rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_CONFIGURE_AER: + rc = nvme_ctrlr_configure_aer(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES: + rc = nvme_ctrlr_set_supported_log_pages(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES: + 
nvme_ctrlr_set_supported_features(ctrlr); + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, NVME_TIMEOUT_INFINITE); + break; + + case NVME_CTRLR_STATE_SET_DB_BUF_CFG: + rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT: + rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_SET_HOST_ID: + rc = nvme_ctrlr_set_host_id(ctrlr); + break; + + case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID: + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + break; + + case NVME_CTRLR_STATE_READY: + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n"); + return 0; + + case NVME_CTRLR_STATE_ERROR: + SPDK_ERRLOG("Ctrlr %s is in error state\n", ctrlr->trid.traddr); + return -1; + + default: + assert(0); + nvme_ctrlr_fail(ctrlr, false); + return -1; + } + +init_timeout: + if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && + spdk_get_ticks() > ctrlr->state_timeout_tsc) { + SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state); + nvme_ctrlr_fail(ctrlr, false); + return -1; + } + + return rc; +} + +int +nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx) +{ + pthread_mutexattr_t attr; + int rc = 0; + + if (pthread_mutexattr_init(&attr)) { + return -1; + } + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || +#ifndef __FreeBSD__ + pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) || + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || +#endif + pthread_mutex_init(mtx, &attr)) { + rc = -1; + } + pthread_mutexattr_destroy(&attr); + return rc; +} + +int +nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); + } else { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); + } + + ctrlr->flags = 0; + ctrlr->free_io_qids = NULL; + ctrlr->is_resetting = false; + ctrlr->is_failed = false; + + TAILQ_INIT(&ctrlr->active_io_qpairs); + STAILQ_INIT(&ctrlr->queued_aborts); + ctrlr->outstanding_aborts = 0; + + rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock); + if (rc != 0) { + return rc; + } + + TAILQ_INIT(&ctrlr->active_procs); + + return rc; +} + +/* This function should be called once at ctrlr initialization to set up constant properties. */ +void +nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap, + const union spdk_nvme_vs_register *vs) +{ + ctrlr->cap = *cap; + ctrlr->vs = *vs; + + ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin); + + /* For now, always select page_size == min_page_size. 
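+ * CAP.MPSMIN is an exponent relative to 4 KiB, so min_page_size above is
+ * 1u << (12 + mpsmin); for example mpsmin == 0 yields 4096 bytes.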
*/ + ctrlr->page_size = ctrlr->min_page_size; + + ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES); + ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES); + ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u); + + ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size); +} + +void +nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr) +{ + pthread_mutex_destroy(&ctrlr->ctrlr_lock); +} + +void +nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_qpair *qpair, *tmp; + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); + TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { + spdk_nvme_ctrlr_free_io_qpair(qpair); + } + + nvme_ctrlr_free_doorbell_buffer(ctrlr); + + nvme_ctrlr_shutdown(ctrlr); + + nvme_ctrlr_destruct_namespaces(ctrlr); + + spdk_bit_array_free(&ctrlr->free_io_qids); + + nvme_transport_ctrlr_destruct(ctrlr); +} + +int +nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_request *req) +{ + return nvme_qpair_submit_request(ctrlr->adminq, req); +} + +static void +nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl) +{ + /* Do nothing */ +} + +/* + * Check if we need to send a Keep Alive command. + * Caller must hold ctrlr->ctrlr_lock. + */ +static void +nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr) +{ + uint64_t now; + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + now = spdk_get_ticks(); + if (now < ctrlr->next_keep_alive_tick) { + return; + } + + req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL); + if (req == NULL) { + return; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + if (rc != 0) { + SPDK_ERRLOG("Submitting Keep Alive failed\n"); + } + + ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks; +} + +int32_t +spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr) +{ + int32_t num_completions; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + if (ctrlr->keep_alive_interval_ticks) { + nvme_ctrlr_keep_alive(ctrlr); + } + num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return num_completions; +} + +const struct spdk_nvme_ctrlr_data * +spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr) +{ + return &ctrlr->cdata; +} + +union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr) +{ + union spdk_nvme_csts_register csts; + + if (nvme_ctrlr_get_csts(ctrlr, &csts)) { + csts.raw = 0xFFFFFFFFu; + } + return csts; +} + +union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->cap; +} + +union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->vs; +} + +uint32_t +spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->num_ns; +} + +static int32_t +spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + int32_t result = -1; + + if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) { + return result; + } + + int32_t lower = 0; + int32_t upper = ctrlr->num_ns - 1; + int32_t mid; + + while (lower <= upper) { + mid = lower + (upper - lower) / 2; + if (ctrlr->active_ns_list[mid] == nsid) 
{ + result = mid; + break; + } else { + if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) { + lower = mid + 1; + } else { + upper = mid - 1; + } + + } + } + + return result; +} + +bool +spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1; +} + +uint32_t +spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0; +} + +uint32_t +spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid) +{ + int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid); + if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) { + return ctrlr->active_ns_list[nsid_idx + 1]; + } + return 0; +} + +struct spdk_nvme_ns * +spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + if (nsid < 1 || nsid > ctrlr->num_ns) { + return NULL; + } + + return &ctrlr->ns[nsid - 1]; +} + +struct spdk_pci_device * +spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr) +{ + if (ctrlr == NULL) { + return NULL; + } + + if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + return NULL; + } + + return nvme_ctrlr_proc_get_devhandle(ctrlr); +} + +uint32_t +spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr) +{ + return ctrlr->max_xfer_size; +} + +void +spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_aer_cb aer_cb_fn, + void *aer_cb_arg) +{ + struct spdk_nvme_ctrlr_process *active_proc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + active_proc->aer_cb_fn = aer_cb_fn; + active_proc->aer_cb_arg = aer_cb_arg; + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); +} + +void +spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr, + uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg) +{ + struct spdk_nvme_ctrlr_process *active_proc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (active_proc) { + active_proc->timeout_ticks = timeout_us * spdk_get_ticks_hz() / 1000000ULL; + active_proc->timeout_cb_fn = cb_fn; + active_proc->timeout_cb_arg = cb_arg; + } + + ctrlr->timeout_enabled = true; + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); +} + +bool +spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page) +{ + /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */ + SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch"); + return ctrlr->log_page_supported[log_page]; +} + +bool +spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code) +{ + /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */ + SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch"); + return ctrlr->feature_supported[feature_code]; +} + +int +spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload) +{ + struct nvme_completion_poll_status status; + int res; + struct spdk_nvme_ns *ns; + + res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload, + nvme_completion_poll_cb, &status); + if (res) { + return res; + } + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) 
{ + SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n"); + return -ENXIO; + } + + res = nvme_ctrlr_identify_active_ns(ctrlr); + if (res) { + return res; + } + + ns = &ctrlr->ns[nsid - 1]; + return nvme_ns_construct(ns, nsid, ctrlr); +} + +int +spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload) +{ + struct nvme_completion_poll_status status; + int res; + struct spdk_nvme_ns *ns; + + res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload, + nvme_completion_poll_cb, &status); + if (res) { + return res; + } + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n"); + return -ENXIO; + } + + res = nvme_ctrlr_identify_active_ns(ctrlr); + if (res) { + return res; + } + + ns = &ctrlr->ns[nsid - 1]; + /* Inactive NS */ + nvme_ns_destruct(ns); + + return 0; +} + +uint32_t +spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload) +{ + struct nvme_completion_poll_status status; + int res; + uint32_t nsid; + struct spdk_nvme_ns *ns; + + res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status); + if (res) { + return 0; + } + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n"); + return 0; + } + + nsid = status.cpl.cdw0; + ns = &ctrlr->ns[nsid - 1]; + /* Inactive NS */ + res = nvme_ns_construct(ns, nsid, ctrlr); + if (res) { + return 0; + } + + /* Return the namespace ID that was created */ + return nsid; +} + +int +spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + struct nvme_completion_poll_status status; + int res; + struct spdk_nvme_ns *ns; + + res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status); + if (res) { + return res; + } + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n"); + return -ENXIO; + } + + res = nvme_ctrlr_identify_active_ns(ctrlr); + if (res) { + return res; + } + + ns = &ctrlr->ns[nsid - 1]; + nvme_ns_destruct(ns); + + return 0; +} + +int +spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_format *format) +{ + struct nvme_completion_poll_status status; + int res; + + res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb, + &status); + if (res) { + return res; + } + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n"); + return -ENXIO; + } + + return spdk_nvme_ctrlr_reset(ctrlr); +} + +int +spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size, + int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status) +{ + struct spdk_nvme_fw_commit fw_commit; + struct nvme_completion_poll_status status; + int res; + unsigned int size_remaining; + unsigned int offset; + unsigned int transfer; + void *p; + + if (!completion_status) { + return -EINVAL; + } + memset(completion_status, 0, sizeof(struct spdk_nvme_status)); + if (size % 4) { + SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n"); + return -1; + } + + /* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG + * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG + */ + if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) && + (commit_action != 
SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid command!\n"); + return -1; + } + + /* Firmware download */ + size_remaining = size; + offset = 0; + p = payload; + + while (size_remaining > 0) { + transfer = spdk_min(size_remaining, ctrlr->min_page_size); + + res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p, + nvme_completion_poll_cb, + &status); + if (res) { + return res; + } + + if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n"); + return -ENXIO; + } + p += transfer; + offset += transfer; + size_remaining -= transfer; + } + + /* Firmware commit */ + memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); + fw_commit.fs = slot; + fw_commit.ca = commit_action; + + res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb, + &status); + if (res) { + return res; + } + + res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock); + + memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status)); + + if (res) { + if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC || + status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) { + if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && + status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { + SPDK_NOTICELOG("firmware activation requires conventional reset to be performed. !\n"); + } else { + SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n"); + } + return -ENXIO; + } + } + + return spdk_nvme_ctrlr_reset(ctrlr); +} + +void * +spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) +{ + void *buf; + + if (size == 0) { + return NULL; + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return buf; +} + +void +spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size) +{ + if (buf && size) { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } +} diff --git a/src/spdk/lib/nvme/nvme_ctrlr_cmd.c b/src/spdk/lib/nvme/nvme_ctrlr_cmd.c new file mode 100644 index 00000000..750a2d78 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ctrlr_cmd.c @@ -0,0 +1,694 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvme_internal.h" + +int +spdk_nvme_ctrlr_cmd_io_raw(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + + req = nvme_allocate_request_contig(qpair, buf, len, cb_fn, cb_arg); + + if (req == NULL) { + return -ENOMEM; + } + + memcpy(&req->cmd, cmd, sizeof(req->cmd)); + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, void *md_buf, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buf, md_buf); + + req = nvme_allocate_request(qpair, &payload, len, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + memcpy(&req->cmd, cmd, sizeof(req->cmd)); + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ctrlr_cmd_admin_raw(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_contig(ctrlr->adminq, buf, len, cb_fn, cb_arg); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + memcpy(&req->cmd, cmd, sizeof(req->cmd)); + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_identify(struct spdk_nvme_ctrlr *ctrlr, uint8_t cns, uint16_t cntid, uint32_t nsid, + void *payload, size_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, payload_size, + cb_fn, cb_arg, false); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_IDENTIFY; + cmd->cdw10 = cns | ((uint32_t)cntid << 16); + cmd->nsid = nsid; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +int +nvme_ctrlr_cmd_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, sizeof(struct spdk_nvme_ctrlr_list), + cb_fn, cb_arg, true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_NS_ATTACHMENT; + cmd->nsid = nsid; + cmd->cdw10 = SPDK_NVME_NS_CTRLR_ATTACH; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct 
spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, sizeof(struct spdk_nvme_ctrlr_list), + cb_fn, cb_arg, true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_NS_ATTACHMENT; + cmd->nsid = nsid; + cmd->cdw10 = SPDK_NVME_NS_CTRLR_DETACH; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, sizeof(struct spdk_nvme_ns_data), + cb_fn, cb_arg, true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_NS_MANAGEMENT; + cmd->cdw10 = SPDK_NVME_NS_MANAGEMENT_CREATE; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, spdk_nvme_cmd_cb cb_fn, + void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_NS_MANAGEMENT; + cmd->cdw10 = SPDK_NVME_NS_MANAGEMENT_DELETE; + cmd->nsid = nsid; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr, uint64_t prp1, uint64_t prp2, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG; + cmd->dptr.prp.prp1 = prp1; + cmd->dptr.prp.prp2 = prp2; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_format *format, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_FORMAT_NVM; + cmd->nsid = nsid; + memcpy(&cmd->cdw10, format, sizeof(uint32_t)); + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +spdk_nvme_ctrlr_cmd_set_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, uint32_t cdw12, void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct 
nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, cb_fn, cb_arg, + true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_SET_FEATURES; + cmd->cdw10 = feature; + cmd->cdw11 = cdw11; + cmd->cdw12 = cdw12; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +spdk_nvme_ctrlr_cmd_get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, cb_fn, cb_arg, + false); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_GET_FEATURES; + cmd->cdw10 = feature; + cmd->cdw11 = cdw11; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +spdk_nvme_ctrlr_cmd_get_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t ns_id) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, cb_fn, cb_arg, + false); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_GET_FEATURES; + cmd->cdw10 = feature; + cmd->cdw11 = cdw11; + cmd->nsid = ns_id; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int spdk_nvme_ctrlr_cmd_set_feature_ns(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature, + uint32_t cdw11, uint32_t cdw12, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t ns_id) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, cb_fn, cb_arg, + true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_SET_FEATURES; + cmd->cdw10 = feature; + cmd->cdw11 = cdw11; + cmd->cdw12 = cdw12; + cmd->nsid = ns_id; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +nvme_ctrlr_cmd_set_num_queues(struct spdk_nvme_ctrlr *ctrlr, + uint32_t num_queues, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + uint32_t cdw11; + + cdw11 = ((num_queues - 1) << 16) | (num_queues - 1); + return spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_NUMBER_OF_QUEUES, cdw11, 0, + NULL, 0, cb_fn, cb_arg); +} + +int +nvme_ctrlr_cmd_get_num_queues(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + return spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_NUMBER_OF_QUEUES, 0, NULL, 0, + cb_fn, cb_arg); +} + +int +nvme_ctrlr_cmd_set_async_event_config(struct spdk_nvme_ctrlr *ctrlr, + union spdk_nvme_feat_async_event_configuration config, spdk_nvme_cmd_cb cb_fn, 
+ void *cb_arg) +{ + uint32_t cdw11; + + cdw11 = config.raw; + return spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, 0, + NULL, 0, + cb_fn, cb_arg); +} + +int +nvme_ctrlr_cmd_set_host_id(struct spdk_nvme_ctrlr *ctrlr, void *host_id, uint32_t host_id_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + uint32_t cdw11; + + if (host_id_size == 16) { + /* 128-bit extended host identifier */ + cdw11 = 1; + } else if (host_id_size == 8) { + /* 64-bit host identifier */ + cdw11 = 0; + } else { + SPDK_ERRLOG("Invalid host ID size %u\n", host_id_size); + return -EINVAL; + } + + return spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_HOST_IDENTIFIER, cdw11, 0, + host_id, host_id_size, cb_fn, cb_arg); +} + +int +spdk_nvme_ctrlr_cmd_get_log_page(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, + uint32_t nsid, void *payload, uint32_t payload_size, + uint64_t offset, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + uint32_t numd, numdl, numdu; + uint32_t lpol, lpou; + int rc; + + if (payload_size == 0) { + return -EINVAL; + } + + if (offset & 3) { + return -EINVAL; + } + + numd = payload_size / sizeof(uint32_t) - 1u; + numdl = numd & 0xFFFFu; + numdu = (numd >> 16) & 0xFFFFu; + + lpol = (uint32_t)offset; + lpou = (uint32_t)(offset >> 32); + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + if (offset && !ctrlr->cdata.lpa.edlp) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -EINVAL; + } + + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, payload_size, cb_fn, cb_arg, false); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_GET_LOG_PAGE; + cmd->nsid = nsid; + cmd->cdw10 = numdl << 16; + cmd->cdw10 |= log_page; + cmd->cdw11 = numdu; + cmd->cdw12 = lpol; + cmd->cdw13 = lpou; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +static void +spdk_nvme_ctrlr_cmd_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_request *req, *next, *tmp; + struct spdk_nvme_ctrlr *ctrlr; + int rc; + + req = ctx; + ctrlr = (struct spdk_nvme_ctrlr *)req->user_buffer; + + ctrlr->outstanding_aborts--; + STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) { + STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); + ctrlr->outstanding_aborts++; + rc = nvme_ctrlr_submit_admin_request(ctrlr, next); + if (rc < 0) { + SPDK_ERRLOG("Failed to submit queued abort.\n"); + memset(&next->cpl, 0, sizeof(next->cpl)); + next->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + next->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + next->cpl.status.dnr = 1; + nvme_complete_request(next, &req->cpl); + nvme_free_request(next); + } else { + /* If the first abort succeeds, stop iterating. 
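+ * At most one queued abort is resubmitted per completion; the remainder
+ * stay on ctrlr->queued_aborts and are drained as later aborts complete,
+ * which keeps outstanding_aborts within the limit derived from cdata.acl.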
*/ + break; + } + } + + req->user_cb_fn(req->user_cb_arg, cpl); +} + +int +spdk_nvme_ctrlr_cmd_abort(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint16_t cid, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + int rc; + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + uint16_t sqid; + + if (qpair) { + sqid = qpair->id; + } else { + sqid = ctrlr->adminq->id; /* 0 */ + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_null(ctrlr->adminq, spdk_nvme_ctrlr_cmd_abort_cpl, NULL); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + req->cb_arg = req; + req->user_cb_fn = cb_fn; + req->user_cb_arg = cb_arg; + req->user_buffer = ctrlr; /* This is a hack to get to the ctrlr in the + * completion handler. */ + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_ABORT; + cmd->cdw10 = (cid << 16) | sqid; + + if (ctrlr->outstanding_aborts >= ctrlr->cdata.acl) { + STAILQ_INSERT_TAIL(&ctrlr->queued_aborts, req, stailq); + rc = 0; + } else { + ctrlr->outstanding_aborts++; + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + } + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + +int +nvme_ctrlr_cmd_fw_commit(struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_fw_commit *fw_commit, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_FIRMWARE_COMMIT; + memcpy(&cmd->cdw10, fw_commit, sizeof(uint32_t)); + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; + +} + +int +nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr, + uint32_t size, uint32_t offset, void *payload, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, size, cb_fn, cb_arg, true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + cmd->cdw10 = (size >> 2) - 1; + cmd->cdw11 = offset >> 2; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +spdk_nvme_ctrlr_cmd_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, + cb_fn, cb_arg, false); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_SECURITY_RECEIVE; + cmd->cdw10 = ((uint32_t)secp << 24) | ((uint32_t)spsp << 8) | ((uint32_t)nssf); + cmd->cdw11 = payload_size; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} + +int +spdk_nvme_ctrlr_cmd_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, + uint16_t spsp, uint8_t nssf, void *payload, + uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void 
*cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + int rc; + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, payload, payload_size, + cb_fn, cb_arg, true); + if (req == NULL) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_SECURITY_SEND; + cmd->cdw10 = ((uint32_t)secp << 24) | ((uint32_t)spsp << 8) | ((uint32_t)nssf); + cmd->cdw11 = payload_size; + + rc = nvme_ctrlr_submit_admin_request(ctrlr, req); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return rc; +} diff --git a/src/spdk/lib/nvme/nvme_ctrlr_ocssd_cmd.c b/src/spdk/lib/nvme/nvme_ctrlr_ocssd_cmd.c new file mode 100644 index 00000000..80de5328 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ctrlr_ocssd_cmd.c @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/nvme_ocssd.h" +#include "nvme_internal.h" + +bool +spdk_nvme_ctrlr_is_ocssd_supported(struct spdk_nvme_ctrlr *ctrlr) +{ + if (ctrlr->quirks & NVME_QUIRK_OCSSD) { + // TODO: There isn't a standardized way to identify Open-Channel SSD + // different verdors may have different conditions. + + /* + * Current QEMU OpenChannel Device needs to check nsdata->vs[0]. + * Here check nsdata->vs[0] of the first namespace. 
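+ * In other words the check below is a vendor-specific quirk: CNEX Labs
+ * vendor ID plus vendor_specific[0] == 0x1 in the first namespace's Identify
+ * data; other Open-Channel devices would need their own detection added here.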
+ */ + if (ctrlr->cdata.vid == SPDK_PCI_VID_CNEXLABS) { + if (ctrlr->num_ns && ctrlr->nsdata[0].vendor_specific[0] == 0x1) { + return true; + } + } + } + return false; +} + + +int +spdk_nvme_ocssd_ctrlr_cmd_geometry(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + void *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + if (!payload || (payload_size != sizeof(struct spdk_ocssd_geometry_data))) { + return -EINVAL; + } + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + req = nvme_allocate_request_user_copy(ctrlr->adminq, + payload, payload_size, cb_fn, cb_arg, false); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_OCSSD_OPC_GEOMETRY; + cmd->nsid = nsid; + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} diff --git a/src/spdk/lib/nvme/nvme_fabric.c b/src/spdk/lib/nvme/nvme_fabric.c new file mode 100644 index 00000000..4589596a --- /dev/null +++ b/src/spdk/lib/nvme/nvme_fabric.c @@ -0,0 +1,340 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NVMe over Fabrics transport-independent functions + */ + +#include "nvme_internal.h" + +#include "spdk/endian.h" +#include "spdk/string.h" + +static int +nvme_fabric_prop_set_cmd(struct spdk_nvme_ctrlr *ctrlr, + uint32_t offset, uint8_t size, uint64_t value) +{ + struct spdk_nvmf_fabric_prop_set_cmd cmd = {}; + struct nvme_completion_poll_status status; + int rc; + + assert(size == SPDK_NVMF_PROP_SIZE_4 || size == SPDK_NVMF_PROP_SIZE_8); + + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; + cmd.ofst = offset; + cmd.attrib.size = size; + cmd.value.u64 = value; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, (struct spdk_nvme_cmd *)&cmd, + NULL, 0, + nvme_completion_poll_cb, &status); + if (rc < 0) { + return rc; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + SPDK_ERRLOG("Property Set failed\n"); + return -1; + } + + return 0; +} + +static int +nvme_fabric_prop_get_cmd(struct spdk_nvme_ctrlr *ctrlr, + uint32_t offset, uint8_t size, uint64_t *value) +{ + struct spdk_nvmf_fabric_prop_set_cmd cmd = {}; + struct nvme_completion_poll_status status; + struct spdk_nvmf_fabric_prop_get_rsp *response; + int rc; + + assert(size == SPDK_NVMF_PROP_SIZE_4 || size == SPDK_NVMF_PROP_SIZE_8); + + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; + cmd.ofst = offset; + cmd.attrib.size = size; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, (struct spdk_nvme_cmd *)&cmd, + NULL, 0, nvme_completion_poll_cb, + &status); + if (rc < 0) { + return rc; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + SPDK_ERRLOG("Property Get failed\n"); + return -1; + } + + response = (struct spdk_nvmf_fabric_prop_get_rsp *)&status.cpl; + + if (size == SPDK_NVMF_PROP_SIZE_4) { + *value = response->value.u32.low; + } else { + *value = response->value.u64; + } + + return 0; +} + +int +nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) +{ + return nvme_fabric_prop_set_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_4, value); +} + +int +nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) +{ + return nvme_fabric_prop_set_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_8, value); +} + +int +nvme_fabric_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) +{ + uint64_t tmp_value; + int rc; + rc = nvme_fabric_prop_get_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_4, &tmp_value); + + if (!rc) { + *value = (uint32_t)tmp_value; + } + return rc; +} + +int +nvme_fabric_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) +{ + return nvme_fabric_prop_get_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_8, value); +} + +static void +nvme_fabric_discover_probe(struct spdk_nvmf_discovery_log_page_entry *entry, + void *cb_ctx, spdk_nvme_probe_cb probe_cb) +{ + struct spdk_nvme_transport_id trid; + uint8_t *end; + size_t len; + + memset(&trid, 0, sizeof(trid)); + + if (entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + SPDK_WARNLOG("Skipping unsupported discovery service referral\n"); + return; + } else if (entry->subtype != SPDK_NVMF_SUBTYPE_NVME) { + SPDK_WARNLOG("Skipping unknown subtype %u\n", entry->subtype); + return; + } + + trid.trtype = entry->trtype; + if (!spdk_nvme_transport_available(trid.trtype)) { + SPDK_WARNLOG("NVMe transport type %u not available; skipping probe\n", + trid.trtype); + return; + } + + trid.adrfam = entry->adrfam; + + /* Ensure that subnqn is null terminated. 
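+	 * (The discovery log page entry reserves 256 bytes for SUBNQN; assuming
+	 * SPDK_NVMF_NQN_MAX_LEN is 223, the memchr() below scans at most the
+	 * first 224 bytes, i.e. the longest accepted NQN plus its terminator.)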
*/ + end = memchr(entry->subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1); + if (!end) { + SPDK_ERRLOG("Discovery entry SUBNQN is not null terminated\n"); + return; + } + len = end - entry->subnqn; + memcpy(trid.subnqn, entry->subnqn, len); + trid.subnqn[len] = '\0'; + + /* Convert traddr to a null terminated string. */ + len = spdk_strlen_pad(entry->traddr, sizeof(entry->traddr), ' '); + memcpy(trid.traddr, entry->traddr, len); + if (spdk_str_chomp(trid.traddr) != 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Trailing newlines removed from discovery TRADDR\n"); + } + + /* Convert trsvcid to a null terminated string. */ + len = spdk_strlen_pad(entry->trsvcid, sizeof(entry->trsvcid), ' '); + memcpy(trid.trsvcid, entry->trsvcid, len); + if (spdk_str_chomp(trid.trsvcid) != 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Trailing newlines removed from discovery TRSVCID\n"); + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "subnqn=%s, trtype=%u, traddr=%s, trsvcid=%s\n", + trid.subnqn, trid.trtype, + trid.traddr, trid.trsvcid); + + nvme_ctrlr_probe(&trid, NULL, probe_cb, cb_ctx); +} + +static int +nvme_fabric_get_discovery_log_page(struct spdk_nvme_ctrlr *ctrlr, + void *log_page, uint32_t size, uint64_t offset) +{ + struct nvme_completion_poll_status status; + int rc; + + rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, 0, log_page, size, offset, + nvme_completion_poll_cb, &status); + if (rc < 0) { + return -1; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + return -1; + } + + return 0; +} + +int +nvme_fabric_ctrlr_discover(struct spdk_nvme_ctrlr *ctrlr, + void *cb_ctx, spdk_nvme_probe_cb probe_cb) +{ + struct spdk_nvmf_discovery_log_page *log_page; + struct spdk_nvmf_discovery_log_page_entry *log_page_entry; + char buffer[4096]; + int rc; + uint64_t i, numrec, buffer_max_entries_first, buffer_max_entries, log_page_offset = 0; + uint64_t remaining_num_rec = 0; + uint16_t recfmt; + + memset(buffer, 0x0, 4096); + buffer_max_entries_first = (sizeof(buffer) - offsetof(struct spdk_nvmf_discovery_log_page, + entries[0])) / + sizeof(struct spdk_nvmf_discovery_log_page_entry); + buffer_max_entries = sizeof(buffer) / sizeof(struct spdk_nvmf_discovery_log_page_entry); + do { + rc = nvme_fabric_get_discovery_log_page(ctrlr, buffer, sizeof(buffer), log_page_offset); + if (rc < 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Get Log Page - Discovery error\n"); + return rc; + } + + if (!remaining_num_rec) { + log_page = (struct spdk_nvmf_discovery_log_page *)buffer; + recfmt = from_le16(&log_page->recfmt); + if (recfmt != 0) { + SPDK_ERRLOG("Unrecognized discovery log record format %" PRIu16 "\n", recfmt); + return -EPROTO; + } + remaining_num_rec = log_page->numrec; + log_page_offset = offsetof(struct spdk_nvmf_discovery_log_page, entries[0]); + log_page_entry = &log_page->entries[0]; + numrec = spdk_min(remaining_num_rec, buffer_max_entries_first); + } else { + numrec = spdk_min(remaining_num_rec, buffer_max_entries); + log_page_entry = (struct spdk_nvmf_discovery_log_page_entry *)buffer; + } + + for (i = 0; i < numrec; i++) { + nvme_fabric_discover_probe(log_page_entry++, cb_ctx, probe_cb); + } + remaining_num_rec -= numrec; + log_page_offset += numrec * sizeof(struct spdk_nvmf_discovery_log_page_entry); + } while (remaining_num_rec != 0); + + return 0; +} + +int +nvme_fabric_qpair_connect(struct spdk_nvme_qpair *qpair, uint32_t num_entries) +{ + struct nvme_completion_poll_status status; + struct spdk_nvmf_fabric_connect_rsp *rsp; + struct spdk_nvmf_fabric_connect_cmd cmd; + struct spdk_nvmf_fabric_connect_data 
*nvmf_data; + struct spdk_nvme_ctrlr *ctrlr; + int rc; + + if (num_entries == 0 || num_entries > SPDK_NVME_IO_QUEUE_MAX_ENTRIES) { + return -EINVAL; + } + + ctrlr = qpair->ctrlr; + if (!ctrlr) { + return -EINVAL; + } + + nvmf_data = spdk_dma_zmalloc(sizeof(*nvmf_data), 0, NULL); + if (!nvmf_data) { + SPDK_ERRLOG("nvmf_data allocation error\n"); + return -ENOMEM; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; + cmd.qid = qpair->id; + cmd.sqsize = num_entries - 1; + cmd.kato = ctrlr->opts.keep_alive_timeout_ms; + + if (nvme_qpair_is_admin_queue(qpair)) { + nvmf_data->cntlid = 0xFFFF; + } else { + nvmf_data->cntlid = ctrlr->cntlid; + } + + SPDK_STATIC_ASSERT(sizeof(nvmf_data->hostid) == sizeof(ctrlr->opts.extended_host_id), + "host ID size mismatch"); + memcpy(nvmf_data->hostid, ctrlr->opts.extended_host_id, sizeof(nvmf_data->hostid)); + snprintf(nvmf_data->hostnqn, sizeof(nvmf_data->hostnqn), "%s", ctrlr->opts.hostnqn); + snprintf(nvmf_data->subnqn, sizeof(nvmf_data->subnqn), "%s", ctrlr->trid.subnqn); + + rc = spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, + (struct spdk_nvme_cmd *)&cmd, + nvmf_data, sizeof(*nvmf_data), + nvme_completion_poll_cb, &status); + if (rc < 0) { + SPDK_ERRLOG("Connect command failed\n"); + spdk_dma_free(nvmf_data); + return rc; + } + + if (spdk_nvme_wait_for_completion(qpair, &status)) { + SPDK_ERRLOG("Connect command failed\n"); + spdk_dma_free(nvmf_data); + return -EIO; + } + + if (nvme_qpair_is_admin_queue(qpair)) { + rsp = (struct spdk_nvmf_fabric_connect_rsp *)&status.cpl; + ctrlr->cntlid = rsp->status_code_specific.success.cntlid; + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cntlid); + } + + spdk_dma_free(nvmf_data); + return 0; +} diff --git a/src/spdk/lib/nvme/nvme_internal.h b/src/spdk/lib/nvme/nvme_internal.h new file mode 100644 index 00000000..6e7714a4 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_internal.h @@ -0,0 +1,1003 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __NVME_INTERNAL_H__ +#define __NVME_INTERNAL_H__ + +#include "spdk/config.h" +#include "spdk/likely.h" +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" + +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +#include "spdk/queue.h" +#include "spdk/barrier.h" +#include "spdk/bit_array.h" +#include "spdk/mmio.h" +#include "spdk/pci_ids.h" +#include "spdk/util.h" +#include "spdk/nvme_intel.h" +#include "spdk/nvmf_spec.h" +#include "spdk/uuid.h" + +#include "spdk_internal/assert.h" +#include "spdk_internal/log.h" + +extern pid_t g_spdk_nvme_pid; + +/* + * Some Intel devices support vendor-unique read latency log page even + * though the log page directory says otherwise. + */ +#define NVME_INTEL_QUIRK_READ_LATENCY 0x1 + +/* + * Some Intel devices support vendor-unique write latency log page even + * though the log page directory says otherwise. + */ +#define NVME_INTEL_QUIRK_WRITE_LATENCY 0x2 + +/* + * The controller needs a delay before starts checking the device + * readiness, which is done by reading the NVME_CSTS_RDY bit. + */ +#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY 0x4 + +/* + * The controller performs best when I/O is split on particular + * LBA boundaries. + */ +#define NVME_INTEL_QUIRK_STRIPING 0x8 + +/* + * The controller needs a delay after allocating an I/O queue pair + * before it is ready to accept I/O commands. + */ +#define NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC 0x10 + +/* + * Earlier NVMe devices do not indicate whether unmapped blocks + * will read all zeroes or not. This define indicates that the + * device does in fact read all zeroes after an unmap event + */ +#define NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE 0x20 + +/* + * The controller doesn't handle Identify value others than 0 or 1 correctly. + */ +#define NVME_QUIRK_IDENTIFY_CNS 0x40 + +/* + * The controller supports Open Channel command set if matching additional + * condition, like the first byte (value 0x1) in the vendor specific + * bits of the namespace identify structure is set. + */ +#define NVME_QUIRK_OCSSD 0x80 + +/* + * The controller has an Intel vendor ID but does not support Intel vendor-specific + * log pages. This is primarily for QEMU emulated SSDs which report an Intel vendor + * ID but do not support these log pages. + */ +#define NVME_INTEL_QUIRK_NO_LOG_PAGES 0x100 + +#define NVME_MAX_ASYNC_EVENTS (8) + +#define NVME_MIN_TIMEOUT_PERIOD (5) +#define NVME_MAX_TIMEOUT_PERIOD (120) + +/* Maximum log page size to fetch for AERs. */ +#define NVME_MAX_AER_LOG_SIZE (4096) + +/* + * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this + * define specifies the maximum number of queues this driver will actually + * try to configure, if available. + */ +#define DEFAULT_MAX_IO_QUEUES (1024) +#define DEFAULT_IO_QUEUE_SIZE (256) + +#define DEFAULT_ADMIN_QUEUE_REQUESTS (32) +#define DEFAULT_IO_QUEUE_REQUESTS (512) + +/* We want to fit submission and completion rings each in a single 2MB + * hugepage to ensure physical address contiguity. 
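+ *
+ * As a rough illustration, assuming the spec-defined sizes of 64 bytes per
+ * submission queue entry and 16 bytes per completion queue entry, spdk_max()
+ * below evaluates to 64 and the limit works out to 0x200000 / 64 = 32768
+ * entries per queue.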
+ */ +#define MAX_IO_QUEUE_ENTRIES (0x200000 / spdk_max( \ + sizeof(struct spdk_nvme_cmd), \ + sizeof(struct spdk_nvme_cpl))) + +enum nvme_payload_type { + NVME_PAYLOAD_TYPE_INVALID = 0, + + /** nvme_request::u.payload.contig_buffer is valid for this request */ + NVME_PAYLOAD_TYPE_CONTIG, + + /** nvme_request::u.sgl is valid for this request */ + NVME_PAYLOAD_TYPE_SGL, +}; + +/* + * Controller support flags. + */ +enum spdk_nvme_ctrlr_flags { + SPDK_NVME_CTRLR_SGL_SUPPORTED = 0x1, /**< The SGL is supported */ +}; + +/** + * Descriptor for a request data payload. + */ +struct nvme_payload { + /** + * Functions for retrieving physical addresses for scattered payloads. + */ + spdk_nvme_req_reset_sgl_cb reset_sgl_fn; + spdk_nvme_req_next_sge_cb next_sge_fn; + + /** + * If reset_sgl_fn == NULL, this is a contig payload, and contig_or_cb_arg contains the + * virtual memory address of a single virtually contiguous buffer. + * + * If reset_sgl_fn != NULL, this is a SGL payload, and contig_or_cb_arg contains the + * cb_arg that will be passed to the SGL callback functions. + */ + void *contig_or_cb_arg; + + /** Virtual memory address of a single virtually contiguous metadata buffer */ + void *md; +}; + +#define NVME_PAYLOAD_CONTIG(contig_, md_) \ + (struct nvme_payload) { \ + .reset_sgl_fn = NULL, \ + .next_sge_fn = NULL, \ + .contig_or_cb_arg = (contig_), \ + .md = (md_), \ + } + +#define NVME_PAYLOAD_SGL(reset_sgl_fn_, next_sge_fn_, cb_arg_, md_) \ + (struct nvme_payload) { \ + .reset_sgl_fn = (reset_sgl_fn_), \ + .next_sge_fn = (next_sge_fn_), \ + .contig_or_cb_arg = (cb_arg_), \ + .md = (md_), \ + } + +static inline enum nvme_payload_type +nvme_payload_type(const struct nvme_payload *payload) { + return payload->reset_sgl_fn ? NVME_PAYLOAD_TYPE_SGL : NVME_PAYLOAD_TYPE_CONTIG; +} + +struct nvme_error_cmd { + bool do_not_submit; + uint64_t timeout_tsc; + uint32_t err_count; + uint8_t opc; + struct spdk_nvme_status status; + TAILQ_ENTRY(nvme_error_cmd) link; +}; + +struct nvme_request { + struct spdk_nvme_cmd cmd; + + uint8_t retries; + + bool timed_out; + + /** + * Number of children requests still outstanding for this + * request which was split into multiple child requests. + */ + uint16_t num_children; + + /** + * Offset in bytes from the beginning of payload for this request. + * This is used for I/O commands that are split into multiple requests. + */ + uint32_t payload_offset; + uint32_t md_offset; + + uint32_t payload_size; + + /** + * Timeout ticks for error injection requests, can be extended in future + * to support per-request timeout feature. + */ + uint64_t timeout_tsc; + + /** + * Data payload for this request's command. + */ + struct nvme_payload payload; + + spdk_nvme_cmd_cb cb_fn; + void *cb_arg; + STAILQ_ENTRY(nvme_request) stailq; + + struct spdk_nvme_qpair *qpair; + + /* + * The value of spdk_get_ticks() when the request was submitted to the hardware. + * Only set if ctrlr->timeout_enabled is true. + */ + uint64_t submit_tick; + + /** + * The active admin request can be moved to a per process pending + * list based on the saved pid to tell which process it belongs + * to. The cpl saves the original completion information which + * is used in the completion callback. + * NOTE: these below two fields are only used for admin request. + */ + pid_t pid; + struct spdk_nvme_cpl cpl; + + /** + * The following members should not be reordered with members + * above. 
These members are only needed when splitting + * requests which is done rarely, and the driver is careful + * to not touch the following fields until a split operation is + * needed, to avoid touching an extra cacheline. + */ + + /** + * Points to the outstanding child requests for a parent request. + * Only valid if a request was split into multiple children + * requests, and is not initialized for non-split requests. + */ + TAILQ_HEAD(, nvme_request) children; + + /** + * Linked-list pointers for a child request in its parent's list. + */ + TAILQ_ENTRY(nvme_request) child_tailq; + + /** + * Points to a parent request if part of a split request, + * NULL otherwise. + */ + struct nvme_request *parent; + + /** + * Completion status for a parent request. Initialized to all 0's + * (SUCCESS) before child requests are submitted. If a child + * request completes with error, the error status is copied here, + * to ensure that the parent request is also completed with error + * status once all child requests are completed. + */ + struct spdk_nvme_cpl parent_status; + + /** + * The user_cb_fn and user_cb_arg fields are used for holding the original + * callback data when using nvme_allocate_request_user_copy. + */ + spdk_nvme_cmd_cb user_cb_fn; + void *user_cb_arg; + void *user_buffer; +}; + +struct nvme_completion_poll_status { + struct spdk_nvme_cpl cpl; + bool done; +}; + +struct nvme_async_event_request { + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_request *req; + struct spdk_nvme_cpl cpl; +}; + +struct spdk_nvme_qpair { + STAILQ_HEAD(, nvme_request) free_req; + STAILQ_HEAD(, nvme_request) queued_req; + /** Commands opcode in this list will return error */ + TAILQ_HEAD(, nvme_error_cmd) err_cmd_head; + /** Requests in this list will return error */ + STAILQ_HEAD(, nvme_request) err_req_head; + + enum spdk_nvme_transport_type trtype; + + uint16_t id; + + uint8_t qprio; + + /* + * Members for handling IO qpair deletion inside of a completion context. + * These are specifically defined as single bits, so that they do not + * push this data structure out to another cacheline. + */ + uint8_t in_completion_context : 1; + uint8_t delete_after_completion_context: 1; + + /* + * Set when no deletion notification is needed. For example, the process + * which allocated this qpair exited unexpectedly. + */ + uint8_t no_deletion_notification_needed: 1; + + struct spdk_nvme_ctrlr *ctrlr; + + /* List entry for spdk_nvme_ctrlr::active_io_qpairs */ + TAILQ_ENTRY(spdk_nvme_qpair) tailq; + + /* List entry for spdk_nvme_ctrlr_process::allocated_io_qpairs */ + TAILQ_ENTRY(spdk_nvme_qpair) per_process_tailq; + + struct spdk_nvme_ctrlr_process *active_proc; + + void *req_buf; +}; + +struct spdk_nvme_ns { + struct spdk_nvme_ctrlr *ctrlr; + uint32_t sector_size; + + /* + * Size of data transferred as part of each block, + * including metadata if FLBAS indicates the metadata is transferred + * as part of the data buffer at the end of each LBA. + */ + uint32_t extended_lba_size; + + uint32_t md_size; + uint32_t pi_type; + uint32_t sectors_per_max_io; + uint32_t sectors_per_stripe; + uint32_t id; + uint16_t flags; + + /* Namespace Identification Descriptor List (CNS = 03h) */ + uint8_t id_desc_list[4096]; +}; + +/** + * State of struct spdk_nvme_ctrlr (in particular, during initialization). + */ +enum nvme_ctrlr_state { + /** + * Wait before initializing the controller. + */ + NVME_CTRLR_STATE_INIT_DELAY, + + /** + * Controller has not been initialized yet. 
+	 */
+	NVME_CTRLR_STATE_INIT,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
+	 */
+	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
+	 */
+	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
+
+	/**
+	 * Enable the controller by writing CC.EN to 1
+	 */
+	NVME_CTRLR_STATE_ENABLE,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
+	 */
+	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
+
+	/**
+	 * Enable the Admin queue of the controller.
+	 */
+	NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE,
+
+	/**
+	 * Identify Controller command will be sent to the controller.
+	 */
+	NVME_CTRLR_STATE_IDENTIFY,
+
+	/**
+	 * Waiting for Identify Controller command to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
+
+	/**
+	 * Set Number of Queues of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_NUM_QUEUES,
+
+	/**
+	 * Waiting for Set Num of Queues command to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
+
+	/**
+	 * Get Number of Queues of the controller.
+	 */
+	NVME_CTRLR_STATE_GET_NUM_QUEUES,
+
+	/**
+	 * Waiting for Get Num of Queues command to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES,
+
+	/**
+	 * Construct Namespace data structures of the controller.
+	 */
+	NVME_CTRLR_STATE_CONSTRUCT_NS,
+
+	/**
+	 * Get active Namespace list of the controller.
+	 */
+	NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
+
+	/**
+	 * Get Identify Namespace Data structure for each NS.
+	 */
+	NVME_CTRLR_STATE_IDENTIFY_NS,
+
+	/**
+	 * Waiting for the Identify Namespace commands to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
+
+	/**
+	 * Get Identify Namespace Identification Descriptors.
+	 */
+	NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
+
+	/**
+	 * Waiting for the Identify Namespace Identification
+	 * Descriptors to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
+
+	/**
+	 * Configure AER of the controller.
+	 */
+	NVME_CTRLR_STATE_CONFIGURE_AER,
+
+	/**
+	 * Waiting for the Configure AER to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
+
+	/**
+	 * Set supported log pages of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
+
+	/**
+	 * Set supported features of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
+
+	/**
+	 * Set Doorbell Buffer Config of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_DB_BUF_CFG,
+
+	/**
+	 * Waiting for Doorbell Buffer Config to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
+
+	/**
+	 * Set Keep Alive Timeout of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
+
+	/**
+	 * Waiting for Set Keep Alive Timeout to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
+
+	/**
+	 * Set Host ID of the controller.
+	 */
+	NVME_CTRLR_STATE_SET_HOST_ID,
+
+	/**
+	 * Waiting for Set Host ID to be completed.
+	 */
+	NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
+
+	/**
+	 * Controller initialization has completed and the controller is ready.
+	 */
+	NVME_CTRLR_STATE_READY,
+
+	/**
+	 * Controller initialization has an error.
+	 */
+	NVME_CTRLR_STATE_ERROR
+};
+
+#define NVME_TIMEOUT_INFINITE UINT64_MAX
+
+/*
+ * Used to track properties for all processes accessing the controller.
+ */ +struct spdk_nvme_ctrlr_process { + /** Whether it is the primary process */ + bool is_primary; + + /** Process ID */ + pid_t pid; + + /** Active admin requests to be completed */ + STAILQ_HEAD(, nvme_request) active_reqs; + + TAILQ_ENTRY(spdk_nvme_ctrlr_process) tailq; + + /** Per process PCI device handle */ + struct spdk_pci_device *devhandle; + + /** Reference to track the number of attachment to this controller. */ + int ref; + + /** Allocated IO qpairs */ + TAILQ_HEAD(, spdk_nvme_qpair) allocated_io_qpairs; + + spdk_nvme_aer_cb aer_cb_fn; + void *aer_cb_arg; + + /** + * A function pointer to timeout callback function + */ + spdk_nvme_timeout_cb timeout_cb_fn; + void *timeout_cb_arg; + uint64_t timeout_ticks; +}; + +/* + * One of these per allocated PCI device. + */ +struct spdk_nvme_ctrlr { + /* Hot data (accessed in I/O path) starts here. */ + + /** Array of namespaces indexed by nsid - 1 */ + struct spdk_nvme_ns *ns; + + struct spdk_nvme_transport_id trid; + + uint32_t num_ns; + + bool is_removed; + + bool is_resetting; + + bool is_failed; + + bool timeout_enabled; + + uint16_t max_sges; + + uint16_t cntlid; + + /** Controller support flags */ + uint64_t flags; + + /* Cold data (not accessed in normal I/O path) is after this point. */ + + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + + enum nvme_ctrlr_state state; + uint64_t state_timeout_tsc; + + uint64_t next_keep_alive_tick; + uint64_t keep_alive_interval_ticks; + + TAILQ_ENTRY(spdk_nvme_ctrlr) tailq; + + /** All the log pages supported */ + bool log_page_supported[256]; + + /** All the features supported */ + bool feature_supported[256]; + + /** maximum i/o size in bytes */ + uint32_t max_xfer_size; + + /** minimum page size supported by this controller in bytes */ + uint32_t min_page_size; + + /** selected memory page size for this controller in bytes */ + uint32_t page_size; + + uint32_t num_aers; + struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS]; + + /** guards access to the controller itself, including admin queues */ + pthread_mutex_t ctrlr_lock; + + + struct spdk_nvme_qpair *adminq; + + /** shadow doorbell buffer */ + uint32_t *shadow_doorbell; + /** eventidx buffer */ + uint32_t *eventidx; + + /** + * Identify Controller data. + */ + struct spdk_nvme_ctrlr_data cdata; + + /** + * Keep track of active namespaces + */ + uint32_t *active_ns_list; + + /** + * Array of Identify Namespace data. + * + * Stored separately from ns since nsdata should not normally be accessed during I/O. + */ + struct spdk_nvme_ns_data *nsdata; + + struct spdk_bit_array *free_io_qids; + TAILQ_HEAD(, spdk_nvme_qpair) active_io_qpairs; + + struct spdk_nvme_ctrlr_opts opts; + + uint64_t quirks; + + /* Extra sleep time during controller initialization */ + uint64_t sleep_timeout_tsc; + + /** Track all the processes manage this controller */ + TAILQ_HEAD(, spdk_nvme_ctrlr_process) active_procs; + + + STAILQ_HEAD(, nvme_request) queued_aborts; + uint32_t outstanding_aborts; +}; + +struct nvme_driver { + pthread_mutex_t lock; + + /** Multi-process shared attached controller list */ + TAILQ_HEAD(, spdk_nvme_ctrlr) shared_attached_ctrlrs; + + bool initialized; + struct spdk_uuid default_extended_host_id; +}; + +extern struct nvme_driver *g_spdk_nvme_driver; + +int nvme_driver_init(void); + +/* + * Used for the spdk_nvme_connect() public API to save user specified opts. 
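+ * (The accompanying opts_size field records the size of the options structure
+ * the caller actually passed in, which appears to be what allows
+ * spdk_nvme_connect() to copy user options of differing sizes safely.)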
+ */ +struct spdk_nvme_ctrlr_connect_opts { + const struct spdk_nvme_ctrlr_opts *opts; + size_t opts_size; +}; + +#define nvme_delay usleep + +static inline bool +nvme_qpair_is_admin_queue(struct spdk_nvme_qpair *qpair) +{ + return qpair->id == 0; +} + +static inline bool +nvme_qpair_is_io_queue(struct spdk_nvme_qpair *qpair) +{ + return qpair->id != 0; +} + +static inline int +nvme_robust_mutex_lock(pthread_mutex_t *mtx) +{ + int rc = pthread_mutex_lock(mtx); + +#ifndef __FreeBSD__ + if (rc == EOWNERDEAD) { + rc = pthread_mutex_consistent(mtx); + } +#endif + + return rc; +} + +static inline int +nvme_robust_mutex_unlock(pthread_mutex_t *mtx) +{ + return pthread_mutex_unlock(mtx); +} + +/* Admin functions */ +int nvme_ctrlr_cmd_identify(struct spdk_nvme_ctrlr *ctrlr, + uint8_t cns, uint16_t cntid, uint32_t nsid, + void *payload, size_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_set_num_queues(struct spdk_nvme_ctrlr *ctrlr, + uint32_t num_queues, spdk_nvme_cmd_cb cb_fn, + void *cb_arg); +int nvme_ctrlr_cmd_get_num_queues(struct spdk_nvme_ctrlr *ctrlr, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_set_async_event_config(struct spdk_nvme_ctrlr *ctrlr, + union spdk_nvme_feat_async_event_configuration config, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_set_host_id(struct spdk_nvme_ctrlr *ctrlr, void *host_id, uint32_t host_id_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr, + uint64_t prp1, uint64_t prp2, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, spdk_nvme_cmd_cb cb_fn, + void *cb_arg); +int nvme_ctrlr_cmd_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + struct spdk_nvme_format *format, spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_fw_commit(struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_fw_commit *fw_commit, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr, + uint32_t size, uint32_t offset, void *payload, + spdk_nvme_cmd_cb cb_fn, void *cb_arg); +void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl); +int spdk_nvme_wait_for_completion(struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status); +int spdk_nvme_wait_for_completion_robust_lock(struct spdk_nvme_qpair *qpair, + struct nvme_completion_poll_status *status, + pthread_mutex_t *robust_mutex); + +struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, + pid_t pid); +struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr); +int nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle); +void nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr); +struct spdk_pci_device *nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr); + +int nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid, void *devhandle, + spdk_nvme_probe_cb probe_cb, void *cb_ctx); + +int 
nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove); +int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ctrlr_connected(struct spdk_nvme_ctrlr *ctrlr); + +int nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr, + struct nvme_request *req); +int nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap); +int nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs); +void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap, + const union spdk_nvme_vs_register *vs); +int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id, + struct spdk_nvme_ctrlr *ctrlr, + enum spdk_nvme_qprio qprio, + uint32_t num_requests); +void nvme_qpair_deinit(struct spdk_nvme_qpair *qpair); +void nvme_qpair_enable(struct spdk_nvme_qpair *qpair); +void nvme_qpair_disable(struct spdk_nvme_qpair *qpair); +int nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req); + +int nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ns_set_identify_data(struct spdk_nvme_ns *ns); +int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id, + struct spdk_nvme_ctrlr *ctrlr); +void nvme_ns_destruct(struct spdk_nvme_ns *ns); + +int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); +int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); +int nvme_fabric_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value); +int nvme_fabric_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value); +int nvme_fabric_ctrlr_discover(struct spdk_nvme_ctrlr *ctrlr, void *cb_ctx, + spdk_nvme_probe_cb probe_cb); +int nvme_fabric_qpair_connect(struct spdk_nvme_qpair *qpair, uint32_t num_entries); + +static inline struct nvme_request * +nvme_allocate_request(struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + + req = STAILQ_FIRST(&qpair->free_req); + if (req == NULL) { + return req; + } + + STAILQ_REMOVE_HEAD(&qpair->free_req, stailq); + + /* + * Only memset/zero fields that need it. All other fields + * will be initialized appropriately either later in this + * function, or before they are needed later in the + * submission patch. For example, the children + * TAILQ_ENTRY and following members are + * only used as part of I/O splitting so we avoid + * memsetting them until it is actually needed. + * They will be initialized in nvme_request_add_child() + * if the request is split. 
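+	 * As a concrete note on the memset() below: given the field order of
+	 * struct nvme_request, offsetof(struct nvme_request, payload_size)
+	 * covers cmd, retries, timed_out, num_children, payload_offset and
+	 * md_offset, while everything from payload_size onward is either
+	 * assigned explicitly below or deliberately left untouched.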
+ */ + memset(req, 0, offsetof(struct nvme_request, payload_size)); + + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + req->payload = *payload; + req->payload_size = payload_size; + req->qpair = qpair; + req->pid = g_spdk_nvme_pid; + + return req; +} + +static inline struct nvme_request * +nvme_allocate_request_contig(struct spdk_nvme_qpair *qpair, + void *buffer, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, NULL); + + return nvme_allocate_request(qpair, &payload, payload_size, cb_fn, cb_arg); +} + +static inline struct nvme_request * +nvme_allocate_request_null(struct spdk_nvme_qpair *qpair, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + return nvme_allocate_request_contig(qpair, NULL, 0, cb_fn, cb_arg); +} + +struct nvme_request *nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair, + void *buffer, uint32_t payload_size, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, bool host_to_controller); + +static inline void +nvme_complete_request(struct nvme_request *req, struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_qpair *qpair = req->qpair; + struct spdk_nvme_cpl err_cpl; + struct nvme_error_cmd *cmd; + + /* error injection at completion path, + * only inject for successful completed commands + */ + if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head) && + !spdk_nvme_cpl_is_error(cpl))) { + TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) { + + if (cmd->do_not_submit) { + continue; + } + + if ((cmd->opc == req->cmd.opc) && cmd->err_count) { + + err_cpl = *cpl; + err_cpl.status.sct = cmd->status.sct; + err_cpl.status.sc = cmd->status.sc; + + cpl = &err_cpl; + cmd->err_count--; + break; + } + } + } + + if (req->cb_fn) { + req->cb_fn(req->cb_arg, cpl); + } +} + +static inline void +nvme_free_request(struct nvme_request *req) +{ + assert(req != NULL); + assert(req->num_children == 0); + assert(req->qpair != NULL); + + STAILQ_INSERT_HEAD(&req->qpair->free_req, req, stailq); +} + +void nvme_request_remove_child(struct nvme_request *parent, struct nvme_request *child); +int nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + struct spdk_nvme_ctrlr_process *active_proc, uint64_t now_tick); +uint64_t nvme_get_quirks(const struct spdk_pci_id *id); + +int nvme_robust_mutex_init_shared(pthread_mutex_t *mtx); +int nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx); + +bool nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl); +void nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd); +void nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl); + +struct spdk_nvme_ctrlr *spdk_nvme_get_ctrlr_by_trid_unsafe( + const struct spdk_nvme_transport_id *trid); + +/* Transport specific functions */ +#define DECLARE_TRANSPORT(name) \ + struct spdk_nvme_ctrlr *nvme_ ## name ## _ctrlr_construct(const struct spdk_nvme_transport_id *trid, const struct spdk_nvme_ctrlr_opts *opts, \ + void *devhandle); \ + int nvme_ ## name ## _ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); \ + int nvme_ ## name ## _ctrlr_scan(const struct spdk_nvme_transport_id *trid, void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_remove_cb remove_cb, bool direct_connect); \ + int nvme_ ## name ## _ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr); \ + int nvme_ ## name ## _ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); \ + int nvme_ ## name ## _ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); \ + int nvme_ 
## name ## _ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value); \ + int nvme_ ## name ## _ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value); \ + uint32_t nvme_ ## name ## _ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr); \ + uint16_t nvme_ ## name ## _ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr); \ + struct spdk_nvme_qpair *nvme_ ## name ## _ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, const struct spdk_nvme_io_qpair_opts *opts); \ + void *nvme_ ## name ## _ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size); \ + int nvme_ ## name ## _ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size); \ + int nvme_ ## name ## _ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _qpair_enable(struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _qpair_disable(struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _qpair_reset(struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _qpair_fail(struct spdk_nvme_qpair *qpair); \ + int nvme_ ## name ## _qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req); \ + int32_t nvme_ ## name ## _qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions); + +DECLARE_TRANSPORT(transport) /* generic transport dispatch functions */ +DECLARE_TRANSPORT(pcie) +#ifdef SPDK_CONFIG_RDMA +DECLARE_TRANSPORT(rdma) +#endif + +#undef DECLARE_TRANSPORT + +/* + * Below ref related functions must be called with the global + * driver lock held for the multi-process condition. + * Within these functions, the per ctrlr ctrlr_lock is also + * acquired for the multi-thread condition. + */ +void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr); +void nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr); +int nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr); + +static inline bool +_is_page_aligned(uint64_t address, uint64_t page_size) +{ + return (address & (page_size - 1)) == 0; +} + +#endif /* __NVME_INTERNAL_H__ */ diff --git a/src/spdk/lib/nvme/nvme_ns.c b/src/spdk/lib/nvme/nvme_ns.c new file mode 100644 index 00000000..b88bf174 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ns.c @@ -0,0 +1,360 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvme_internal.h" + +static inline struct spdk_nvme_ns_data * +_nvme_ns_get_data(struct spdk_nvme_ns *ns) +{ + return &ns->ctrlr->nsdata[ns->id - 1]; +} + +/** + * Update Namespace flags based on Identify Controller + * and Identify Namespace. This can be also used for + * Namespace Attribute Notice events and Namespace + * operations such as Attach/Detach. + */ +void +nvme_ns_set_identify_data(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ns_data *nsdata; + + nsdata = _nvme_ns_get_data(ns); + + ns->flags = 0x0000; + + ns->sector_size = 1 << nsdata->lbaf[nsdata->flbas.format].lbads; + ns->extended_lba_size = ns->sector_size; + + ns->md_size = nsdata->lbaf[nsdata->flbas.format].ms; + if (nsdata->flbas.extended) { + ns->flags |= SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED; + ns->extended_lba_size += ns->md_size; + } + + ns->sectors_per_max_io = spdk_nvme_ns_get_max_io_xfer_size(ns) / ns->extended_lba_size; + + if (nsdata->noiob) { + ns->sectors_per_stripe = nsdata->noiob; + SPDK_DEBUGLOG(SPDK_LOG_NVME, "ns %u optimal IO boundary %" PRIu32 " blocks\n", + ns->id, ns->sectors_per_stripe); + } else if (ns->ctrlr->quirks & NVME_INTEL_QUIRK_STRIPING && + ns->ctrlr->cdata.vs[3] != 0) { + ns->sectors_per_stripe = (1ULL << ns->ctrlr->cdata.vs[3]) * ns->ctrlr->min_page_size / + ns->sector_size; + SPDK_DEBUGLOG(SPDK_LOG_NVME, "ns %u stripe size quirk %" PRIu32 " blocks\n", + ns->id, ns->sectors_per_stripe); + } else { + ns->sectors_per_stripe = 0; + } + + if (ns->ctrlr->cdata.oncs.dsm) { + ns->flags |= SPDK_NVME_NS_DEALLOCATE_SUPPORTED; + } + + if (ns->ctrlr->cdata.vwc.present) { + ns->flags |= SPDK_NVME_NS_FLUSH_SUPPORTED; + } + + if (ns->ctrlr->cdata.oncs.write_zeroes) { + ns->flags |= SPDK_NVME_NS_WRITE_ZEROES_SUPPORTED; + } + + if (nsdata->nsrescap.raw) { + ns->flags |= SPDK_NVME_NS_RESERVATION_SUPPORTED; + } + + ns->pi_type = SPDK_NVME_FMT_NVM_PROTECTION_DISABLE; + if (nsdata->lbaf[nsdata->flbas.format].ms && nsdata->dps.pit) { + ns->flags |= SPDK_NVME_NS_DPS_PI_SUPPORTED; + ns->pi_type = nsdata->dps.pit; + } +} + +static int +nvme_ctrlr_identify_ns(struct spdk_nvme_ns *ns) +{ + struct nvme_completion_poll_status status; + struct spdk_nvme_ns_data *nsdata; + int rc; + + nsdata = _nvme_ns_get_data(ns); + rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, + nsdata, sizeof(*nsdata), + nvme_completion_poll_cb, &status); + if (rc != 0) { + return rc; + } + + if (spdk_nvme_wait_for_completion_robust_lock(ns->ctrlr->adminq, &status, + &ns->ctrlr->ctrlr_lock)) { + /* This can occur if the namespace is not active. Simply zero the + * namespace data and continue. 
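+		 * (nvme_ns_destruct() zeroes the cached namespace data, so an
+		 * inactive namespace simply reports zero capacity and is then
+		 * treated as inactive by spdk_nvme_ns_is_active().)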
*/ + nvme_ns_destruct(ns); + return 0; + } + + nvme_ns_set_identify_data(ns); + + return 0; +} + +static int +nvme_ctrlr_identify_id_desc(struct spdk_nvme_ns *ns) +{ + struct nvme_completion_poll_status status; + int rc; + + memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); + + if (ns->ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) || + (ns->ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n"); + return 0; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Attempting to retrieve NS ID Descriptor List\n"); + rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST, 0, ns->id, + ns->id_desc_list, sizeof(ns->id_desc_list), + nvme_completion_poll_cb, &status); + if (rc < 0) { + return rc; + } + + rc = spdk_nvme_wait_for_completion_robust_lock(ns->ctrlr->adminq, &status, &ns->ctrlr->ctrlr_lock); + if (rc != 0) { + SPDK_WARNLOG("Failed to retrieve NS ID Descriptor List\n"); + memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list)); + } + + return rc; +} + +uint32_t +spdk_nvme_ns_get_id(struct spdk_nvme_ns *ns) +{ + return ns->id; +} + +bool +spdk_nvme_ns_is_active(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata = NULL; + + /* + * According to the spec, valid NS has non-zero id. + */ + if (ns->id == 0) { + return false; + } + + nsdata = _nvme_ns_get_data(ns); + + /* + * According to the spec, Identify Namespace will return a zero-filled structure for + * inactive namespace IDs. + * Check NCAP since it must be nonzero for an active namespace. + */ + return nsdata->ncap != 0; +} + +struct spdk_nvme_ctrlr * +spdk_nvme_ns_get_ctrlr(struct spdk_nvme_ns *ns) +{ + return ns->ctrlr; +} + +uint32_t +spdk_nvme_ns_get_max_io_xfer_size(struct spdk_nvme_ns *ns) +{ + return ns->ctrlr->max_xfer_size; +} + +uint32_t +spdk_nvme_ns_get_sector_size(struct spdk_nvme_ns *ns) +{ + return ns->sector_size; +} + +uint32_t +spdk_nvme_ns_get_extended_sector_size(struct spdk_nvme_ns *ns) +{ + return ns->extended_lba_size; +} + +uint64_t +spdk_nvme_ns_get_num_sectors(struct spdk_nvme_ns *ns) +{ + return _nvme_ns_get_data(ns)->nsze; +} + +uint64_t +spdk_nvme_ns_get_size(struct spdk_nvme_ns *ns) +{ + return spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns); +} + +uint32_t +spdk_nvme_ns_get_flags(struct spdk_nvme_ns *ns) +{ + return ns->flags; +} + +enum spdk_nvme_pi_type +spdk_nvme_ns_get_pi_type(struct spdk_nvme_ns *ns) { + return ns->pi_type; +} + +bool +spdk_nvme_ns_supports_extended_lba(struct spdk_nvme_ns *ns) +{ + return (ns->flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) ? 
true : false; +} + +uint32_t +spdk_nvme_ns_get_md_size(struct spdk_nvme_ns *ns) +{ + return ns->md_size; +} + +const struct spdk_nvme_ns_data * +spdk_nvme_ns_get_data(struct spdk_nvme_ns *ns) +{ + return _nvme_ns_get_data(ns); +} + +enum spdk_nvme_dealloc_logical_block_read_value spdk_nvme_ns_get_dealloc_logical_block_read_value( + struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr; + const struct spdk_nvme_ns_data *data = spdk_nvme_ns_get_data(ns); + + if (ctrlr->quirks & NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE) { + return SPDK_NVME_DEALLOC_READ_00; + } else { + return data->dlfeat.bits.read_value; + } +} + +uint32_t +spdk_nvme_ns_get_optimal_io_boundary(struct spdk_nvme_ns *ns) +{ + return ns->sectors_per_stripe; +} + +static const void * +_spdk_nvme_ns_find_id_desc(const struct spdk_nvme_ns *ns, enum spdk_nvme_nidt type, size_t *length) +{ + const struct spdk_nvme_ns_id_desc *desc; + size_t offset; + + offset = 0; + while (offset + 4 < sizeof(ns->id_desc_list)) { + desc = (const struct spdk_nvme_ns_id_desc *)&ns->id_desc_list[offset]; + + if (desc->nidl == 0) { + /* End of list */ + return NULL; + } + + /* + * Check if this descriptor fits within the list. + * 4 is the fixed-size descriptor header (not counted in NIDL). + */ + if (offset + desc->nidl + 4 > sizeof(ns->id_desc_list)) { + /* Descriptor longer than remaining space in list (invalid) */ + return NULL; + } + + if (desc->nidt == type) { + *length = desc->nidl; + return &desc->nid[0]; + } + + offset += 4 + desc->nidl; + } + + return NULL; +} + +const struct spdk_uuid * +spdk_nvme_ns_get_uuid(const struct spdk_nvme_ns *ns) +{ + const struct spdk_uuid *uuid; + size_t uuid_size; + + uuid = _spdk_nvme_ns_find_id_desc(ns, SPDK_NVME_NIDT_UUID, &uuid_size); + if (uuid == NULL || uuid_size != sizeof(*uuid)) { + return NULL; + } + + return uuid; +} + +int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id, + struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + assert(id > 0); + + ns->ctrlr = ctrlr; + ns->id = id; + + rc = nvme_ctrlr_identify_ns(ns); + if (rc != 0) { + return rc; + } + + return nvme_ctrlr_identify_id_desc(ns); +} + +void nvme_ns_destruct(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ns_data *nsdata; + + if (!ns->id) { + return; + } + + nsdata = _nvme_ns_get_data(ns); + memset(nsdata, 0, sizeof(*nsdata)); + ns->sector_size = 0; + ns->extended_lba_size = 0; + ns->md_size = 0; + ns->pi_type = 0; + ns->sectors_per_max_io = 0; + ns->sectors_per_stripe = 0; + ns->flags = 0; +} diff --git a/src/spdk/lib/nvme/nvme_ns_cmd.c b/src/spdk/lib/nvme/nvme_ns_cmd.c new file mode 100644 index 00000000..9562cf5a --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ns_cmd.c @@ -0,0 +1,1026 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvme_internal.h" + +static struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t opc, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag, bool check_sgl); + + +static bool +spdk_nvme_ns_check_request_length(uint32_t lba_count, uint32_t sectors_per_max_io, + uint32_t sectors_per_stripe, uint32_t qdepth) +{ + uint32_t child_per_io; + + if (sectors_per_stripe > 0) { + child_per_io = (lba_count + sectors_per_stripe - 1) / sectors_per_stripe; + } else { + child_per_io = (lba_count + sectors_per_max_io - 1) / sectors_per_max_io; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "checking maximum i/o length %d\n", child_per_io); + + return child_per_io >= qdepth; +} + +static void +nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_request *child = child_arg; + struct nvme_request *parent = child->parent; + + nvme_request_remove_child(parent, child); + + if (spdk_nvme_cpl_is_error(cpl)) { + memcpy(&parent->parent_status, cpl, sizeof(*cpl)); + } + + if (parent->num_children == 0) { + nvme_complete_request(parent, &parent->parent_status); + nvme_free_request(parent); + } +} + +static void +nvme_request_add_child(struct nvme_request *parent, struct nvme_request *child) +{ + assert(parent->num_children != UINT16_MAX); + + if (parent->num_children == 0) { + /* + * Defer initialization of the children TAILQ since it falls + * on a separate cacheline. This ensures we do not touch this + * cacheline except on request splitting cases, which are + * relatively rare. 
+ */ + TAILQ_INIT(&parent->children); + parent->parent = NULL; + memset(&parent->parent_status, 0, sizeof(struct spdk_nvme_cpl)); + } + + parent->num_children++; + TAILQ_INSERT_TAIL(&parent->children, child, child_tailq); + child->parent = parent; + child->cb_fn = nvme_cb_complete_child; + child->cb_arg = child; +} + +void +nvme_request_remove_child(struct nvme_request *parent, struct nvme_request *child) +{ + assert(parent != NULL); + assert(child != NULL); + assert(child->parent == parent); + assert(parent->num_children != 0); + + parent->num_children--; + TAILQ_REMOVE(&parent->children, child, child_tailq); +} + +static void +nvme_request_free_children(struct nvme_request *req) +{ + struct nvme_request *child, *tmp; + + if (req->num_children == 0) { + return; + } + + /* free all child nvme_request */ + TAILQ_FOREACH_SAFE(child, &req->children, child_tailq, tmp) { + nvme_request_remove_child(req, child); + nvme_request_free_children(child); + nvme_free_request(child); + } +} + +static struct nvme_request * +_nvme_add_child_request(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, + struct nvme_request *parent, bool check_sgl) +{ + struct nvme_request *child; + + child = _nvme_ns_cmd_rw(ns, qpair, payload, payload_offset, md_offset, lba, lba_count, cb_fn, + cb_arg, opc, io_flags, apptag_mask, apptag, check_sgl); + if (child == NULL) { + nvme_request_free_children(parent); + nvme_free_request(parent); + return NULL; + } + + nvme_request_add_child(parent, child); + return child; +} + +static struct nvme_request * +_nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, struct nvme_request *req, + uint32_t sectors_per_max_io, uint32_t sector_mask, + uint16_t apptag_mask, uint16_t apptag) +{ + uint32_t sector_size; + uint32_t md_size = ns->md_size; + uint32_t remaining_lba_count = lba_count; + struct nvme_request *child; + + sector_size = ns->extended_lba_size; + + if ((io_flags & SPDK_NVME_IO_FLAGS_PRACT) && + (ns->flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) && + (ns->flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) && + (md_size == 8)) { + sector_size -= 8; + } + + while (remaining_lba_count > 0) { + lba_count = sectors_per_max_io - (lba & sector_mask); + lba_count = spdk_min(remaining_lba_count, lba_count); + + child = _nvme_add_child_request(ns, qpair, payload, payload_offset, md_offset, + lba, lba_count, cb_fn, cb_arg, opc, + io_flags, apptag_mask, apptag, req, true); + if (child == NULL) { + return NULL; + } + + remaining_lba_count -= lba_count; + lba += lba_count; + payload_offset += lba_count * sector_size; + md_offset += lba_count * md_size; + } + + return req; +} + +static void +_nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req, + uint32_t opc, uint64_t lba, uint32_t lba_count, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + struct spdk_nvme_cmd *cmd; + + cmd = &req->cmd; + cmd->opc = opc; + cmd->nsid = ns->id; + + *(uint64_t *)&cmd->cdw10 = lba; + + if (ns->flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + switch (ns->pi_type) { + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE1: + case 
SPDK_NVME_FMT_NVM_PROTECTION_TYPE2: + cmd->cdw14 = (uint32_t)lba; + break; + } + } + + cmd->cdw12 = lba_count - 1; + cmd->cdw12 |= io_flags; + + cmd->cdw15 = apptag_mask; + cmd->cdw15 = (cmd->cdw15 << 16 | apptag); +} + +static struct nvme_request * +_nvme_ns_cmd_split_request_prp(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, struct nvme_request *req, + uint16_t apptag_mask, uint16_t apptag) +{ + spdk_nvme_req_reset_sgl_cb reset_sgl_fn = req->payload.reset_sgl_fn; + spdk_nvme_req_next_sge_cb next_sge_fn = req->payload.next_sge_fn; + void *sgl_cb_arg = req->payload.contig_or_cb_arg; + bool start_valid, end_valid, last_sge, child_equals_parent; + uint64_t child_lba = lba; + uint32_t req_current_length = 0; + uint32_t child_length = 0; + uint32_t sge_length; + uint32_t page_size = qpair->ctrlr->page_size; + uintptr_t address; + + reset_sgl_fn(sgl_cb_arg, payload_offset); + next_sge_fn(sgl_cb_arg, (void **)&address, &sge_length); + while (req_current_length < req->payload_size) { + + if (sge_length == 0) { + continue; + } else if (req_current_length + sge_length > req->payload_size) { + sge_length = req->payload_size - req_current_length; + } + + /* + * The start of the SGE is invalid if the start address is not page aligned, + * unless it is the first SGE in the child request. + */ + start_valid = child_length == 0 || _is_page_aligned(address, page_size); + + /* Boolean for whether this is the last SGE in the parent request. */ + last_sge = (req_current_length + sge_length == req->payload_size); + + /* + * The end of the SGE is invalid if the end address is not page aligned, + * unless it is the last SGE in the parent request. + */ + end_valid = last_sge || _is_page_aligned(address + sge_length, page_size); + + /* + * This child request equals the parent request, meaning that no splitting + * was required for the parent request (the one passed into this function). + * In this case, we do not create a child request at all - we just send + * the original request as a single request at the end of this function. + */ + child_equals_parent = (child_length + sge_length == req->payload_size); + + if (start_valid) { + /* + * The start of the SGE is valid, so advance the length parameters, + * to include this SGE with previous SGEs for this child request + * (if any). If it is not valid, we do not advance the length + * parameters nor get the next SGE, because we must send what has + * been collected before this SGE as a child request. + */ + child_length += sge_length; + req_current_length += sge_length; + if (req_current_length < req->payload_size) { + next_sge_fn(sgl_cb_arg, (void **)&address, &sge_length); + } + /* + * If the next SGE is not page aligned, we will need to create a child + * request for what we have so far, and then start a new child request for + * the next SGE. + */ + start_valid = _is_page_aligned(address, page_size); + } + + if (start_valid && end_valid && !last_sge) { + continue; + } + + /* + * We need to create a split here. Send what we have accumulated so far as a child + * request. Checking if child_equals_parent allows us to *not* create a child request + * when no splitting is required - in that case we will fall-through and just create + * a single request with no children for the entire I/O. 
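+		 * For example, with a 4 KiB controller page size, a payload whose
+		 * SGEs all start and end on page boundaries is sent as a single
+		 * request, while an SGE that ends mid-page (and is not the last
+		 * SGE) forces everything accumulated up to that point to be sent
+		 * as a child request.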
+ */ + if (!child_equals_parent) { + struct nvme_request *child; + uint32_t child_lba_count; + + if ((child_length % ns->extended_lba_size) != 0) { + SPDK_ERRLOG("child_length %u not even multiple of lba_size %u\n", + child_length, ns->extended_lba_size); + return NULL; + } + child_lba_count = child_length / ns->extended_lba_size; + /* + * Note the last parameter is set to "false" - this tells the recursive + * call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting + * since we have already verified it here. + */ + child = _nvme_add_child_request(ns, qpair, payload, payload_offset, md_offset, + child_lba, child_lba_count, + cb_fn, cb_arg, opc, io_flags, + apptag_mask, apptag, req, false); + if (child == NULL) { + return NULL; + } + payload_offset += child_length; + md_offset += child_lba_count * ns->md_size; + child_lba += child_lba_count; + child_length = 0; + } + } + + if (child_length == req->payload_size) { + /* No splitting was required, so setup the whole payload as one request. */ + _nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); + } + + return req; +} + +static struct nvme_request * +_nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, struct nvme_request *req, + uint16_t apptag_mask, uint16_t apptag) +{ + spdk_nvme_req_reset_sgl_cb reset_sgl_fn = req->payload.reset_sgl_fn; + spdk_nvme_req_next_sge_cb next_sge_fn = req->payload.next_sge_fn; + void *sgl_cb_arg = req->payload.contig_or_cb_arg; + uint64_t child_lba = lba; + uint32_t req_current_length = 0; + uint32_t child_length = 0; + uint32_t sge_length; + uint16_t max_sges, num_sges; + uintptr_t address; + + max_sges = ns->ctrlr->max_sges; + + reset_sgl_fn(sgl_cb_arg, payload_offset); + num_sges = 0; + + while (req_current_length < req->payload_size) { + next_sge_fn(sgl_cb_arg, (void **)&address, &sge_length); + + if (req_current_length + sge_length > req->payload_size) { + sge_length = req->payload_size - req_current_length; + } + + child_length += sge_length; + req_current_length += sge_length; + num_sges++; + + if (num_sges < max_sges) { + continue; + } + + /* + * We need to create a split here. Send what we have accumulated so far as a child + * request. Checking if the child equals the full payload allows us to *not* + * create a child request when no splitting is required - in that case we will + * fall-through and just create a single request with no children for the entire I/O. + */ + if (child_length != req->payload_size) { + struct nvme_request *child; + uint32_t child_lba_count; + + if ((child_length % ns->extended_lba_size) != 0) { + SPDK_ERRLOG("child_length %u not even multiple of lba_size %u\n", + child_length, ns->extended_lba_size); + return NULL; + } + child_lba_count = child_length / ns->extended_lba_size; + /* + * Note the last parameter is set to "false" - this tells the recursive + * call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting + * since we have already verified it here. 
+ */ + child = _nvme_add_child_request(ns, qpair, payload, payload_offset, md_offset, + child_lba, child_lba_count, + cb_fn, cb_arg, opc, io_flags, + apptag_mask, apptag, req, false); + if (child == NULL) { + return NULL; + } + payload_offset += child_length; + md_offset += child_lba_count * ns->md_size; + child_lba += child_lba_count; + child_length = 0; + num_sges = 0; + } + } + + if (child_length == req->payload_size) { + /* No splitting was required, so setup the whole payload as one request. */ + _nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); + } + + return req; +} + +static struct nvme_request * +_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl) +{ + struct nvme_request *req; + uint32_t sector_size; + uint32_t sectors_per_max_io; + uint32_t sectors_per_stripe; + + if (io_flags & 0xFFFF) { + /* The bottom 16 bits must be empty */ + SPDK_ERRLOG("io_flags 0x%x bottom 16 bits is not empty\n", + io_flags); + return NULL; + } + + sector_size = ns->extended_lba_size; + sectors_per_max_io = ns->sectors_per_max_io; + sectors_per_stripe = ns->sectors_per_stripe; + + if ((io_flags & SPDK_NVME_IO_FLAGS_PRACT) && + (ns->flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) && + (ns->flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) && + (ns->md_size == 8)) { + sector_size -= 8; + } + + req = nvme_allocate_request(qpair, payload, lba_count * sector_size, cb_fn, cb_arg); + if (req == NULL) { + return NULL; + } + + req->payload_offset = payload_offset; + req->md_offset = md_offset; + + /* + * Intel DC P3*00 NVMe controllers benefit from driver-assisted striping. + * If this controller defines a stripe boundary and this I/O spans a stripe + * boundary, split the request into multiple requests and submit each + * separately to hardware. 
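+ * For example, if sectors_per_stripe is 256, a 20-block I/O starting at LBA 250 crosses a stripe boundary and is split into child requests of 6 blocks (LBA 250) and 14 blocks (LBA 256).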
+ */ + if (sectors_per_stripe > 0 && + (((lba & (sectors_per_stripe - 1)) + lba_count) > sectors_per_stripe)) { + + return _nvme_ns_cmd_split_request(ns, qpair, payload, payload_offset, md_offset, lba, lba_count, + cb_fn, + cb_arg, opc, + io_flags, req, sectors_per_stripe, sectors_per_stripe - 1, apptag_mask, apptag); + } else if (lba_count > sectors_per_max_io) { + return _nvme_ns_cmd_split_request(ns, qpair, payload, payload_offset, md_offset, lba, lba_count, + cb_fn, + cb_arg, opc, + io_flags, req, sectors_per_max_io, 0, apptag_mask, apptag); + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL && check_sgl) { + if (ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) { + return _nvme_ns_cmd_split_request_sgl(ns, qpair, payload, payload_offset, md_offset, + lba, lba_count, cb_fn, cb_arg, opc, io_flags, + req, apptag_mask, apptag); + } else { + return _nvme_ns_cmd_split_request_prp(ns, qpair, payload, payload_offset, md_offset, + lba, lba_count, cb_fn, cb_arg, opc, io_flags, + req, apptag_mask, apptag); + } + } + + _nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); + return req; +} + +int +spdk_nvme_ns_cmd_compare(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, + uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, + SPDK_NVME_OPC_COMPARE, + io_flags, 0, + 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_compare_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, + void *metadata, + uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, + SPDK_NVME_OPC_COMPARE, + io_flags, + apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_comparev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, + SPDK_NVME_OPC_COMPARE, + io_flags, 0, 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + 
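+ /* The request length checks above passed, so allocation failed because the qpair has no free request objects available right now. */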
return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, + uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ, + io_flags, 0, + 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, + void *metadata, + uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ, + io_flags, + apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ, + io_flags, 0, 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void 
*cb_arg, + uint32_t io_flags) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE, + io_flags, 0, 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, uint64_t lba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + + payload = NVME_PAYLOAD_CONTIG(buffer, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, NULL); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE, + io_flags, 0, 0, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (spdk_nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + +int +spdk_nvme_ns_cmd_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + uint64_t *tmp_lba; + + if (lba_count == 0 || lba_count > UINT16_MAX + 1) { + return -EINVAL; + } + + 
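+ /* Write Zeroes transfers no data from the host, so a request with a null payload is sufficient. */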
req = nvme_allocate_request_null(qpair, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_WRITE_ZEROES; + cmd->nsid = ns->id; + + tmp_lba = (uint64_t *)&cmd->cdw10; + *tmp_lba = lba; + cmd->cdw12 = lba_count - 1; + cmd->cdw12 |= io_flags; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_dataset_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t type, + const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + if (num_ranges == 0 || num_ranges > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { + return -EINVAL; + } + + if (ranges == NULL) { + return -EINVAL; + } + + req = nvme_allocate_request_user_copy(qpair, (void *)ranges, + num_ranges * sizeof(struct spdk_nvme_dsm_range), + cb_fn, cb_arg, true); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT; + cmd->nsid = ns->id; + + cmd->cdw10 = num_ranges - 1; + cmd->cdw11 = type; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_flush(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(qpair, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_FLUSH; + cmd->nsid = ns->id; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_reservation_register(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_register_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_register_action action, + enum spdk_nvme_reservation_register_cptpl cptpl, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_user_copy(qpair, + payload, sizeof(struct spdk_nvme_reservation_register_data), + cb_fn, cb_arg, true); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_RESERVATION_REGISTER; + cmd->nsid = ns->id; + + /* Bits 0-2 */ + cmd->cdw10 = action; + /* Bit 3 */ + cmd->cdw10 |= ignore_key ? 1 << 3 : 0; + /* Bits 30-31 */ + cmd->cdw10 |= (uint32_t)cptpl << 30; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_reservation_release(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_key_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_release_action action, + enum spdk_nvme_reservation_type type, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_user_copy(qpair, + payload, sizeof(struct spdk_nvme_reservation_key_data), cb_fn, + cb_arg, true); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_RESERVATION_RELEASE; + cmd->nsid = ns->id; + + /* Bits 0-2 */ + cmd->cdw10 = action; + /* Bit 3 */ + cmd->cdw10 |= ignore_key ? 
1 << 3 : 0; + /* Bits 8-15 */ + cmd->cdw10 |= (uint32_t)type << 8; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_reservation_acquire(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + struct spdk_nvme_reservation_acquire_data *payload, + bool ignore_key, + enum spdk_nvme_reservation_acquire_action action, + enum spdk_nvme_reservation_type type, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_user_copy(qpair, + payload, sizeof(struct spdk_nvme_reservation_acquire_data), + cb_fn, cb_arg, true); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_RESERVATION_ACQUIRE; + cmd->nsid = ns->id; + + /* Bits 0-2 */ + cmd->cdw10 = action; + /* Bit 3 */ + cmd->cdw10 |= ignore_key ? 1 << 3 : 0; + /* Bits 8-15 */ + cmd->cdw10 |= (uint32_t)type << 8; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *payload, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + uint32_t num_dwords; + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + if (len % 4) { + return -EINVAL; + } + num_dwords = len / 4; + + req = nvme_allocate_request_user_copy(qpair, payload, len, cb_fn, cb_arg, false); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_RESERVATION_REPORT; + cmd->nsid = ns->id; + + cmd->cdw10 = num_dwords; + + return nvme_qpair_submit_request(qpair, req); +} diff --git a/src/spdk/lib/nvme/nvme_ns_ocssd_cmd.c b/src/spdk/lib/nvme/nvme_ns_ocssd_cmd.c new file mode 100644 index 00000000..2a574992 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_ns_ocssd_cmd.c @@ -0,0 +1,232 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/nvme_ocssd.h" +#include "nvme_internal.h" + +int +spdk_nvme_ocssd_ns_cmd_vector_reset(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + uint64_t *lba_list, uint32_t num_lbas, + struct spdk_ocssd_chunk_information_entry *chunk_info, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + if (!lba_list || (num_lbas == 0) || + (num_lbas > SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES)) { + return -EINVAL; + } + + req = nvme_allocate_request_null(qpair, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_OCSSD_OPC_VECTOR_RESET; + cmd->nsid = ns->id; + + if (chunk_info != NULL) { + cmd->mptr = spdk_vtophys(chunk_info); + } + + /* + * Dword 10 and 11 store a pointer to the list of logical block addresses. + * If there is a single entry in the LBA list, the logical block + * address should be stored instead. + */ + if (num_lbas == 1) { + *(uint64_t *)&cmd->cdw10 = *lba_list; + } else { + *(uint64_t *)&cmd->cdw10 = spdk_vtophys(lba_list); + } + + cmd->cdw12 = num_lbas - 1; + + return nvme_qpair_submit_request(qpair, req); +} + +static int +_nvme_ocssd_ns_cmd_vector_rw_with_md(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + enum spdk_ocssd_io_opcode opc, + uint32_t io_flags) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + struct nvme_payload payload; + uint32_t valid_flags = SPDK_OCSSD_IO_FLAGS_LIMITED_RETRY; + + if (io_flags & ~valid_flags) { + return -EINVAL; + } + + if (!buffer || !lba_list || (num_lbas == 0) || + (num_lbas > SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES)) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_CONTIG(buffer, metadata); + + req = nvme_allocate_request(qpair, &payload, num_lbas * ns->sector_size, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = opc; + cmd->nsid = ns->id; + + /* + * Dword 10 and 11 store a pointer to the list of logical block addresses. + * If there is a single entry in the LBA list, the logical block + * address should be stored instead. 
+ */ + if (num_lbas == 1) { + *(uint64_t *)&cmd->cdw10 = *lba_list; + } else { + *(uint64_t *)&cmd->cdw10 = spdk_vtophys(lba_list); + } + + cmd->cdw12 = num_lbas - 1; + cmd->cdw12 |= io_flags; + + return nvme_qpair_submit_request(qpair, req); +} + +int +spdk_nvme_ocssd_ns_cmd_vector_write_with_md(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + return _nvme_ocssd_ns_cmd_vector_rw_with_md(ns, qpair, buffer, metadata, lba_list, + num_lbas, cb_fn, cb_arg, SPDK_OCSSD_OPC_VECTOR_WRITE, io_flags); +} + +int +spdk_nvme_ocssd_ns_cmd_vector_write(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + return _nvme_ocssd_ns_cmd_vector_rw_with_md(ns, qpair, buffer, NULL, lba_list, + num_lbas, cb_fn, cb_arg, SPDK_OCSSD_OPC_VECTOR_WRITE, io_flags); +} + +int +spdk_nvme_ocssd_ns_cmd_vector_read_with_md(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + return _nvme_ocssd_ns_cmd_vector_rw_with_md(ns, qpair, buffer, metadata, lba_list, + num_lbas, cb_fn, cb_arg, SPDK_OCSSD_OPC_VECTOR_READ, io_flags); +} + +int +spdk_nvme_ocssd_ns_cmd_vector_read(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + void *buffer, + uint64_t *lba_list, uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + return _nvme_ocssd_ns_cmd_vector_rw_with_md(ns, qpair, buffer, NULL, lba_list, + num_lbas, cb_fn, cb_arg, SPDK_OCSSD_OPC_VECTOR_READ, io_flags); +} + +int +spdk_nvme_ocssd_ns_cmd_vector_copy(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, + uint64_t *dst_lba_list, + uint64_t *src_lba_list, + uint32_t num_lbas, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + uint32_t valid_flags = SPDK_OCSSD_IO_FLAGS_LIMITED_RETRY; + + if (io_flags & ~valid_flags) { + return -EINVAL; + } + + if (!dst_lba_list || !src_lba_list || (num_lbas == 0) || + (num_lbas > SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES)) { + return -EINVAL; + } + + req = nvme_allocate_request_null(qpair, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_OCSSD_OPC_VECTOR_COPY; + cmd->nsid = ns->id; + + /* + * Dword 10 and 11 store a pointer to the list of source logical + * block addresses. + * Dword 14 and 15 store a pointer to the list of destination logical + * block addresses. + * If there is a single entry in the LBA list, the logical block + * address should be stored instead. + */ + if (num_lbas == 1) { + *(uint64_t *)&cmd->cdw10 = *src_lba_list; + *(uint64_t *)&cmd->cdw14 = *dst_lba_list; + } else { + *(uint64_t *)&cmd->cdw10 = spdk_vtophys(src_lba_list); + *(uint64_t *)&cmd->cdw14 = spdk_vtophys(dst_lba_list); + } + + cmd->cdw12 = num_lbas - 1; + cmd->cdw12 |= io_flags; + + return nvme_qpair_submit_request(qpair, req); +} diff --git a/src/spdk/lib/nvme/nvme_pcie.c b/src/spdk/lib/nvme/nvme_pcie.c new file mode 100644 index 00000000..8042380c --- /dev/null +++ b/src/spdk/lib/nvme/nvme_pcie.c @@ -0,0 +1,2142 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * Copyright (c) 2017, IBM Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NVMe over PCIe transport + */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/likely.h" +#include "nvme_internal.h" +#include "nvme_uevent.h" + +/* + * Number of completion queue entries to process before ringing the + * completion queue doorbell. + */ +#define NVME_MIN_COMPLETIONS (1) +#define NVME_MAX_COMPLETIONS (128) + +#define NVME_ADMIN_ENTRIES (128) + +/* + * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL + * segment. + */ +#define NVME_MAX_SGL_DESCRIPTORS (253) + +#define NVME_MAX_PRP_LIST_ENTRIES (506) + +struct nvme_pcie_enum_ctx { + spdk_nvme_probe_cb probe_cb; + void *cb_ctx; + struct spdk_pci_addr pci_addr; + bool has_pci_addr; +}; + +/* PCIe transport extensions for spdk_nvme_ctrlr */ +struct nvme_pcie_ctrlr { + struct spdk_nvme_ctrlr ctrlr; + + /** NVMe MMIO register space */ + volatile struct spdk_nvme_registers *regs; + + /** NVMe MMIO register size */ + uint64_t regs_size; + + /* BAR mapping address which contains controller memory buffer */ + void *cmb_bar_virt_addr; + + /* BAR physical address which contains controller memory buffer */ + uint64_t cmb_bar_phys_addr; + + /* Controller memory buffer size in Bytes */ + uint64_t cmb_size; + + /* Current offset of controller memory buffer, relative to start of BAR virt addr */ + uint64_t cmb_current_offset; + + /* Last valid offset into CMB, this differs if CMB memory registration occurs or not */ + uint64_t cmb_max_offset; + + void *cmb_mem_register_addr; + size_t cmb_mem_register_size; + + bool cmb_io_data_supported; + + /** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */ + uint32_t doorbell_stride_u32; + + /* Opaque handle to associated PCI device. */ + struct spdk_pci_device *devhandle; + + /* File descriptor returned from spdk_pci_device_claim(). Closed when ctrlr is detached. 
*/ + int claim_fd; + + /* Flag to indicate the MMIO register has been remapped */ + bool is_remapped; +}; + +struct nvme_tracker { + TAILQ_ENTRY(nvme_tracker) tq_list; + + struct nvme_request *req; + uint16_t cid; + + uint16_t rsvd1: 15; + uint16_t active: 1; + + uint32_t rsvd2; + + uint64_t rsvd3; + + uint64_t prp_sgl_bus_addr; + + union { + uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES]; + struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS]; + } u; +}; +/* + * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary + * and so that there is no padding required to meet alignment requirements. + */ +SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K"); +SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned"); + +/* PCIe transport extensions for spdk_nvme_qpair */ +struct nvme_pcie_qpair { + /* Submission queue tail doorbell */ + volatile uint32_t *sq_tdbl; + + /* Completion queue head doorbell */ + volatile uint32_t *cq_hdbl; + + /* Submission queue shadow tail doorbell */ + volatile uint32_t *sq_shadow_tdbl; + + /* Completion queue shadow head doorbell */ + volatile uint32_t *cq_shadow_hdbl; + + /* Submission queue event index */ + volatile uint32_t *sq_eventidx; + + /* Completion queue event index */ + volatile uint32_t *cq_eventidx; + + /* Submission queue */ + struct spdk_nvme_cmd *cmd; + + /* Completion queue */ + struct spdk_nvme_cpl *cpl; + + TAILQ_HEAD(, nvme_tracker) free_tr; + TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr; + + /* Array of trackers indexed by command ID. */ + struct nvme_tracker *tr; + + uint16_t num_entries; + + uint16_t max_completions_cap; + + uint16_t sq_tail; + uint16_t cq_head; + uint16_t sq_head; + + uint8_t phase; + + bool is_enabled; + + /* + * Base qpair structure. + * This is located after the hot data in this structure so that the important parts of + * nvme_pcie_qpair are in the same cache line. + */ + struct spdk_nvme_qpair qpair; + + /* + * Fields below this point should not be touched on the normal I/O path. 
+ */ + + bool sq_in_cmb; + + uint64_t cmd_bus_addr; + uint64_t cpl_bus_addr; +}; + +static int nvme_pcie_ctrlr_attach(spdk_nvme_probe_cb probe_cb, void *cb_ctx, + struct spdk_pci_addr *pci_addr); +static int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair); +static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair); + +__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; +static volatile uint16_t g_signal_lock; +static bool g_sigset = false; +static int hotplug_fd = -1; + +static void +nvme_sigbus_fault_sighandler(int signum, siginfo_t *info, void *ctx) +{ + void *map_address; + + if (!__sync_bool_compare_and_swap(&g_signal_lock, 0, 1)) { + return; + } + + assert(g_thread_mmio_ctrlr != NULL); + + if (!g_thread_mmio_ctrlr->is_remapped) { + map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (map_address == MAP_FAILED) { + SPDK_ERRLOG("mmap failed\n"); + g_signal_lock = 0; + return; + } + memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers)); + g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address; + g_thread_mmio_ctrlr->is_remapped = true; + } + g_signal_lock = 0; + return; +} + +static void +nvme_pcie_ctrlr_setup_signal(void) +{ + struct sigaction sa; + + sa.sa_sigaction = nvme_sigbus_fault_sighandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sigaction(SIGBUS, &sa, NULL); +} + +static int +_nvme_pcie_hotplug_monitor(void *cb_ctx, spdk_nvme_probe_cb probe_cb, + spdk_nvme_remove_cb remove_cb) +{ + struct spdk_nvme_ctrlr *ctrlr, *tmp; + struct spdk_uevent event; + struct spdk_pci_addr pci_addr; + union spdk_nvme_csts_register csts; + struct spdk_nvme_ctrlr_process *proc; + + while (spdk_get_uevent(hotplug_fd, &event) > 0) { + if (event.subsystem == SPDK_NVME_UEVENT_SUBSYSTEM_UIO || + event.subsystem == SPDK_NVME_UEVENT_SUBSYSTEM_VFIO) { + if (event.action == SPDK_NVME_UEVENT_ADD) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "add nvme address: %s\n", + event.traddr); + if (spdk_process_is_primary()) { + if (!spdk_pci_addr_parse(&pci_addr, event.traddr)) { + nvme_pcie_ctrlr_attach(probe_cb, cb_ctx, &pci_addr); + } + } + } else if (event.action == SPDK_NVME_UEVENT_REMOVE) { + struct spdk_nvme_transport_id trid; + + memset(&trid, 0, sizeof(trid)); + trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(trid.traddr, sizeof(trid.traddr), "%s", event.traddr); + + ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(&trid); + if (ctrlr == NULL) { + return 0; + } + SPDK_DEBUGLOG(SPDK_LOG_NVME, "remove nvme address: %s\n", + event.traddr); + + nvme_ctrlr_fail(ctrlr, true); + + /* get the user app to clean up and stop I/O */ + if (remove_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + remove_cb(cb_ctx, ctrlr); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + } + } + } + + /* This is a work around for vfio-attached device hot remove detection. */ + TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) { + /* NVMe controller BAR must be mapped to secondary process space before any access. 
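+ * Once that mapping exists, a CSTS register that reads back as all ones indicates the device was hot-removed, so the controller is failed and the remove callback invoked.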
*/ + proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (proc) { + csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); + if (csts.raw == 0xffffffffU) { + nvme_ctrlr_fail(ctrlr, true); + if (remove_cb) { + nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); + remove_cb(cb_ctx, ctrlr); + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + } + } + } + } + return 0; +} + +static inline struct nvme_pcie_ctrlr * +nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE); + return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr); +} + +static inline struct nvme_pcie_qpair * +nvme_pcie_qpair(struct spdk_nvme_qpair *qpair) +{ + assert(qpair->trtype == SPDK_NVME_TRANSPORT_PCIE); + return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair); +} + +static volatile void * +nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + return (volatile void *)((uintptr_t)pctrlr->regs + offset); +} + +int +nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + assert(offset <= sizeof(struct spdk_nvme_registers) - 4); + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value); + g_thread_mmio_ctrlr = NULL; + return 0; +} + +int +nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + assert(offset <= sizeof(struct spdk_nvme_registers) - 8); + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value); + g_thread_mmio_ctrlr = NULL; + return 0; +} + +int +nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + assert(offset <= sizeof(struct spdk_nvme_registers) - 4); + assert(value != NULL); + g_thread_mmio_ctrlr = pctrlr; + *value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset)); + g_thread_mmio_ctrlr = NULL; + if (~(*value) == 0) { + return -1; + } + + return 0; +} + +int +nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + + assert(offset <= sizeof(struct spdk_nvme_registers) - 8); + assert(value != NULL); + g_thread_mmio_ctrlr = pctrlr; + *value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset)); + g_thread_mmio_ctrlr = NULL; + if (~(*value) == 0) { + return -1; + } + + return 0; +} + +static int +nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) +{ + return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq), + value); +} + +static int +nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) +{ + return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq), + value); +} + +static int +nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa) +{ + return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw), + aqa->raw); +} + +static int +nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc) +{ + return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw), + &cmbloc->raw); +} + +static int +nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union 
spdk_nvme_cmbsz_register *cmbsz) +{ + return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), + &cmbsz->raw); +} + +uint32_t +nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) +{ + /* + * For commands requiring more than 2 PRP entries, one PRP will be + * embedded in the command (prp1), and the rest of the PRP entries + * will be in a list pointed to by the command (prp2). This means + * that real max number of PRP entries we support is 506+1, which + * results in a max xfer size of 506*ctrlr->page_size. + */ + return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size; +} + +uint16_t +nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) +{ + return NVME_MAX_SGL_DESCRIPTORS; +} + +static void +nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr) +{ + int rc; + void *addr; + uint32_t bir; + union spdk_nvme_cmbsz_register cmbsz; + union spdk_nvme_cmbloc_register cmbloc; + uint64_t size, unit_size, offset, bar_size, bar_phys_addr; + uint64_t mem_register_start, mem_register_end; + + if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) || + nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { + SPDK_ERRLOG("get registers failed\n"); + goto exit; + } + + if (!cmbsz.bits.sz) { + goto exit; + } + + bir = cmbloc.bits.bir; + /* Values 0 2 3 4 5 are valid for BAR */ + if (bir > 5 || bir == 1) { + goto exit; + } + + /* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */ + unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu); + /* controller memory buffer size in Bytes */ + size = unit_size * cmbsz.bits.sz; + /* controller memory buffer offset from BAR in Bytes */ + offset = unit_size * cmbloc.bits.ofst; + + rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, + &bar_phys_addr, &bar_size); + if ((rc != 0) || addr == NULL) { + goto exit; + } + + if (offset > bar_size) { + goto exit; + } + + if (size > bar_size - offset) { + goto exit; + } + + pctrlr->cmb_bar_virt_addr = addr; + pctrlr->cmb_bar_phys_addr = bar_phys_addr; + pctrlr->cmb_size = size; + pctrlr->cmb_current_offset = offset; + pctrlr->cmb_max_offset = offset + size; + + if (!cmbsz.bits.sqs) { + pctrlr->ctrlr.opts.use_cmb_sqs = false; + } + + /* If only SQS is supported use legacy mapping */ + if (cmbsz.bits.sqs && !(cmbsz.bits.wds || cmbsz.bits.rds)) { + return; + } + + /* If CMB is less than 4MiB in size then abort CMB mapping */ + if (pctrlr->cmb_size < (1ULL << 22)) { + goto exit; + } + + mem_register_start = (((uintptr_t)pctrlr->cmb_bar_virt_addr + offset + 0x1fffff) & ~(0x200000 - 1)); + mem_register_end = ((uintptr_t)pctrlr->cmb_bar_virt_addr + offset + pctrlr->cmb_size); + mem_register_end &= ~(uint64_t)(0x200000 - 1); + pctrlr->cmb_mem_register_addr = (void *)mem_register_start; + pctrlr->cmb_mem_register_size = mem_register_end - mem_register_start; + + rc = spdk_mem_register(pctrlr->cmb_mem_register_addr, pctrlr->cmb_mem_register_size); + if (rc) { + SPDK_ERRLOG("spdk_mem_register() failed\n"); + goto exit; + } + pctrlr->cmb_current_offset = mem_register_start - ((uint64_t)pctrlr->cmb_bar_virt_addr); + pctrlr->cmb_max_offset = mem_register_end - ((uint64_t)pctrlr->cmb_bar_virt_addr); + pctrlr->cmb_io_data_supported = true; + + return; +exit: + pctrlr->cmb_bar_virt_addr = NULL; + pctrlr->ctrlr.opts.use_cmb_sqs = false; + return; +} + +static int +nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr) +{ + int rc = 0; + union spdk_nvme_cmbloc_register cmbloc; + void *addr = pctrlr->cmb_bar_virt_addr; + + if (addr) { + if (pctrlr->cmb_mem_register_addr) { + 
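+ /* Undo the spdk_mem_register() that was done when the CMB was mapped for I/O data. */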
spdk_mem_unregister(pctrlr->cmb_mem_register_addr, pctrlr->cmb_mem_register_size); + } + + if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { + SPDK_ERRLOG("get_cmbloc() failed\n"); + return -EIO; + } + rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr); + } + return rc; +} + +static int +nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned, + uint64_t *offset) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + uint64_t round_offset; + + round_offset = pctrlr->cmb_current_offset; + round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1); + + /* CMB may only consume part of the BAR, calculate accordingly */ + if (round_offset + length > pctrlr->cmb_max_offset) { + SPDK_ERRLOG("Tried to allocate past valid CMB range!\n"); + return -1; + } + + *offset = round_offset; + pctrlr->cmb_current_offset = round_offset + length; + + return 0; +} + +void * +nvme_pcie_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + uint64_t offset; + + if (pctrlr->cmb_bar_virt_addr == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CMB not available\n"); + return NULL; + } + + if (!pctrlr->cmb_io_data_supported) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CMB doesn't support I/O data\n"); + return NULL; + } + + if (nvme_pcie_ctrlr_alloc_cmb(ctrlr, size, 4, &offset) != 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "%zu-byte CMB allocation failed\n", size); + return NULL; + } + + return pctrlr->cmb_bar_virt_addr + offset; +} + +int +nvme_pcie_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size) +{ + /* + * Do nothing for now. + * TODO: Track free space so buffers may be reused. + */ + SPDK_ERRLOG("%s: no deallocation for CMB buffers yet!\n", + __func__); + return 0; +} + +static int +nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr) +{ + int rc; + void *addr; + uint64_t phys_addr, size; + + rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr, + &phys_addr, &size); + pctrlr->regs = (volatile struct spdk_nvme_registers *)addr; + if ((pctrlr->regs == NULL) || (rc != 0)) { + SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n", + rc, pctrlr->regs); + return -1; + } + + pctrlr->regs_size = size; + nvme_pcie_ctrlr_map_cmb(pctrlr); + + return 0; +} + +static int +nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr) +{ + int rc = 0; + void *addr = (void *)pctrlr->regs; + + if (pctrlr->ctrlr.is_removed) { + return rc; + } + + rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr); + if (rc != 0) { + SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc); + return -1; + } + + if (addr) { + /* NOTE: addr may have been remapped here. We're relying on DPDK to call + * munmap internally. 
+ */ + rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr); + } + return rc; +} + +static int +nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_pcie_qpair *pqpair; + int rc; + + pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair == NULL) { + return -ENOMEM; + } + + pqpair->num_entries = NVME_ADMIN_ENTRIES; + + ctrlr->adminq = &pqpair->qpair; + + rc = nvme_qpair_init(ctrlr->adminq, + 0, /* qpair ID */ + ctrlr, + SPDK_NVME_QPRIO_URGENT, + NVME_ADMIN_ENTRIES); + if (rc != 0) { + return rc; + } + + return nvme_pcie_qpair_construct(ctrlr->adminq); +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +static int +pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) +{ + struct spdk_nvme_transport_id trid = {}; + struct nvme_pcie_enum_ctx *enum_ctx = ctx; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_pci_addr pci_addr; + + pci_addr = spdk_pci_device_get_addr(pci_dev); + + trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); + + /* Verify that this controller is not already attached */ + ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(&trid); + if (ctrlr) { + if (spdk_process_is_primary()) { + /* Already attached */ + return 0; + } else { + return nvme_ctrlr_add_process(ctrlr, pci_dev); + } + } + + /* check whether user passes the pci_addr */ + if (enum_ctx->has_pci_addr && + (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) { + return 1; + } + + return nvme_ctrlr_probe(&trid, pci_dev, + enum_ctx->probe_cb, enum_ctx->cb_ctx); +} + +int +nvme_pcie_ctrlr_scan(const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_remove_cb remove_cb, + bool direct_connect) +{ + struct nvme_pcie_enum_ctx enum_ctx = {}; + + enum_ctx.probe_cb = probe_cb; + enum_ctx.cb_ctx = cb_ctx; + + if (strlen(trid->traddr) != 0) { + if (spdk_pci_addr_parse(&enum_ctx.pci_addr, trid->traddr)) { + return -1; + } + enum_ctx.has_pci_addr = true; + } + + if (hotplug_fd < 0) { + hotplug_fd = spdk_uevent_connect(); + if (hotplug_fd < 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Failed to open uevent netlink socket\n"); + } + } else { + _nvme_pcie_hotplug_monitor(cb_ctx, probe_cb, remove_cb); + } + + if (enum_ctx.has_pci_addr == false) { + return spdk_pci_nvme_enumerate(pcie_nvme_enum_cb, &enum_ctx); + } else { + return spdk_pci_nvme_device_attach(pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr); + } +} + +static int +nvme_pcie_ctrlr_attach(spdk_nvme_probe_cb probe_cb, void *cb_ctx, struct spdk_pci_addr *pci_addr) +{ + struct nvme_pcie_enum_ctx enum_ctx; + + enum_ctx.probe_cb = probe_cb; + enum_ctx.cb_ctx = cb_ctx; + + return spdk_pci_nvme_device_attach(pcie_nvme_enum_cb, &enum_ctx, pci_addr); +} + +struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + void *devhandle) +{ + struct spdk_pci_device *pci_dev = devhandle; + struct nvme_pcie_ctrlr *pctrlr; + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + uint32_t cmd_reg; + int rc, claim_fd; + struct spdk_pci_id pci_id; + struct spdk_pci_addr pci_addr; + + if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) { + SPDK_ERRLOG("could not parse pci address\n"); + return NULL; + } + + claim_fd = spdk_pci_device_claim(&pci_addr); + if (claim_fd < 0) { + SPDK_ERRLOG("could not claim device %s\n", trid->traddr); + return NULL; + } + + pctrlr = 
spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pctrlr == NULL) { + close(claim_fd); + SPDK_ERRLOG("could not allocate ctrlr\n"); + return NULL; + } + + pctrlr->is_remapped = false; + pctrlr->ctrlr.is_removed = false; + pctrlr->ctrlr.trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + pctrlr->devhandle = devhandle; + pctrlr->ctrlr.opts = *opts; + pctrlr->claim_fd = claim_fd; + memcpy(&pctrlr->ctrlr.trid, trid, sizeof(pctrlr->ctrlr.trid)); + + rc = nvme_pcie_ctrlr_allocate_bars(pctrlr); + if (rc != 0) { + close(claim_fd); + spdk_free(pctrlr); + return NULL; + } + + /* Enable PCI busmaster and disable INTx */ + spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4); + cmd_reg |= 0x404; + spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4); + + if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) { + SPDK_ERRLOG("get_cap() failed\n"); + close(claim_fd); + spdk_free(pctrlr); + return NULL; + } + + if (nvme_ctrlr_get_vs(&pctrlr->ctrlr, &vs)) { + SPDK_ERRLOG("get_vs() failed\n"); + close(claim_fd); + spdk_free(pctrlr); + return NULL; + } + + nvme_ctrlr_init_cap(&pctrlr->ctrlr, &cap, &vs); + + /* Doorbell stride is 2 ^ (dstrd + 2), + * but we want multiples of 4, so drop the + 2 */ + pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd; + + rc = nvme_ctrlr_construct(&pctrlr->ctrlr); + if (rc != 0) { + nvme_ctrlr_destruct(&pctrlr->ctrlr); + return NULL; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id); + + rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr); + if (rc != 0) { + nvme_ctrlr_destruct(&pctrlr->ctrlr); + return NULL; + } + + /* Construct the primary process properties */ + rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev); + if (rc != 0) { + nvme_ctrlr_destruct(&pctrlr->ctrlr); + return NULL; + } + + if (g_sigset != true) { + nvme_pcie_ctrlr_setup_signal(); + g_sigset = true; + } + + return &pctrlr->ctrlr; +} + +int +nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq); + union spdk_nvme_aqa_register aqa; + + if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) { + SPDK_ERRLOG("set_asq() failed\n"); + return -EIO; + } + + if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) { + SPDK_ERRLOG("set_acq() failed\n"); + return -EIO; + } + + aqa.raw = 0; + /* acqs and asqs are 0-based. 
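+ * A value of N in these fields configures a queue of N + 1 entries, hence num_entries - 1 below.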
*/ + aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1; + aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1; + + if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) { + SPDK_ERRLOG("set_aqa() failed\n"); + return -EIO; + } + + return 0; +} + +int +nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr); + + close(pctrlr->claim_fd); + + if (ctrlr->adminq) { + nvme_pcie_qpair_destroy(ctrlr->adminq); + } + + nvme_ctrlr_destruct_finish(ctrlr); + + nvme_ctrlr_free_processes(ctrlr); + + nvme_pcie_ctrlr_free_bars(pctrlr); + + if (devhandle) { + spdk_pci_device_detach(devhandle); + } + + spdk_free(pctrlr); + + return 0; +} + +static void +nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr) +{ + tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp); + tr->cid = cid; + tr->active = false; +} + +int +nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + pqpair->sq_tail = pqpair->cq_head = 0; + + /* + * First time through the completion queue, HW will set phase + * bit on completions to 1. So set this to 1 here, indicating + * we're looking for a 1 to know which entries have completed. + * we'll toggle the bit each time when the completion queue + * rolls over. + */ + pqpair->phase = 1; + + memset(pqpair->cmd, 0, + pqpair->num_entries * sizeof(struct spdk_nvme_cmd)); + memset(pqpair->cpl, 0, + pqpair->num_entries * sizeof(struct spdk_nvme_cpl)); + + return 0; +} + +static int +nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr; + uint16_t i; + volatile uint32_t *doorbell_base; + uint64_t offset; + uint16_t num_trackers; + size_t page_align = 0x200000; + uint32_t flags = SPDK_MALLOC_DMA; + + /* + * Limit the maximum number of completions to return per call to prevent wraparound, + * and calculate how many trackers can be submitted at once without overflowing the + * completion queue. + */ + pqpair->max_completions_cap = pqpair->num_entries / 4; + pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS); + pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS); + num_trackers = pqpair->num_entries - pqpair->max_completions_cap; + + SPDK_INFOLOG(SPDK_LOG_NVME, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n", + pqpair->max_completions_cap, num_trackers); + + assert(num_trackers != 0); + + pqpair->sq_in_cmb = false; + + if (nvme_qpair_is_admin_queue(&pqpair->qpair)) { + flags |= SPDK_MALLOC_SHARE; + } + + /* cmd and cpl rings must be aligned on page size boundaries. */ + if (ctrlr->opts.use_cmb_sqs) { + if (nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd), + sysconf(_SC_PAGESIZE), &offset) == 0) { + pqpair->cmd = pctrlr->cmb_bar_virt_addr + offset; + pqpair->cmd_bus_addr = pctrlr->cmb_bar_phys_addr + offset; + pqpair->sq_in_cmb = true; + } + } + + /* To ensure physical address contiguity we make each ring occupy + * a single hugepage only. See MAX_IO_QUEUE_ENTRIES. 
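+ * With 2 MiB hugepages (page_align) and 64-byte submission queue entries, one page holds up to 32768 entries.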
+ */ + if (pqpair->sq_in_cmb == false) { + pqpair->cmd = spdk_zmalloc(pqpair->num_entries * sizeof(struct spdk_nvme_cmd), + page_align, &pqpair->cmd_bus_addr, + SPDK_ENV_SOCKET_ID_ANY, flags); + if (pqpair->cmd == NULL) { + SPDK_ERRLOG("alloc qpair_cmd failed\n"); + return -ENOMEM; + } + } + + pqpair->cpl = spdk_zmalloc(pqpair->num_entries * sizeof(struct spdk_nvme_cpl), + page_align, &pqpair->cpl_bus_addr, + SPDK_ENV_SOCKET_ID_ANY, flags); + if (pqpair->cpl == NULL) { + SPDK_ERRLOG("alloc qpair_cpl failed\n"); + return -ENOMEM; + } + + doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl; + pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; + pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; + + /* + * Reserve space for all of the trackers in a single allocation. + * struct nvme_tracker must be padded so that its size is already a power of 2. + * This ensures the PRP list embedded in the nvme_tracker object will not span a + * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing. + */ + pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair->tr == NULL) { + SPDK_ERRLOG("nvme_tr failed\n"); + return -ENOMEM; + } + + TAILQ_INIT(&pqpair->free_tr); + TAILQ_INIT(&pqpair->outstanding_tr); + + for (i = 0; i < num_trackers; i++) { + tr = &pqpair->tr[i]; + nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr)); + TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); + } + + nvme_pcie_qpair_reset(qpair); + + return 0; +} + +static inline void +nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) +{ + /* dst and src are known to be non-overlapping and 64-byte aligned. */ +#if defined(__AVX__) + __m256i *d256 = (__m256i *)dst; + const __m256i *s256 = (const __m256i *)src; + + _mm256_store_si256(&d256[0], _mm256_load_si256(&s256[0])); + _mm256_store_si256(&d256[1], _mm256_load_si256(&s256[1])); +#elif defined(__SSE2__) + __m128i *d128 = (__m128i *)dst; + const __m128i *s128 = (const __m128i *)src; + + _mm_store_si128(&d128[0], _mm_load_si128(&s128[0])); + _mm_store_si128(&d128[1], _mm_load_si128(&s128[1])); + _mm_store_si128(&d128[2], _mm_load_si128(&s128[2])); + _mm_store_si128(&d128[3], _mm_load_si128(&s128[3])); +#else + *dst = *src; +#endif +} + +/** + * Note: the ctrlr_lock must be held when calling this function. + */ +static void +nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req, struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *active_req = req; + struct spdk_nvme_ctrlr_process *active_proc; + + /* + * The admin request is from another process. Move to the per + * process list for that process to handle it later. + */ + assert(nvme_qpair_is_admin_queue(qpair)); + assert(active_req->pid != getpid()); + + active_proc = spdk_nvme_ctrlr_get_process(ctrlr, active_req->pid); + if (active_proc) { + /* Save the original completion information */ + memcpy(&active_req->cpl, cpl, sizeof(*cpl)); + STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq); + } else { + SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n", + active_req->pid); + + nvme_free_request(active_req); + } +} + +/** + * Note: the ctrlr_lock must be held when calling this function. 
+ */ +static void +nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *req, *tmp_req; + pid_t pid = getpid(); + struct spdk_nvme_ctrlr_process *proc; + + /* + * Check whether there is any pending admin request from + * other active processes. + */ + assert(nvme_qpair_is_admin_queue(qpair)); + + proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + if (!proc) { + SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid); + assert(proc); + return; + } + + STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { + STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); + + assert(req->pid == pid); + + nvme_complete_request(req, &req->cpl); + nvme_free_request(req); + } +} + +static inline int +nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old); +} + +static bool +nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value, + volatile uint32_t *shadow_db, + volatile uint32_t *eventidx) +{ + uint16_t old; + + if (!shadow_db) { + return true; + } + + old = *shadow_db; + *shadow_db = value; + + if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) { + return false; + } + + return true; +} + +static void +nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) +{ + struct nvme_request *req; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); + + req = tr->req; + assert(req != NULL); + req->timed_out = false; + if (spdk_unlikely(pctrlr->ctrlr.timeout_enabled)) { + req->submit_tick = spdk_get_ticks(); + } else { + req->submit_tick = 0; + } + + pqpair->tr[tr->cid].active = true; + + /* Copy the command from the tracker to the submission queue. */ + nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd); + + if (++pqpair->sq_tail == pqpair->num_entries) { + pqpair->sq_tail = 0; + } + + if (pqpair->sq_tail == pqpair->sq_head) { + SPDK_ERRLOG("sq_tail is passing sq_head!\n"); + } + + spdk_wmb(); + if (spdk_likely(nvme_pcie_qpair_update_mmio_required(qpair, + pqpair->sq_tail, + pqpair->sq_shadow_tdbl, + pqpair->sq_eventidx))) { + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail); + g_thread_mmio_ctrlr = NULL; + } +} + +static void +nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, + struct spdk_nvme_cpl *cpl, bool print_on_error) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_request *req; + bool retry, error, was_active; + bool req_from_current_proc = true; + + req = tr->req; + + assert(req != NULL); + + error = spdk_nvme_cpl_is_error(cpl); + retry = error && nvme_completion_is_retry(cpl) && + req->retries < spdk_nvme_retry_count; + + if (error && print_on_error) { + nvme_qpair_print_command(qpair, &req->cmd); + nvme_qpair_print_completion(qpair, cpl); + } + + was_active = pqpair->tr[cpl->cid].active; + pqpair->tr[cpl->cid].active = false; + + assert(cpl->cid == req->cmd.cid); + + if (retry) { + req->retries++; + nvme_pcie_qpair_submit_tracker(qpair, tr); + } else { + if (was_active) { + /* Only check admin requests from different processes. 
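The two helpers above implement the shadow-doorbell optimization (the shadow and eventidx buffers are typically set up with the Doorbell Buffer Config admin command): the new tail or head value is written to the shadow doorbell, and the real MMIO doorbell is rung only when that write crosses the event index the controller advertised. The comparison is done modulo 2^16 so it stays correct across queue-index wraparound. A small standalone check of the same comparison (need_event is a name invented for the sketch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same wraparound-safe test as nvme_pcie_qpair_need_event(): true when
 * event_idx lies in the half-open interval (old, new_idx], evaluated
 * modulo 2^16. */
static int need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old);
}

int main(void)
{
	/* Controller asked to be notified at index 10; tail moved 8 -> 12. */
	assert(need_event(10, 12, 8) == 1);
	/* Tail moved 3 -> 7; the event index (10) was not crossed. */
	assert(need_event(10, 7, 3) == 0);
	/* Wraparound: tail moved 65533 -> 2, event index 65535 was crossed. */
	assert(need_event(65535, 2, 65533) == 1);
	printf("eventidx checks passed\n");
	return 0;
}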
*/ + if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { + req_from_current_proc = false; + nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); + } else { + nvme_complete_request(req, cpl); + } + } + + if (req_from_current_proc == true) { + nvme_free_request(req); + } + + tr->req = NULL; + + TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list); + TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); + + /* + * If the controller is in the middle of resetting, don't + * try to submit queued requests here - let the reset logic + * handle that instead. + */ + if (!STAILQ_EMPTY(&qpair->queued_req) && + !qpair->ctrlr->is_resetting) { + req = STAILQ_FIRST(&qpair->queued_req); + STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); + nvme_qpair_submit_request(qpair, req); + } + } +} + +static void +nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, + struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, + bool print_on_error) +{ + struct spdk_nvme_cpl cpl; + + memset(&cpl, 0, sizeof(cpl)); + cpl.sqid = qpair->id; + cpl.cid = tr->cid; + cpl.status.sct = sct; + cpl.status.sc = sc; + cpl.status.dnr = dnr; + nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); +} + +static void +nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr, *temp; + + TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) { + SPDK_ERRLOG("aborting outstanding command\n"); + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); + } +} + +static void +nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr; + + tr = TAILQ_FIRST(&pqpair->outstanding_tr); + while (tr != NULL) { + assert(tr->req != NULL); + if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, + SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0, + false); + tr = TAILQ_FIRST(&pqpair->outstanding_tr); + } else { + tr = TAILQ_NEXT(tr, tq_list); + } + } +} + +static void +nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + nvme_pcie_admin_qpair_abort_aers(qpair); +} + +static int +nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_pcie_admin_qpair_destroy(qpair); + } + if (pqpair->cmd && !pqpair->sq_in_cmb) { + spdk_free(pqpair->cmd); + } + if (pqpair->cpl) { + spdk_free(pqpair->cpl); + } + if (pqpair->tr) { + spdk_free(pqpair->tr); + } + + nvme_qpair_deinit(qpair); + + spdk_free(pqpair); + + return 0; +} + +static void +nvme_pcie_admin_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + /* + * Manually abort each outstanding admin command. Do not retry + * admin commands found here, since they will be left over from + * a controller reset and its likely the context in which the + * command was issued no longer applies. + */ + nvme_pcie_qpair_abort_trackers(qpair, 1 /* do not retry */); +} + +static void +nvme_pcie_io_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + /* Manually abort each outstanding I/O. 
*/ + nvme_pcie_qpair_abort_trackers(qpair, 0); +} + +int +nvme_pcie_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + pqpair->is_enabled = true; + if (nvme_qpair_is_io_queue(qpair)) { + nvme_pcie_io_qpair_enable(qpair); + } else { + nvme_pcie_admin_qpair_enable(qpair); + } + + return 0; +} + +static void +nvme_pcie_admin_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + nvme_pcie_admin_qpair_abort_aers(qpair); +} + +static void +nvme_pcie_io_qpair_disable(struct spdk_nvme_qpair *qpair) +{ +} + +int +nvme_pcie_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + pqpair->is_enabled = false; + if (nvme_qpair_is_io_queue(qpair)) { + nvme_pcie_io_qpair_disable(qpair); + } else { + nvme_pcie_admin_qpair_disable(qpair); + } + + return 0; +} + + +int +nvme_pcie_qpair_fail(struct spdk_nvme_qpair *qpair) +{ + nvme_pcie_qpair_abort_trackers(qpair, 1 /* do not retry */); + + return 0; +} + +static int +nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, + void *cb_arg) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ; + + /* + * TODO: create a create io completion queue command data + * structure. + */ + cmd->cdw10 = ((pqpair->num_entries - 1) << 16) | io_que->id; + /* + * 0x2 = interrupts enabled + * 0x1 = physically contiguous + */ + cmd->cdw11 = 0x1; + cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static int +nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ; + + /* + * TODO: create a create io submission queue command data + * structure. 
+ */ + cmd->cdw10 = ((pqpair->num_entries - 1) << 16) | io_que->id; + /* 0x1 = physically contiguous */ + cmd->cdw11 = (io_que->id << 16) | (io_que->qprio << 1) | 0x1; + cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static int +nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ; + cmd->cdw10 = qpair->id; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static int +nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) +{ + struct nvme_request *req; + struct spdk_nvme_cmd *cmd; + + req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); + if (req == NULL) { + return -ENOMEM; + } + + cmd = &req->cmd; + cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ; + cmd->cdw10 = qpair->id; + + return nvme_ctrlr_submit_admin_request(ctrlr, req); +} + +static int +_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint16_t qid) +{ + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_completion_poll_status status; + int rc; + + rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status); + if (rc != 0) { + return rc; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + SPDK_ERRLOG("nvme_create_io_cq failed!\n"); + return -1; + } + + rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status); + if (rc != 0) { + return rc; + } + + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + SPDK_ERRLOG("nvme_create_io_sq failed!\n"); + /* Attempt to delete the completion queue */ + rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status); + if (rc != 0) { + return -1; + } + spdk_nvme_wait_for_completion(ctrlr->adminq, &status); + return -1; + } + + if (ctrlr->shadow_doorbell) { + pqpair->sq_shadow_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; + pqpair->cq_shadow_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; + pqpair->sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; + pqpair->cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; + } + nvme_pcie_qpair_reset(qpair); + + return 0; +} + +struct spdk_nvme_qpair * +nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts) +{ + struct nvme_pcie_qpair *pqpair; + struct spdk_nvme_qpair *qpair; + int rc; + + assert(ctrlr != NULL); + + pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (pqpair == NULL) { + return NULL; + } + + pqpair->num_entries = opts->io_queue_size; + + qpair = &pqpair->qpair; + + rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests); + if (rc != 0) { + nvme_pcie_qpair_destroy(qpair); + return NULL; + } + + rc = nvme_pcie_qpair_construct(qpair); + if (rc != 0) { + nvme_pcie_qpair_destroy(qpair); + return NULL; + } + + rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qid); + + if (rc != 
0) { + SPDK_ERRLOG("I/O queue creation failed\n"); + nvme_pcie_qpair_destroy(qpair); + return NULL; + } + + return qpair; +} + +int +nvme_pcie_ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id); +} + +int +nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + struct nvme_completion_poll_status status; + int rc; + + assert(ctrlr != NULL); + + if (ctrlr->is_removed) { + goto free; + } + + /* Delete the I/O submission queue */ + rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, &status); + if (rc != 0) { + return rc; + } + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + return -1; + } + + if (qpair->no_deletion_notification_needed == 0) { + /* Complete any I/O in the completion queue */ + nvme_pcie_qpair_process_completions(qpair, 0); + + /* Abort the rest of the I/O */ + nvme_pcie_qpair_abort_trackers(qpair, 1); + } + + /* Delete the completion queue */ + rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status); + if (rc != 0) { + return rc; + } + if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) { + return -1; + } + +free: + nvme_pcie_qpair_destroy(qpair); + return 0; +} + +static void +nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) +{ + /* + * Bad vtophys translation, so abort this request and return + * immediately. + */ + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_INVALID_FIELD, + 1 /* do not retry */, true); +} + +/* + * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes. + * + * *prp_index will be updated to account for the number of PRP entries used. + */ +static int +nvme_pcie_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len, + uint32_t page_size) +{ + struct spdk_nvme_cmd *cmd = &tr->req->cmd; + uintptr_t page_mask = page_size - 1; + uint64_t phys_addr; + uint32_t i; + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp_index:%u virt_addr:%p len:%u\n", + *prp_index, virt_addr, (uint32_t)len); + + if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) { + SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr); + return -EINVAL; + } + + i = *prp_index; + while (len) { + uint32_t seg_len; + + /* + * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array, + * so prp_index == count is valid. 
+ */ + if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) { + SPDK_ERRLOG("out of PRP entries\n"); + return -EINVAL; + } + + phys_addr = spdk_vtophys(virt_addr); + if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) { + SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr); + return -EINVAL; + } + + if (i == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp1 = %p\n", (void *)phys_addr); + cmd->dptr.prp.prp1 = phys_addr; + seg_len = page_size - ((uintptr_t)virt_addr & page_mask); + } else { + if ((phys_addr & page_mask) != 0) { + SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr); + return -EINVAL; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp[%u] = %p\n", i - 1, (void *)phys_addr); + tr->u.prp[i - 1] = phys_addr; + seg_len = page_size; + } + + seg_len = spdk_min(seg_len, len); + virt_addr += seg_len; + len -= seg_len; + i++; + } + + cmd->psdt = SPDK_NVME_PSDT_PRP; + if (i <= 1) { + cmd->dptr.prp.prp2 = 0; + } else if (i == 2) { + cmd->dptr.prp.prp2 = tr->u.prp[0]; + SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2); + } else { + cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr; + SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2); + } + + *prp_index = i; + return 0; +} + +/** + * Build PRP list describing physically contiguous payload buffer. + */ +static int +nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, + struct nvme_tracker *tr) +{ + uint32_t prp_index = 0; + int rc; + + rc = nvme_pcie_prp_list_append(tr, &prp_index, req->payload.contig_or_cb_arg + req->payload_offset, + req->payload_size, qpair->ctrlr->page_size); + if (rc) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return rc; + } + + return 0; +} + +#define _2MB_OFFSET(ptr) (((uintptr_t)(ptr)) & (0x200000 - 1)) + +/** + * Build SGL list describing scattered payload buffer. + */ +static int +nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, + struct nvme_tracker *tr) +{ + int rc; + void *virt_addr; + uint64_t phys_addr; + uint32_t remaining_transfer_len, remaining_user_sge_len, length; + struct spdk_nvme_sgl_descriptor *sgl; + uint32_t nseg = 0; + + /* + * Build scattered payloads. 
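To make the PRP bookkeeping above concrete: PRP1 receives the address of the first, possibly unaligned, piece of the buffer; every later piece must be page-sized and page-aligned; if exactly one more entry is needed its address goes straight into PRP2, otherwise PRP2 points at the PRP list held in the tracker. A rough standalone sketch of the entry-count arithmetic (prp_entries_needed is invented for this note and is not part of the driver):

#include <stdint.h>
#include <stdio.h>

/* Number of page-sized pieces a transfer spans, given the offset of its first
 * byte within a page, i.e. how many PRP entries (PRP1 plus list entries)
 * describe it. */
static uint32_t prp_entries_needed(uint64_t first_page_offset, uint64_t len,
				   uint32_t page_size)
{
	uint64_t first_seg = page_size - first_page_offset;

	if (len <= first_seg) {
		return 1;	/* PRP1 alone is enough */
	}
	len -= first_seg;
	return 1 + (len + page_size - 1) / page_size;
}

int main(void)
{
	/* 4 KiB pages: an aligned 8 KiB transfer needs PRP1 plus one more
	 * entry, so the second page address fits directly in PRP2. */
	printf("%u\n", (unsigned)prp_entries_needed(0, 8192, 4096));	/* 2 */
	/* A 16 KiB transfer starting 512 bytes into a page spans 5 pages,
	 * so PRP2 must point at a PRP list. */
	printf("%u\n", (unsigned)prp_entries_needed(512, 16384, 4096));	/* 5 */
	return 0;
}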
+ */ + assert(req->payload_size != 0); + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); + assert(req->payload.reset_sgl_fn != NULL); + assert(req->payload.next_sge_fn != NULL); + req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); + + sgl = tr->u.sgl; + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + req->cmd.dptr.sgl1.unkeyed.subtype = 0; + + remaining_transfer_len = req->payload_size; + + while (remaining_transfer_len > 0) { + rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, + &virt_addr, &remaining_user_sge_len); + if (rc) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return -1; + } + + remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len); + remaining_transfer_len -= remaining_user_sge_len; + while (remaining_user_sge_len > 0) { + if (nseg >= NVME_MAX_SGL_DESCRIPTORS) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return -1; + } + + phys_addr = spdk_vtophys(virt_addr); + if (phys_addr == SPDK_VTOPHYS_ERROR) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return -1; + } + + length = spdk_min(remaining_user_sge_len, 0x200000 - _2MB_OFFSET(virt_addr)); + remaining_user_sge_len -= length; + virt_addr += length; + + if (nseg > 0 && phys_addr == + (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) { + /* extend previous entry */ + (*(sgl - 1)).unkeyed.length += length; + continue; + } + + sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; + sgl->unkeyed.length = length; + sgl->address = phys_addr; + sgl->unkeyed.subtype = 0; + + sgl++; + nseg++; + } + } + + if (nseg == 1) { + /* + * The whole transfer can be described by a single SGL descriptor. + * Use the special case described by the spec where SGL1's type is Data Block. + * This means the SGL in the tracker is not used at all, so copy the first (and only) + * SGL element into SGL1. + */ + req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; + req->cmd.dptr.sgl1.address = tr->u.sgl[0].address; + req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length; + } else { + /* For now we can only support 1 SGL segment in NVMe controller */ + req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT; + req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr; + req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor); + } + + return 0; +} + +/** + * Build PRP list describing scattered payload buffer. + */ +static int +nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, + struct nvme_tracker *tr) +{ + int rc; + void *virt_addr; + uint32_t remaining_transfer_len, length; + uint32_t prp_index = 0; + uint32_t page_size = qpair->ctrlr->page_size; + + /* + * Build scattered payloads. + */ + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); + assert(req->payload.reset_sgl_fn != NULL); + req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); + + remaining_transfer_len = req->payload_size; + while (remaining_transfer_len > 0) { + assert(req->payload.next_sge_fn != NULL); + rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length); + if (rc) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return -1; + } + + length = spdk_min(remaining_transfer_len, length); + + /* + * Any incompatible sges should have been handled up in the splitting routine, + * but assert here as an additional check. + * + * All SGEs except last must end on a page boundary. 
+ */ + assert((length == remaining_transfer_len) || + _is_page_aligned((uintptr_t)virt_addr + length, page_size)); + + rc = nvme_pcie_prp_list_append(tr, &prp_index, virt_addr, length, page_size); + if (rc) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + return rc; + } + + remaining_transfer_len -= length; + } + + return 0; +} + +static inline bool +nvme_pcie_qpair_check_enabled(struct spdk_nvme_qpair *qpair) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + if (!pqpair->is_enabled && + !qpair->ctrlr->is_resetting) { + nvme_qpair_enable(qpair); + } + return pqpair->is_enabled; +} + +int +nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req) +{ + struct nvme_tracker *tr; + int rc = 0; + void *md_payload; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + + nvme_pcie_qpair_check_enabled(qpair); + + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + } + + tr = TAILQ_FIRST(&pqpair->free_tr); + + if (tr == NULL || !pqpair->is_enabled) { + /* + * No tracker is available, or the qpair is disabled due to + * an in-progress controller-level reset. + * + * Put the request on the qpair's request queue to be + * processed when a tracker frees up via a command + * completion or when the controller reset is + * completed. + */ + STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq); + goto exit; + } + + TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */ + TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list); + tr->req = req; + req->cmd.cid = tr->cid; + + if (req->payload_size && req->payload.md) { + md_payload = req->payload.md + req->md_offset; + tr->req->cmd.mptr = spdk_vtophys(md_payload); + if (tr->req->cmd.mptr == SPDK_VTOPHYS_ERROR) { + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + rc = -EINVAL; + goto exit; + } + } + + if (req->payload_size == 0) { + /* Null payload - leave PRP fields zeroed */ + rc = 0; + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { + rc = nvme_pcie_qpair_build_contig_request(qpair, req, tr); + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { + if (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) { + rc = nvme_pcie_qpair_build_hw_sgl_request(qpair, req, tr); + } else { + rc = nvme_pcie_qpair_build_prps_sgl_request(qpair, req, tr); + } + } else { + assert(0); + nvme_pcie_fail_request_bad_vtophys(qpair, tr); + rc = -EINVAL; + } + + if (rc < 0) { + goto exit; + } + + nvme_pcie_qpair_submit_tracker(qpair, tr); + +exit: + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + + return rc; +} + +static void +nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair) +{ + uint64_t t02; + struct nvme_tracker *tr, *tmp; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvme_ctrlr_process *active_proc; + + /* Don't check timeouts during controller initialization. */ + if (ctrlr->state != NVME_CTRLR_STATE_READY) { + return; + } + + if (nvme_qpair_is_admin_queue(qpair)) { + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + } else { + active_proc = qpair->active_proc; + } + + /* Only check timeouts if the current process has a timeout callback. 
*/ + if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { + return; + } + + t02 = spdk_get_ticks(); + TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { + assert(tr->req != NULL); + + if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) { + /* + * The requests are in order, so as soon as one has not timed out, + * stop iterating. + */ + break; + } + } +} + +int32_t +nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) +{ + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr); + struct nvme_tracker *tr; + struct spdk_nvme_cpl *cpl; + uint32_t num_completions = 0; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + if (spdk_unlikely(!nvme_pcie_qpair_check_enabled(qpair))) { + /* + * qpair is not enabled, likely because a controller reset is + * is in progress. Ignore the interrupt - any I/O that was + * associated with this interrupt will get retried when the + * reset is complete. + */ + return 0; + } + + if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + } + + if (max_completions == 0 || max_completions > pqpair->max_completions_cap) { + /* + * max_completions == 0 means unlimited, but complete at most + * max_completions_cap batch of I/O at a time so that the completion + * queue doorbells don't wrap around. + */ + max_completions = pqpair->max_completions_cap; + } + + while (1) { + cpl = &pqpair->cpl[pqpair->cq_head]; + + if (cpl->status.p != pqpair->phase) { + break; + } +#ifdef __PPC64__ + /* + * This memory barrier prevents reordering of: + * - load after store from/to tr + * - load after load cpl phase and cpl cid + */ + spdk_mb(); +#endif + + if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) { + pqpair->cq_head = 0; + pqpair->phase = !pqpair->phase; + } + + tr = &pqpair->tr[cpl->cid]; + pqpair->sq_head = cpl->sqhd; + + if (tr->active) { + nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); + } else { + SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); + nvme_qpair_print_completion(qpair, cpl); + assert(0); + } + + if (++num_completions == max_completions) { + break; + } + } + + if (num_completions > 0) { + if (spdk_likely(nvme_pcie_qpair_update_mmio_required(qpair, pqpair->cq_head, + pqpair->cq_shadow_hdbl, + pqpair->cq_eventidx))) { + g_thread_mmio_ctrlr = pctrlr; + spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head); + g_thread_mmio_ctrlr = NULL; + } + } + + if (spdk_unlikely(ctrlr->timeout_enabled)) { + /* + * User registered for timeout callback + */ + nvme_pcie_qpair_check_timeout(qpair); + } + + /* Before returning, complete any pending admin request. */ + if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { + nvme_pcie_qpair_complete_pending_admin_request(qpair); + + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + + return num_completions; +} diff --git a/src/spdk/lib/nvme/nvme_qpair.c b/src/spdk/lib/nvme/nvme_qpair.c new file mode 100644 index 00000000..9f585798 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_qpair.c @@ -0,0 +1,663 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvme_internal.h" +#include "spdk/nvme_ocssd.h" + +static void nvme_qpair_fail(struct spdk_nvme_qpair *qpair); + +struct nvme_string { + uint16_t value; + const char *str; +}; + +static const struct nvme_string admin_opcode[] = { + { SPDK_NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" }, + { SPDK_NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" }, + { SPDK_NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" }, + { SPDK_NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" }, + { SPDK_NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" }, + { SPDK_NVME_OPC_IDENTIFY, "IDENTIFY" }, + { SPDK_NVME_OPC_ABORT, "ABORT" }, + { SPDK_NVME_OPC_SET_FEATURES, "SET FEATURES" }, + { SPDK_NVME_OPC_GET_FEATURES, "GET FEATURES" }, + { SPDK_NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" }, + { SPDK_NVME_OPC_NS_MANAGEMENT, "NAMESPACE MANAGEMENT" }, + { SPDK_NVME_OPC_FIRMWARE_COMMIT, "FIRMWARE COMMIT" }, + { SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" }, + { SPDK_NVME_OPC_DEVICE_SELF_TEST, "DEVICE SELF-TEST" }, + { SPDK_NVME_OPC_NS_ATTACHMENT, "NAMESPACE ATTACHMENT" }, + { SPDK_NVME_OPC_KEEP_ALIVE, "KEEP ALIVE" }, + { SPDK_NVME_OPC_DIRECTIVE_SEND, "DIRECTIVE SEND" }, + { SPDK_NVME_OPC_DIRECTIVE_RECEIVE, "DIRECTIVE RECEIVE" }, + { SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT, "VIRTUALIZATION MANAGEMENT" }, + { SPDK_NVME_OPC_NVME_MI_SEND, "NVME-MI SEND" }, + { SPDK_NVME_OPC_NVME_MI_RECEIVE, "NVME-MI RECEIVE" }, + { SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG, "DOORBELL BUFFER CONFIG" }, + { SPDK_NVME_OPC_FORMAT_NVM, "FORMAT NVM" }, + { SPDK_NVME_OPC_SECURITY_SEND, "SECURITY SEND" }, + { SPDK_NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" }, + { SPDK_NVME_OPC_SANITIZE, "SANITIZE" }, + { SPDK_OCSSD_OPC_GEOMETRY, "OCSSD / GEOMETRY" }, + { 0xFFFF, "ADMIN COMMAND" } +}; + +static const struct nvme_string io_opcode[] = { + { SPDK_NVME_OPC_FLUSH, "FLUSH" }, + { SPDK_NVME_OPC_WRITE, "WRITE" }, + { SPDK_NVME_OPC_READ, "READ" }, + { SPDK_NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" }, + { SPDK_NVME_OPC_COMPARE, "COMPARE" }, + { SPDK_NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" }, + { SPDK_NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" }, + { SPDK_NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" }, + { SPDK_NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" }, + { SPDK_NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" }, + { 
SPDK_NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" }, + { SPDK_OCSSD_OPC_VECTOR_RESET, "OCSSD / VECTOR RESET" }, + { SPDK_OCSSD_OPC_VECTOR_WRITE, "OCSSD / VECTOR WRITE" }, + { SPDK_OCSSD_OPC_VECTOR_READ, "OCSSD / VECTOR READ" }, + { SPDK_OCSSD_OPC_VECTOR_COPY, "OCSSD / VECTOR COPY" }, + { 0xFFFF, "IO COMMAND" } +}; + +static const char * +nvme_get_string(const struct nvme_string *strings, uint16_t value) +{ + const struct nvme_string *entry; + + entry = strings; + + while (entry->value != 0xFFFF) { + if (entry->value == value) { + return entry->str; + } + entry++; + } + return entry->str; +} + +static void +nvme_admin_qpair_print_command(struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd) +{ + + SPDK_NOTICELOG("%s (%02x) sqid:%d cid:%d nsid:%x " + "cdw10:%08x cdw11:%08x\n", + nvme_get_string(admin_opcode, cmd->opc), cmd->opc, qpair->id, cmd->cid, + cmd->nsid, cmd->cdw10, cmd->cdw11); +} + +static void +nvme_io_qpair_print_command(struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cmd *cmd) +{ + assert(qpair != NULL); + assert(cmd != NULL); + switch ((int)cmd->opc) { + case SPDK_NVME_OPC_WRITE: + case SPDK_NVME_OPC_READ: + case SPDK_NVME_OPC_WRITE_UNCORRECTABLE: + case SPDK_NVME_OPC_COMPARE: + SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d " + "lba:%llu len:%d\n", + nvme_get_string(io_opcode, cmd->opc), qpair->id, cmd->cid, + cmd->nsid, + ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10, + (cmd->cdw12 & 0xFFFF) + 1); + break; + case SPDK_NVME_OPC_FLUSH: + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + SPDK_NOTICELOG("%s sqid:%d cid:%d nsid:%d\n", + nvme_get_string(io_opcode, cmd->opc), qpair->id, cmd->cid, + cmd->nsid); + break; + default: + SPDK_NOTICELOG("%s (%02x) sqid:%d cid:%d nsid:%d\n", + nvme_get_string(io_opcode, cmd->opc), cmd->opc, qpair->id, + cmd->cid, cmd->nsid); + break; + } +} + +void +nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd) +{ + assert(qpair != NULL); + assert(cmd != NULL); + + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_admin_qpair_print_command(qpair, cmd); + } else { + nvme_io_qpair_print_command(qpair, cmd); + } +} + +static const struct nvme_string generic_status[] = { + { SPDK_NVME_SC_SUCCESS, "SUCCESS" }, + { SPDK_NVME_SC_INVALID_OPCODE, "INVALID OPCODE" }, + { SPDK_NVME_SC_INVALID_FIELD, "INVALID FIELD" }, + { SPDK_NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" }, + { SPDK_NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" }, + { SPDK_NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" }, + { SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" }, + { SPDK_NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" }, + { SPDK_NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" }, + { SPDK_NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" }, + { SPDK_NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" }, + { SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" }, + { SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" }, + { SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR, "INVALID SGL SEGMENT DESCRIPTOR" }, + { SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS, "INVALID NUMBER OF SGL DESCRIPTORS" }, + { SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" }, + { SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" }, + { SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" }, + { SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF, "INVALID CONTROLLER MEMORY BUFFER" }, + { SPDK_NVME_SC_INVALID_PRP_OFFSET, "INVALID PRP OFFSET" }, + { 
SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" }, + { SPDK_NVME_SC_OPERATION_DENIED, "OPERATION DENIED" }, + { SPDK_NVME_SC_INVALID_SGL_OFFSET, "INVALID SGL OFFSET" }, + { SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT, "HOSTID INCONSISTENT FORMAT" }, + { SPDK_NVME_SC_KEEP_ALIVE_EXPIRED, "KEEP ALIVE EXPIRED" }, + { SPDK_NVME_SC_KEEP_ALIVE_INVALID, "KEEP ALIVE INVALID" }, + { SPDK_NVME_SC_ABORTED_PREEMPT, "ABORTED - PREEMPT AND ABORT" }, + { SPDK_NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" }, + { SPDK_NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" }, + { SPDK_NVME_SC_SGL_DATA_BLOCK_GRANULARITY_INVALID, "DATA BLOCK GRANULARITY INVALID" }, + { SPDK_NVME_SC_COMMAND_INVALID_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" }, + { SPDK_NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" }, + { SPDK_NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" }, + { SPDK_NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" }, + { SPDK_NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" }, + { SPDK_NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" }, + { 0xFFFF, "GENERIC" } +}; + +static const struct nvme_string command_specific_status[] = { + { SPDK_NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" }, + { SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" }, + { SPDK_NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" }, + { SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" }, + { SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" }, + { SPDK_NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" }, + { SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" }, + { SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" }, + { SPDK_NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" }, + { SPDK_NVME_SC_INVALID_FORMAT, "INVALID FORMAT" }, + { SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET, "FIRMWARE REQUIRES CONVENTIONAL RESET" }, + { SPDK_NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" }, + { SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE, "FEATURE ID NOT SAVEABLE" }, + { SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" }, + { SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" }, + { SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET, "FIRMWARE REQUIRES NVM RESET" }, + { SPDK_NVME_SC_FIRMWARE_REQ_RESET, "FIRMWARE REQUIRES RESET" }, + { SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION, "FIRMWARE REQUIRES MAX TIME VIOLATION" }, + { SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" }, + { SPDK_NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" }, + { SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" }, + { SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE, "NAMESPACE ID UNAVAILABLE" }, + { SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" }, + { SPDK_NVME_SC_NAMESPACE_IS_PRIVATE, "NAMESPACE IS PRIVATE" }, + { SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED, "NAMESPACE NOT ATTACHED" }, + { SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED, "THINPROVISIONING NOT SUPPORTED" }, + { SPDK_NVME_SC_CONTROLLER_LIST_INVALID, "CONTROLLER LIST INVALID" }, + { SPDK_NVME_SC_DEVICE_SELF_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" }, + { SPDK_NVME_SC_BOOT_PARTITION_WRITE_PROHIBITED, "BOOT PARTITION WRITE PROHIBITED" }, + { SPDK_NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER ID" }, + { SPDK_NVME_SC_INVALID_SECONDARY_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" }, + { SPDK_NVME_SC_INVALID_NUM_CTRLR_RESOURCES, "INVALID NUMBER OF CONTROLLER RESOURCES" }, + { 
SPDK_NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" }, + { SPDK_NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" }, + { SPDK_NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" }, + { SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" }, + { 0xFFFF, "COMMAND SPECIFIC" } +}; + +static const struct nvme_string media_error_status[] = { + { SPDK_NVME_SC_WRITE_FAULTS, "WRITE FAULTS" }, + { SPDK_NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" }, + { SPDK_NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" }, + { SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" }, + { SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" }, + { SPDK_NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" }, + { SPDK_NVME_SC_ACCESS_DENIED, "ACCESS DENIED" }, + { SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK, "DEALLOCATED OR UNWRITTEN BLOCK" }, + { SPDK_OCSSD_SC_OFFLINE_CHUNK, "RESET OFFLINE CHUNK" }, + { SPDK_OCSSD_SC_INVALID_RESET, "INVALID RESET" }, + { SPDK_OCSSD_SC_WRITE_FAIL_WRITE_NEXT_UNIT, "WRITE FAIL WRITE NEXT UNIT" }, + { SPDK_OCSSD_SC_WRITE_FAIL_CHUNK_EARLY_CLOSE, "WRITE FAIL CHUNK EARLY CLOSE" }, + { SPDK_OCSSD_SC_OUT_OF_ORDER_WRITE, "OUT OF ORDER WRITE" }, + { SPDK_OCSSD_SC_READ_HIGH_ECC, "READ HIGH ECC" }, + { 0xFFFF, "MEDIA ERROR" } +}; + +static const struct nvme_string path_status[] = { + { SPDK_NVME_SC_INTERNAL_PATH_ERROR, "INTERNAL PATH ERROR" }, + { SPDK_NVME_SC_CONTROLLER_PATH_ERROR, "CONTROLLER PATH ERROR" }, + { SPDK_NVME_SC_HOST_PATH_ERROR, "HOST PATH ERROR" }, + { SPDK_NVME_SC_ABORTED_BY_HOST, "ABORTED BY HOST" }, + { 0xFFFF, "PATH ERROR" } +}; + +static const char * +get_status_string(uint16_t sct, uint16_t sc) +{ + const struct nvme_string *entry; + + switch (sct) { + case SPDK_NVME_SCT_GENERIC: + entry = generic_status; + break; + case SPDK_NVME_SCT_COMMAND_SPECIFIC: + entry = command_specific_status; + break; + case SPDK_NVME_SCT_MEDIA_ERROR: + entry = media_error_status; + break; + case SPDK_NVME_SCT_PATH: + entry = path_status; + break; + case SPDK_NVME_SCT_VENDOR_SPECIFIC: + return "VENDOR SPECIFIC"; + default: + return "RESERVED"; + } + + return nvme_get_string(entry, sc); +} + +void +nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, + struct spdk_nvme_cpl *cpl) +{ + SPDK_NOTICELOG("%s (%02x/%02x) sqid:%d cid:%d cdw0:%x sqhd:%04x p:%x m:%x dnr:%x\n", + get_status_string(cpl->status.sct, cpl->status.sc), + cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0, + cpl->sqhd, cpl->status.p, cpl->status.m, cpl->status.dnr); +} + +bool +nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl) +{ + /* + * TODO: spec is not clear how commands that are aborted due + * to TLER will be marked. So for now, it seems + * NAMESPACE_NOT_READY is the only case where we should + * look at the DNR bit. 
+ */ + switch ((int)cpl->status.sct) { + case SPDK_NVME_SCT_GENERIC: + switch ((int)cpl->status.sc) { + case SPDK_NVME_SC_NAMESPACE_NOT_READY: + case SPDK_NVME_SC_FORMAT_IN_PROGRESS: + if (cpl->status.dnr) { + return false; + } else { + return true; + } + case SPDK_NVME_SC_INVALID_OPCODE: + case SPDK_NVME_SC_INVALID_FIELD: + case SPDK_NVME_SC_COMMAND_ID_CONFLICT: + case SPDK_NVME_SC_DATA_TRANSFER_ERROR: + case SPDK_NVME_SC_ABORTED_POWER_LOSS: + case SPDK_NVME_SC_INTERNAL_DEVICE_ERROR: + case SPDK_NVME_SC_ABORTED_BY_REQUEST: + case SPDK_NVME_SC_ABORTED_SQ_DELETION: + case SPDK_NVME_SC_ABORTED_FAILED_FUSED: + case SPDK_NVME_SC_ABORTED_MISSING_FUSED: + case SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT: + case SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR: + case SPDK_NVME_SC_LBA_OUT_OF_RANGE: + case SPDK_NVME_SC_CAPACITY_EXCEEDED: + default: + return false; + } + case SPDK_NVME_SCT_PATH: + /* + * Per NVMe TP 4028 (Path and Transport Error Enhancements), retries should be + * based on the setting of the DNR bit for Internal Path Error + */ + switch ((int)cpl->status.sc) { + case SPDK_NVME_SC_INTERNAL_PATH_ERROR: + return !cpl->status.dnr; + default: + return false; + } + case SPDK_NVME_SCT_COMMAND_SPECIFIC: + case SPDK_NVME_SCT_MEDIA_ERROR: + case SPDK_NVME_SCT_VENDOR_SPECIFIC: + default: + return false; + } +} + +static void +nvme_qpair_manual_complete_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req, uint32_t sct, uint32_t sc, + bool print_on_error) +{ + struct spdk_nvme_cpl cpl; + bool error; + + memset(&cpl, 0, sizeof(cpl)); + cpl.sqid = qpair->id; + cpl.status.sct = sct; + cpl.status.sc = sc; + + error = spdk_nvme_cpl_is_error(&cpl); + + if (error && print_on_error) { + SPDK_NOTICELOG("Command completed manually:\n"); + nvme_qpair_print_command(qpair, &req->cmd); + nvme_qpair_print_completion(qpair, &cpl); + } + + nvme_complete_request(req, &cpl); + nvme_free_request(req); +} + +int32_t +spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) +{ + int32_t ret; + struct nvme_request *req, *tmp; + + if (qpair->ctrlr->is_failed) { + nvme_qpair_fail(qpair); + return 0; + } + + /* error injection for those queued error requests */ + if (spdk_unlikely(!STAILQ_EMPTY(&qpair->err_req_head))) { + STAILQ_FOREACH_SAFE(req, &qpair->err_req_head, stailq, tmp) { + if (spdk_get_ticks() - req->submit_tick > req->timeout_tsc) { + STAILQ_REMOVE(&qpair->err_req_head, req, nvme_request, stailq); + nvme_qpair_manual_complete_request(qpair, req, + req->cpl.status.sct, + req->cpl.status.sc, true); + } + } + } + + qpair->in_completion_context = 1; + ret = nvme_transport_qpair_process_completions(qpair, max_completions); + qpair->in_completion_context = 0; + if (qpair->delete_after_completion_context) { + /* + * A request to delete this qpair was made in the context of this completion + * routine - so it is safe to delete it now. 
+ */ + spdk_nvme_ctrlr_free_io_qpair(qpair); + } + return ret; +} + +int +nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id, + struct spdk_nvme_ctrlr *ctrlr, + enum spdk_nvme_qprio qprio, + uint32_t num_requests) +{ + size_t req_size_padded; + uint32_t i; + + qpair->id = id; + qpair->qprio = qprio; + + qpair->in_completion_context = 0; + qpair->delete_after_completion_context = 0; + qpair->no_deletion_notification_needed = 0; + + qpair->ctrlr = ctrlr; + qpair->trtype = ctrlr->trid.trtype; + + STAILQ_INIT(&qpair->free_req); + STAILQ_INIT(&qpair->queued_req); + TAILQ_INIT(&qpair->err_cmd_head); + STAILQ_INIT(&qpair->err_req_head); + + req_size_padded = (sizeof(struct nvme_request) + 63) & ~(size_t)63; + + qpair->req_buf = spdk_zmalloc(req_size_padded * num_requests, 64, NULL, + SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); + if (qpair->req_buf == NULL) { + SPDK_ERRLOG("no memory to allocate qpair(cntlid:0x%x sqid:%d) req_buf with %d request\n", + ctrlr->cntlid, qpair->id, num_requests); + return -ENOMEM; + } + + for (i = 0; i < num_requests; i++) { + struct nvme_request *req = qpair->req_buf + i * req_size_padded; + + STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq); + } + + return 0; +} + +void +nvme_qpair_deinit(struct spdk_nvme_qpair *qpair) +{ + struct nvme_request *req; + struct nvme_error_cmd *cmd, *entry; + + while (!STAILQ_EMPTY(&qpair->err_req_head)) { + req = STAILQ_FIRST(&qpair->err_req_head); + STAILQ_REMOVE_HEAD(&qpair->err_req_head, stailq); + nvme_qpair_manual_complete_request(qpair, req, + req->cpl.status.sct, + req->cpl.status.sc, true); + } + + TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) { + TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link); + spdk_dma_free(cmd); + } + + spdk_dma_free(qpair->req_buf); +} + +int +nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req) +{ + int rc = 0; + struct nvme_request *child_req, *tmp; + struct nvme_error_cmd *cmd; + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + bool child_req_failed = false; + + if (ctrlr->is_failed) { + nvme_free_request(req); + return -ENXIO; + } + + if (req->num_children) { + /* + * This is a split (parent) request. Submit all of the children but not the parent + * request itself, since the parent is the original unsplit request. + */ + TAILQ_FOREACH_SAFE(child_req, &req->children, child_tailq, tmp) { + if (!child_req_failed) { + rc = nvme_qpair_submit_request(qpair, child_req); + if (rc != 0) { + child_req_failed = true; + } + } else { /* free remaining child_reqs since one child_req fails */ + nvme_request_remove_child(req, child_req); + nvme_free_request(child_req); + } + } + + return rc; + } + + /* queue those requests which matches with opcode in err_cmd list */ + if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head))) { + TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) { + if (!cmd->do_not_submit) { + continue; + } + + if ((cmd->opc == req->cmd.opc) && cmd->err_count) { + /* add to error request list and set cpl */ + req->timeout_tsc = cmd->timeout_tsc; + req->submit_tick = spdk_get_ticks(); + req->cpl.status.sct = cmd->status.sct; + req->cpl.status.sc = cmd->status.sc; + STAILQ_INSERT_TAIL(&qpair->err_req_head, req, stailq); + cmd->err_count--; + return 0; + } + } + } + + return nvme_transport_qpair_submit_request(qpair, req); +} + +static void +_nvme_io_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + struct nvme_request *req; + + /* Manually abort each queued I/O. 
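The req_size_padded computation in nvme_qpair_init above uses the usual round-up-to-a-power-of-two mask so each request lands on its own 64-byte cache line. A tiny standalone illustration of that idiom (round_up_pow2 is a name made up for the sketch):

#include <stddef.h>
#include <stdio.h>

/* Round x up to the next multiple of a power-of-two alignment, the same
 * (x + align - 1) & ~(align - 1) pattern used for req_size_padded. */
static size_t round_up_pow2(size_t x, size_t align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	printf("%zu %zu %zu\n",
	       round_up_pow2(1, 64),	/* 64  */
	       round_up_pow2(64, 64),	/* 64  */
	       round_up_pow2(200, 64));	/* 256 */
	return 0;
}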
*/ + while (!STAILQ_EMPTY(&qpair->queued_req)) { + req = STAILQ_FIRST(&qpair->queued_req); + STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); + SPDK_ERRLOG("aborting queued i/o\n"); + nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, true); + } +} + +void +nvme_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + if (nvme_qpair_is_io_queue(qpair)) { + _nvme_io_qpair_enable(qpair); + } + + nvme_transport_qpair_enable(qpair); +} + +void +nvme_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + struct nvme_request *req; + + while (!STAILQ_EMPTY(&qpair->err_req_head)) { + req = STAILQ_FIRST(&qpair->err_req_head); + STAILQ_REMOVE_HEAD(&qpair->err_req_head, stailq); + nvme_qpair_manual_complete_request(qpair, req, + req->cpl.status.sct, + req->cpl.status.sc, true); + } + + nvme_transport_qpair_disable(qpair); +} + +static void +nvme_qpair_fail(struct spdk_nvme_qpair *qpair) +{ + struct nvme_request *req; + + while (!STAILQ_EMPTY(&qpair->queued_req)) { + req = STAILQ_FIRST(&qpair->queued_req); + STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); + SPDK_ERRLOG("failing queued i/o\n"); + nvme_qpair_manual_complete_request(qpair, req, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, true); + } + + nvme_transport_qpair_fail(qpair); +} + +int +spdk_nvme_qpair_add_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint8_t opc, bool do_not_submit, + uint64_t timeout_in_us, + uint32_t err_count, + uint8_t sct, uint8_t sc) +{ + struct nvme_error_cmd *entry, *cmd = NULL; + + if (qpair == NULL) { + qpair = ctrlr->adminq; + } + + TAILQ_FOREACH(entry, &qpair->err_cmd_head, link) { + if (entry->opc == opc) { + cmd = entry; + break; + } + } + + if (cmd == NULL) { + cmd = spdk_dma_zmalloc(sizeof(*cmd), 64, NULL); + if (!cmd) { + return -ENOMEM; + } + TAILQ_INSERT_TAIL(&qpair->err_cmd_head, cmd, link); + } + + cmd->do_not_submit = do_not_submit; + cmd->err_count = err_count; + cmd->timeout_tsc = timeout_in_us * spdk_get_ticks_hz() / 1000000ULL; + cmd->opc = opc; + cmd->status.sct = sct; + cmd->status.sc = sc; + + return 0; +} + +void +spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, + uint8_t opc) +{ + struct nvme_error_cmd *cmd, *entry; + + if (qpair == NULL) { + qpair = ctrlr->adminq; + } + + TAILQ_FOREACH_SAFE(cmd, &qpair->err_cmd_head, link, entry) { + if (cmd->opc == opc) { + TAILQ_REMOVE(&qpair->err_cmd_head, cmd, link); + spdk_dma_free(cmd); + return; + } + } + + return; +} diff --git a/src/spdk/lib/nvme/nvme_quirks.c b/src/spdk/lib/nvme/nvme_quirks.c new file mode 100644 index 00000000..9a213b12 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_quirks.c @@ -0,0 +1,141 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvme_internal.h" + +struct nvme_quirk { + struct spdk_pci_id id; + uint64_t flags; +}; + +static const struct nvme_quirk nvme_quirks[] = { + { {SPDK_PCI_VID_INTEL, 0x0953, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_INTEL_QUIRK_READ_LATENCY | + NVME_INTEL_QUIRK_WRITE_LATENCY | + NVME_INTEL_QUIRK_STRIPING | + NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE + }, + { {SPDK_PCI_VID_INTEL, 0x0A53, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_INTEL_QUIRK_READ_LATENCY | + NVME_INTEL_QUIRK_WRITE_LATENCY | + NVME_INTEL_QUIRK_STRIPING | + NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE + }, + { {SPDK_PCI_VID_INTEL, 0x0A54, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_INTEL_QUIRK_READ_LATENCY | + NVME_INTEL_QUIRK_WRITE_LATENCY | + NVME_INTEL_QUIRK_STRIPING | + NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE + }, + { {SPDK_PCI_VID_INTEL, 0x0A55, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_INTEL_QUIRK_READ_LATENCY | + NVME_INTEL_QUIRK_WRITE_LATENCY | + NVME_INTEL_QUIRK_STRIPING | + NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE + }, + { {SPDK_PCI_VID_MEMBLAZE, 0x0540, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_DELAY_BEFORE_CHK_RDY + }, + { {SPDK_PCI_VID_SAMSUNG, 0xa821, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_DELAY_BEFORE_CHK_RDY + }, + { {SPDK_PCI_VID_SAMSUNG, 0xa822, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_DELAY_BEFORE_CHK_RDY + }, + { {SPDK_PCI_VID_VIRTUALBOX, 0x4e56, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC + }, + { {SPDK_PCI_VID_INTEL, 0x5845, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_IDENTIFY_CNS | + NVME_INTEL_QUIRK_NO_LOG_PAGES + }, + { {SPDK_PCI_VID_CNEXLABS, 0x1f1f, SPDK_PCI_ANY_ID, SPDK_PCI_ANY_ID}, + NVME_QUIRK_IDENTIFY_CNS | + NVME_QUIRK_OCSSD + }, + { {0x0000, 0x0000, 0x0000, 0x0000}, 0} +}; + +/* Compare each field. 
SPDK_PCI_ANY_ID in s1 matches everything */ +static bool +pci_id_match(const struct spdk_pci_id *s1, const struct spdk_pci_id *s2) +{ + if ((s1->vendor_id == SPDK_PCI_ANY_ID || s1->vendor_id == s2->vendor_id) && + (s1->device_id == SPDK_PCI_ANY_ID || s1->device_id == s2->device_id) && + (s1->subvendor_id == SPDK_PCI_ANY_ID || s1->subvendor_id == s2->subvendor_id) && + (s1->subdevice_id == SPDK_PCI_ANY_ID || s1->subdevice_id == s2->subdevice_id)) { + return true; + } + return false; +} + +uint64_t +nvme_get_quirks(const struct spdk_pci_id *id) +{ + const struct nvme_quirk *quirk = nvme_quirks; + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Searching for %04x:%04x [%04x:%04x]...\n", + id->vendor_id, id->device_id, + id->subvendor_id, id->subdevice_id); + + while (quirk->id.vendor_id) { + if (pci_id_match(&quirk->id, id)) { + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Matched quirk %04x:%04x [%04x:%04x]:\n", + quirk->id.vendor_id, quirk->id.device_id, + quirk->id.subvendor_id, quirk->id.subdevice_id); + +#define PRINT_QUIRK(quirk_flag) \ + do { \ + if (quirk->flags & (quirk_flag)) { \ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Quirk enabled: %s\n", #quirk_flag); \ + } \ + } while (0) + + PRINT_QUIRK(NVME_INTEL_QUIRK_READ_LATENCY); + PRINT_QUIRK(NVME_INTEL_QUIRK_WRITE_LATENCY); + PRINT_QUIRK(NVME_QUIRK_DELAY_BEFORE_CHK_RDY); + PRINT_QUIRK(NVME_INTEL_QUIRK_STRIPING); + PRINT_QUIRK(NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC); + PRINT_QUIRK(NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE); + PRINT_QUIRK(NVME_QUIRK_IDENTIFY_CNS); + PRINT_QUIRK(NVME_QUIRK_OCSSD); + + return quirk->flags; + } + quirk++; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "No quirks found.\n"); + + return 0; +} diff --git a/src/spdk/lib/nvme/nvme_rdma.c b/src/spdk/lib/nvme/nvme_rdma.c new file mode 100644 index 00000000..b356e3a1 --- /dev/null +++ b/src/spdk/lib/nvme/nvme_rdma.c @@ -0,0 +1,1634 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NVMe over RDMA transport + */ + +#include "spdk/stdinc.h" + +#include <infiniband/verbs.h> +#include <rdma/rdma_cma.h> +#include <rdma/rdma_verbs.h> + +#include "spdk/assert.h" +#include "spdk/log.h" +#include "spdk/trace.h" +#include "spdk/event.h" +#include "spdk/queue.h" +#include "spdk/nvme.h" +#include "spdk/nvmf_spec.h" +#include "spdk/string.h" +#include "spdk/endian.h" +#include "spdk/likely.h" + +#include "nvme_internal.h" + +#define NVME_RDMA_TIME_OUT_IN_MS 2000 +#define NVME_RDMA_RW_BUFFER_SIZE 131072 + +/* + * NVME RDMA qpair Resource Defaults + */ +#define NVME_RDMA_DEFAULT_TX_SGE 2 +#define NVME_RDMA_DEFAULT_RX_SGE 1 + + +/* Max number of NVMe-oF SGL descriptors supported by the host */ +#define NVME_RDMA_MAX_SGL_DESCRIPTORS 16 +struct spdk_nvmf_cmd { + struct spdk_nvme_cmd cmd; + struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS]; +}; + +/* Mapping from virtual address to ibv_mr pointer for a protection domain */ +struct spdk_nvme_rdma_mr_map { + struct ibv_pd *pd; + struct spdk_mem_map *map; + uint64_t ref; + LIST_ENTRY(spdk_nvme_rdma_mr_map) link; +}; + +/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */ +struct nvme_rdma_ctrlr { + struct spdk_nvme_ctrlr ctrlr; +}; + +/* NVMe RDMA qpair extensions for spdk_nvme_qpair */ +struct nvme_rdma_qpair { + struct spdk_nvme_qpair qpair; + + struct rdma_event_channel *cm_channel; + + struct rdma_cm_id *cm_id; + + struct ibv_cq *cq; + + struct spdk_nvme_rdma_req *rdma_reqs; + + uint16_t num_entries; + + /* Parallel arrays of response buffers + response SGLs of size num_entries */ + struct ibv_sge *rsp_sgls; + struct spdk_nvme_cpl *rsps; + + struct ibv_recv_wr *rsp_recv_wrs; + + /* Memory region describing all rsps for this qpair */ + struct ibv_mr *rsp_mr; + + /* + * Array of num_entries NVMe commands registered as RDMA message buffers. + * Indexed by rdma_req->id.
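Because the command array, the request array, and the CID placed in each submitted command all share this index, a completion maps back to its originating request with plain array lookups; roughly (mirroring what nvme_rdma_recv() does further down):

	/* Sketch: recover the request and command slot from a received completion. */
	struct spdk_nvme_cpl *rsp = &rqpair->rsps[rsp_idx];
	struct spdk_nvme_rdma_req *rdma_req = &rqpair->rdma_reqs[rsp->cid]; /* cid == rdma_req->id */
	struct spdk_nvmf_cmd *cmd = &rqpair->cmds[rdma_req->id];            /* same index again   */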
+ */ + struct spdk_nvmf_cmd *cmds; + + /* Memory region describing all cmds for this qpair */ + struct ibv_mr *cmd_mr; + + struct spdk_nvme_rdma_mr_map *mr_map; + + TAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs; + TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs; +}; + +struct spdk_nvme_rdma_req { + int id; + + struct ibv_send_wr send_wr; + + struct nvme_request *req; + + struct ibv_sge send_sgl[NVME_RDMA_DEFAULT_TX_SGE]; + + TAILQ_ENTRY(spdk_nvme_rdma_req) link; +}; + +static const char *rdma_cm_event_str[] = { + "RDMA_CM_EVENT_ADDR_RESOLVED", + "RDMA_CM_EVENT_ADDR_ERROR", + "RDMA_CM_EVENT_ROUTE_RESOLVED", + "RDMA_CM_EVENT_ROUTE_ERROR", + "RDMA_CM_EVENT_CONNECT_REQUEST", + "RDMA_CM_EVENT_CONNECT_RESPONSE", + "RDMA_CM_EVENT_CONNECT_ERROR", + "RDMA_CM_EVENT_UNREACHABLE", + "RDMA_CM_EVENT_REJECTED", + "RDMA_CM_EVENT_ESTABLISHED", + "RDMA_CM_EVENT_DISCONNECTED", + "RDMA_CM_EVENT_DEVICE_REMOVAL", + "RDMA_CM_EVENT_MULTICAST_JOIN", + "RDMA_CM_EVENT_MULTICAST_ERROR", + "RDMA_CM_EVENT_ADDR_CHANGE", + "RDMA_CM_EVENT_TIMEWAIT_EXIT" +}; + +static LIST_HEAD(, spdk_nvme_rdma_mr_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps); +static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair); + +static inline struct nvme_rdma_qpair * +nvme_rdma_qpair(struct spdk_nvme_qpair *qpair) +{ + assert(qpair->trtype == SPDK_NVME_TRANSPORT_RDMA); + return SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair); +} + +static inline struct nvme_rdma_ctrlr * +nvme_rdma_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA); + return SPDK_CONTAINEROF(ctrlr, struct nvme_rdma_ctrlr, ctrlr); +} + +static struct spdk_nvme_rdma_req * +nvme_rdma_req_get(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_req *rdma_req; + + rdma_req = TAILQ_FIRST(&rqpair->free_reqs); + if (rdma_req) { + TAILQ_REMOVE(&rqpair->free_reqs, rdma_req, link); + TAILQ_INSERT_TAIL(&rqpair->outstanding_reqs, rdma_req, link); + } + + return rdma_req; +} + +static void +nvme_rdma_req_put(struct nvme_rdma_qpair *rqpair, struct spdk_nvme_rdma_req *rdma_req) +{ + TAILQ_REMOVE(&rqpair->outstanding_reqs, rdma_req, link); + TAILQ_INSERT_HEAD(&rqpair->free_reqs, rdma_req, link); +} + +static void +nvme_rdma_req_complete(struct nvme_request *req, + struct spdk_nvme_cpl *rsp) +{ + nvme_complete_request(req, rsp); + nvme_free_request(req); +} + +static const char * +nvme_rdma_cm_event_str_get(uint32_t event) +{ + if (event < SPDK_COUNTOF(rdma_cm_event_str)) { + return rdma_cm_event_str[event]; + } else { + return "Undefined"; + } +} + +static struct rdma_cm_event * +nvme_rdma_get_event(struct rdma_event_channel *channel, + enum rdma_cm_event_type evt) +{ + struct rdma_cm_event *event; + int rc; + + rc = rdma_get_cm_event(channel, &event); + if (rc < 0) { + SPDK_ERRLOG("Failed to get event from CM event channel. 
Error %d (%s)\n", + errno, spdk_strerror(errno)); + return NULL; + } + + if (event->event != evt) { + SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n", + nvme_rdma_cm_event_str_get(evt), + nvme_rdma_cm_event_str_get(event->event), event->event, event->status); + rdma_ack_cm_event(event); + return NULL; + } + + return event; +} + +static int +nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair) +{ + int rc; + struct ibv_qp_init_attr attr; + + rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0); + if (!rqpair->cq) { + SPDK_ERRLOG("Unable to create completion queue: errno %d: %s\n", errno, spdk_strerror(errno)); + return -1; + } + + memset(&attr, 0, sizeof(struct ibv_qp_init_attr)); + attr.qp_type = IBV_QPT_RC; + attr.send_cq = rqpair->cq; + attr.recv_cq = rqpair->cq; + attr.cap.max_send_wr = rqpair->num_entries; /* SEND operations */ + attr.cap.max_recv_wr = rqpair->num_entries; /* RECV operations */ + attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE; + attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE; + + rc = rdma_create_qp(rqpair->cm_id, NULL, &attr); + if (rc) { + SPDK_ERRLOG("rdma_create_qp failed\n"); + return -1; + } + + rqpair->cm_id->context = &rqpair->qpair; + + return 0; +} + +#define nvme_rdma_trace_ibv_sge(sg_list) \ + if (sg_list) { \ + SPDK_DEBUGLOG(SPDK_LOG_NVME, "local addr %p length 0x%x lkey 0x%x\n", \ + (void *)(sg_list)->addr, (sg_list)->length, (sg_list)->lkey); \ + } + +static int +nvme_rdma_post_recv(struct nvme_rdma_qpair *rqpair, uint16_t rsp_idx) +{ + struct ibv_recv_wr *wr, *bad_wr = NULL; + int rc; + + wr = &rqpair->rsp_recv_wrs[rsp_idx]; + nvme_rdma_trace_ibv_sge(wr->sg_list); + + rc = ibv_post_recv(rqpair->cm_id->qp, wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc); + } + + return rc; +} + +static void +nvme_rdma_free_rsps(struct nvme_rdma_qpair *rqpair) +{ + if (rqpair->rsp_mr && rdma_dereg_mr(rqpair->rsp_mr)) { + SPDK_ERRLOG("Unable to de-register rsp_mr\n"); + } + rqpair->rsp_mr = NULL; + + free(rqpair->rsps); + rqpair->rsps = NULL; + free(rqpair->rsp_sgls); + rqpair->rsp_sgls = NULL; + free(rqpair->rsp_recv_wrs); + rqpair->rsp_recv_wrs = NULL; +} + +static int +nvme_rdma_alloc_rsps(struct nvme_rdma_qpair *rqpair) +{ + uint16_t i; + + rqpair->rsp_mr = NULL; + rqpair->rsps = NULL; + rqpair->rsp_recv_wrs = NULL; + + rqpair->rsp_sgls = calloc(rqpair->num_entries, sizeof(*rqpair->rsp_sgls)); + if (!rqpair->rsp_sgls) { + SPDK_ERRLOG("Failed to allocate rsp_sgls\n"); + goto fail; + } + + rqpair->rsp_recv_wrs = calloc(rqpair->num_entries, + sizeof(*rqpair->rsp_recv_wrs)); + if (!rqpair->rsp_recv_wrs) { + SPDK_ERRLOG("Failed to allocate rsp_recv_wrs\n"); + goto fail; + } + + rqpair->rsps = calloc(rqpair->num_entries, sizeof(*rqpair->rsps)); + if (!rqpair->rsps) { + SPDK_ERRLOG("can not allocate rdma rsps\n"); + goto fail; + } + + rqpair->rsp_mr = rdma_reg_msgs(rqpair->cm_id, rqpair->rsps, + rqpair->num_entries * sizeof(*rqpair->rsps)); + if (rqpair->rsp_mr == NULL) { + SPDK_ERRLOG("Unable to register rsp_mr\n"); + goto fail; + } + + for (i = 0; i < rqpair->num_entries; i++) { + struct ibv_sge *rsp_sgl = &rqpair->rsp_sgls[i]; + + rsp_sgl->addr = (uint64_t)&rqpair->rsps[i]; + rsp_sgl->length = sizeof(rqpair->rsps[i]); + rsp_sgl->lkey = rqpair->rsp_mr->lkey; + + rqpair->rsp_recv_wrs[i].wr_id = i; + rqpair->rsp_recv_wrs[i].next = NULL; + rqpair->rsp_recv_wrs[i].sg_list = rsp_sgl; + rqpair->rsp_recv_wrs[i].num_sge = 1; + + if (nvme_rdma_post_recv(rqpair, i)) { 
+ SPDK_ERRLOG("Unable to post connection rx desc\n"); + goto fail; + } + } + + return 0; + +fail: + nvme_rdma_free_rsps(rqpair); + return -ENOMEM; +} + +static void +nvme_rdma_free_reqs(struct nvme_rdma_qpair *rqpair) +{ + if (!rqpair->rdma_reqs) { + return; + } + + if (rqpair->cmd_mr && rdma_dereg_mr(rqpair->cmd_mr)) { + SPDK_ERRLOG("Unable to de-register cmd_mr\n"); + } + rqpair->cmd_mr = NULL; + + free(rqpair->cmds); + rqpair->cmds = NULL; + + free(rqpair->rdma_reqs); + rqpair->rdma_reqs = NULL; +} + +static int +nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair) +{ + int i; + + rqpair->rdma_reqs = calloc(rqpair->num_entries, sizeof(struct spdk_nvme_rdma_req)); + if (rqpair->rdma_reqs == NULL) { + SPDK_ERRLOG("Failed to allocate rdma_reqs\n"); + goto fail; + } + + rqpair->cmds = calloc(rqpair->num_entries, sizeof(*rqpair->cmds)); + if (!rqpair->cmds) { + SPDK_ERRLOG("Failed to allocate RDMA cmds\n"); + goto fail; + } + + rqpair->cmd_mr = rdma_reg_msgs(rqpair->cm_id, rqpair->cmds, + rqpair->num_entries * sizeof(*rqpair->cmds)); + if (!rqpair->cmd_mr) { + SPDK_ERRLOG("Unable to register cmd_mr\n"); + goto fail; + } + + TAILQ_INIT(&rqpair->free_reqs); + TAILQ_INIT(&rqpair->outstanding_reqs); + for (i = 0; i < rqpair->num_entries; i++) { + struct spdk_nvme_rdma_req *rdma_req; + struct spdk_nvmf_cmd *cmd; + + rdma_req = &rqpair->rdma_reqs[i]; + cmd = &rqpair->cmds[i]; + + rdma_req->id = i; + + /* The first RDMA sgl element will always point + * at this data structure. Depending on whether + * an NVMe-oF SGL is required, the length of + * this element may change. */ + rdma_req->send_sgl[0].addr = (uint64_t)cmd; + rdma_req->send_sgl[0].lkey = rqpair->cmd_mr->lkey; + + rdma_req->send_wr.wr_id = (uint64_t)rdma_req; + rdma_req->send_wr.next = NULL; + rdma_req->send_wr.opcode = IBV_WR_SEND; + rdma_req->send_wr.send_flags = IBV_SEND_SIGNALED; + rdma_req->send_wr.sg_list = rdma_req->send_sgl; + rdma_req->send_wr.imm_data = 0; + + TAILQ_INSERT_TAIL(&rqpair->free_reqs, rdma_req, link); + } + + return 0; + +fail: + nvme_rdma_free_reqs(rqpair); + return -ENOMEM; +} + +static int +nvme_rdma_recv(struct nvme_rdma_qpair *rqpair, uint64_t rsp_idx) +{ + struct spdk_nvme_qpair *qpair = &rqpair->qpair; + struct spdk_nvme_rdma_req *rdma_req; + struct spdk_nvme_cpl *rsp; + struct nvme_request *req; + + assert(rsp_idx < rqpair->num_entries); + rsp = &rqpair->rsps[rsp_idx]; + rdma_req = &rqpair->rdma_reqs[rsp->cid]; + + req = rdma_req->req; + nvme_rdma_req_complete(req, rsp); + + nvme_rdma_req_put(rqpair, rdma_req); + if (nvme_rdma_post_recv(rqpair, rsp_idx)) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + return -1; + } + + if (!STAILQ_EMPTY(&qpair->queued_req) && !qpair->ctrlr->is_resetting) { + req = STAILQ_FIRST(&qpair->queued_req); + STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); + nvme_qpair_submit_request(qpair, req); + } + + return 0; +} + +static int +nvme_rdma_resolve_addr(struct nvme_rdma_qpair *rqpair, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct rdma_event_channel *cm_channel) +{ + int ret; + struct rdma_cm_event *event; + + ret = rdma_resolve_addr(rqpair->cm_id, src_addr, dst_addr, + NVME_RDMA_TIME_OUT_IN_MS); + if (ret) { + SPDK_ERRLOG("rdma_resolve_addr, %d\n", errno); + return ret; + } + + event = nvme_rdma_get_event(cm_channel, RDMA_CM_EVENT_ADDR_RESOLVED); + if (event == NULL) { + SPDK_ERRLOG("RDMA address resolution error\n"); + return -1; + } + rdma_ack_cm_event(event); + + ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS); + if (ret) { + 
SPDK_ERRLOG("rdma_resolve_route\n"); + return ret; + } + + event = nvme_rdma_get_event(cm_channel, RDMA_CM_EVENT_ROUTE_RESOLVED); + if (event == NULL) { + SPDK_ERRLOG("RDMA route resolution error\n"); + return -1; + } + rdma_ack_cm_event(event); + + return 0; +} + +static int +nvme_rdma_connect(struct nvme_rdma_qpair *rqpair) +{ + struct rdma_conn_param param = {}; + struct spdk_nvmf_rdma_request_private_data request_data = {}; + struct spdk_nvmf_rdma_accept_private_data *accept_data; + struct ibv_device_attr attr; + int ret; + struct rdma_cm_event *event; + struct spdk_nvme_ctrlr *ctrlr; + + ret = ibv_query_device(rqpair->cm_id->verbs, &attr); + if (ret != 0) { + SPDK_ERRLOG("Failed to query RDMA device attributes.\n"); + return ret; + } + + param.responder_resources = spdk_min(rqpair->num_entries, attr.max_qp_rd_atom); + + ctrlr = rqpair->qpair.ctrlr; + if (!ctrlr) { + return -1; + } + + request_data.qid = rqpair->qpair.id; + request_data.hrqsize = rqpair->num_entries; + request_data.hsqsize = rqpair->num_entries - 1; + request_data.cntlid = ctrlr->cntlid; + + param.private_data = &request_data; + param.private_data_len = sizeof(request_data); + param.retry_count = 7; + param.rnr_retry_count = 7; + + ret = rdma_connect(rqpair->cm_id, ¶m); + if (ret) { + SPDK_ERRLOG("nvme rdma connect error\n"); + return ret; + } + + event = nvme_rdma_get_event(rqpair->cm_channel, RDMA_CM_EVENT_ESTABLISHED); + if (event == NULL) { + SPDK_ERRLOG("RDMA connect error\n"); + return -1; + } + + accept_data = (struct spdk_nvmf_rdma_accept_private_data *)event->param.conn.private_data; + if (accept_data == NULL) { + rdma_ack_cm_event(event); + SPDK_ERRLOG("NVMe-oF target did not return accept data\n"); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested queue depth %d. 
Actually got queue depth %d.\n", + rqpair->num_entries, accept_data->crqsize); + + rqpair->num_entries = spdk_min(rqpair->num_entries, accept_data->crqsize); + + rdma_ack_cm_event(event); + + return 0; +} + +static int +nvme_rdma_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service) +{ + struct addrinfo *res; + struct addrinfo hints; + int ret; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = family; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + ret = getaddrinfo(addr, service, &hints, &res); + if (ret) { + SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret); + return ret; + } + + if (res->ai_addrlen > sizeof(*sa)) { + SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen); + ret = EINVAL; + } else { + memcpy(sa, res->ai_addr, res->ai_addrlen); + } + + freeaddrinfo(res); + return ret; +} + +static int +nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct ibv_pd *pd = cb_ctx; + struct ibv_mr *mr; + int rc; + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + mr = ibv_reg_mr(pd, vaddr, size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (mr == NULL) { + SPDK_ERRLOG("ibv_reg_mr() failed\n"); + return -EFAULT; + } else { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); + } + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: + mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); + rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); + if (mr) { + ibv_dereg_mr(mr); + } + break; + default: + SPDK_UNREACHABLE(); + } + + return rc; +} + +static int +nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair) +{ + struct ibv_pd *pd = rqpair->cm_id->qp->pd; + struct spdk_nvme_rdma_mr_map *mr_map; + const struct spdk_mem_map_ops nvme_rdma_map_ops = { + .notify_cb = nvme_rdma_mr_map_notify, + .are_contiguous = NULL + }; + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + + /* Look up existing mem map registration for this pd */ + LIST_FOREACH(mr_map, &g_rdma_mr_maps, link) { + if (mr_map->pd == pd) { + mr_map->ref++; + rqpair->mr_map = mr_map; + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return 0; + } + } + + mr_map = calloc(1, sizeof(*mr_map)); + if (mr_map == NULL) { + SPDK_ERRLOG("calloc() failed\n"); + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return -1; + } + + mr_map->ref = 1; + mr_map->pd = pd; + mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd); + if (mr_map->map == NULL) { + SPDK_ERRLOG("spdk_mem_map_alloc() failed\n"); + free(mr_map); + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return -1; + } + + rqpair->mr_map = mr_map; + LIST_INSERT_HEAD(&g_rdma_mr_maps, mr_map, link); + + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + + return 0; +} + +static void +nvme_rdma_unregister_mem(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_mr_map *mr_map; + + mr_map = rqpair->mr_map; + rqpair->mr_map = NULL; + + if (mr_map == NULL) { + return; + } + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + + assert(mr_map->ref > 0); + mr_map->ref--; + if (mr_map->ref == 0) { + LIST_REMOVE(mr_map, link); + spdk_mem_map_free(&mr_map->map); + free(mr_map); + } + + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); +} + +static int +nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair) +{ + struct sockaddr_storage dst_addr; + struct sockaddr_storage src_addr; + bool src_addr_specified; 
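Every payload buffer handed to this transport must already lie inside a region registered with ibv_reg_mr(); the per-PD memory map turns the per-I/O cost into a translation lookup rather than a registration. Sketch of the pattern the request builders below rely on (payload, payload_len, sge and sgl are placeholder names):

	/* Sketch: translate a payload address to its cached ibv_mr to fill in lkey/rkey. */
	uint64_t size = payload_len;
	struct ibv_mr *mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
								     (uint64_t)payload, &size);
	if (mr == NULL || size < payload_len) {
		/* buffer not registered, or the registration does not cover the whole payload */
		return -EINVAL;
	}
	sge.lkey = mr->lkey;		/* local access, used for inline/send SGEs  */
	sgl.keyed.key = mr->rkey;	/* remote access, used for keyed data blocks */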
+ int rc; + struct spdk_nvme_ctrlr *ctrlr; + int family; + + rqpair->cm_channel = rdma_create_event_channel(); + if (rqpair->cm_channel == NULL) { + SPDK_ERRLOG("rdma_create_event_channel() failed\n"); + return -1; + } + + ctrlr = rqpair->qpair.ctrlr; + + switch (ctrlr->trid.adrfam) { + case SPDK_NVMF_ADRFAM_IPV4: + family = AF_INET; + break; + case SPDK_NVMF_ADRFAM_IPV6: + family = AF_INET6; + break; + default: + SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family); + + memset(&dst_addr, 0, sizeof(dst_addr)); + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "trsvcid is %s\n", ctrlr->trid.trsvcid); + rc = nvme_rdma_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid); + if (rc != 0) { + SPDK_ERRLOG("dst_addr nvme_rdma_parse_addr() failed\n"); + return -1; + } + + if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) { + memset(&src_addr, 0, sizeof(src_addr)); + rc = nvme_rdma_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid); + if (rc != 0) { + SPDK_ERRLOG("src_addr nvme_rdma_parse_addr() failed\n"); + return -1; + } + src_addr_specified = true; + } else { + src_addr_specified = false; + } + + rc = rdma_create_id(rqpair->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP); + if (rc < 0) { + SPDK_ERRLOG("rdma_create_id() failed\n"); + return -1; + } + + rc = nvme_rdma_resolve_addr(rqpair, + src_addr_specified ? (struct sockaddr *)&src_addr : NULL, + (struct sockaddr *)&dst_addr, rqpair->cm_channel); + if (rc < 0) { + SPDK_ERRLOG("nvme_rdma_resolve_addr() failed\n"); + return -1; + } + + rc = nvme_rdma_qpair_init(rqpair); + if (rc < 0) { + SPDK_ERRLOG("nvme_rdma_qpair_init() failed\n"); + return -1; + } + + rc = nvme_rdma_connect(rqpair); + if (rc != 0) { + SPDK_ERRLOG("Unable to connect the rqpair\n"); + return -1; + } + + rc = nvme_rdma_alloc_reqs(rqpair); + SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc); + if (rc) { + SPDK_ERRLOG("Unable to allocate rqpair RDMA requests\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA requests allocated\n"); + + rc = nvme_rdma_alloc_rsps(rqpair); + SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc); + if (rc < 0) { + SPDK_ERRLOG("Unable to allocate rqpair RDMA responses\n"); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA responses allocated\n"); + + rc = nvme_rdma_register_mem(rqpair); + if (rc < 0) { + SPDK_ERRLOG("Unable to register memory for RDMA\n"); + return -1; + } + + rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries); + if (rc < 0) { + SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n"); + return -1; + } + + return 0; +} + +/* + * Build SGL describing empty payload. + */ +static int +nvme_rdma_build_null_request(struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_request *req = rdma_req->req; + + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + + /* The first element of this SGL is pointing at an + * spdk_nvmf_cmd object. For this particular command, + * we only need the first 64 bytes corresponding to + * the NVMe command. */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd); + + /* The RDMA SGL needs one element describing the NVMe command. 
*/ + rdma_req->send_wr.num_sge = 1; + + req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; + req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; + req->cmd.dptr.sgl1.keyed.length = 0; + req->cmd.dptr.sgl1.keyed.key = 0; + req->cmd.dptr.sgl1.address = 0; + + return 0; +} + +/* + * Build inline SGL describing contiguous payload buffer. + */ +static int +nvme_rdma_build_contig_inline_request(struct nvme_rdma_qpair *rqpair, + struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_request *req = rdma_req->req; + struct ibv_mr *mr; + void *payload; + uint64_t requested_size; + + payload = req->payload.contig_or_cb_arg + req->payload_offset; + assert(req->payload_size != 0); + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); + + requested_size = req->payload_size; + mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, + (uint64_t)payload, &requested_size); + + if (mr == NULL || requested_size < req->payload_size) { + return -EINVAL; + } + + /* The first element of this SGL is pointing at an + * spdk_nvmf_cmd object. For this particular command, + * we only need the first 64 bytes corresponding to + * the NVMe command. */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd); + + rdma_req->send_sgl[1].addr = (uint64_t)payload; + rdma_req->send_sgl[1].length = (uint32_t)req->payload_size; + rdma_req->send_sgl[1].lkey = mr->lkey; + + /* The RDMA SGL contains two elements. The first describes + * the NVMe command and the second describes the data + * payload. */ + rdma_req->send_wr.num_sge = 2; + + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; + req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; + req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size; + /* Inline only supported for icdoff == 0 currently. This function will + * not get called for controllers with other values. */ + req->cmd.dptr.sgl1.address = (uint64_t)0; + + return 0; +} + +/* + * Build SGL describing contiguous payload buffer. + */ +static int +nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair, + struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_request *req = rdma_req->req; + void *payload = req->payload.contig_or_cb_arg + req->payload_offset; + struct ibv_mr *mr; + uint64_t requested_size; + + assert(req->payload_size != 0); + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); + + requested_size = req->payload_size; + mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload, + &requested_size); + if (mr == NULL || requested_size < req->payload_size) { + return -1; + } + + /* The first element of this SGL is pointing at an + * spdk_nvmf_cmd object. For this particular command, + * we only need the first 64 bytes corresponding to + * the NVMe command. */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd); + + /* The RDMA SGL needs one element describing the NVMe command. */ + rdma_req->send_wr.num_sge = 1; + + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; + req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; + req->cmd.dptr.sgl1.keyed.length = req->payload_size; + req->cmd.dptr.sgl1.keyed.key = mr->rkey; + req->cmd.dptr.sgl1.address = (uint64_t)payload; + + return 0; +} + +/* + * Build SGL describing scattered payload buffer. 
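The two contiguous-buffer builders above differ only in how dptr.sgl1 describes the single data block: in-capsule data is sent unkeyed, with the payload carried as a second local SGE, while the keyed form hands the target an rkey and address so it can move the data itself. Side by side, using the values set in those functions:

	/* In-capsule (inline) data: payload travels in the SEND, second SGE uses lkey. */
	cmd.dptr.sgl1.unkeyed.type    = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	cmd.dptr.sgl1.unkeyed.length  = payload_size;
	cmd.dptr.sgl1.address         = 0;		/* offset 0; inline only when icdoff == 0 */

	/* Keyed data block: the target performs the RDMA transfer using rkey + address. */
	cmd.dptr.sgl1.keyed.type      = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	cmd.dptr.sgl1.keyed.subtype   = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	cmd.dptr.sgl1.keyed.length    = payload_size;
	cmd.dptr.sgl1.keyed.key       = mr->rkey;
	cmd.dptr.sgl1.address         = (uint64_t)payload;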
+ */ +static int +nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair, + struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_request *req = rdma_req->req; + struct spdk_nvmf_cmd *cmd = &rqpair->cmds[rdma_req->id]; + struct ibv_mr *mr = NULL; + void *virt_addr; + uint64_t remaining_size, mr_length; + uint32_t sge_length; + int rc, max_num_sgl, num_sgl_desc; + + assert(req->payload_size != 0); + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); + assert(req->payload.reset_sgl_fn != NULL); + assert(req->payload.next_sge_fn != NULL); + req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); + + max_num_sgl = req->qpair->ctrlr->max_sges; + + remaining_size = req->payload_size; + num_sgl_desc = 0; + do { + rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &sge_length); + if (rc) { + return -1; + } + + sge_length = spdk_min(remaining_size, sge_length); + mr_length = sge_length; + + mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr, + &mr_length); + + if (mr == NULL || mr_length < sge_length) { + return -1; + } + + cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; + cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; + cmd->sgl[num_sgl_desc].keyed.length = sge_length; + cmd->sgl[num_sgl_desc].keyed.key = mr->rkey; + cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr; + + remaining_size -= sge_length; + num_sgl_desc++; + } while (remaining_size > 0 && num_sgl_desc < max_num_sgl); + + + /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */ + if (remaining_size > 0) { + return -1; + } + + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + + /* The RDMA SGL needs one element describing some portion + * of the spdk_nvmf_cmd structure. */ + rdma_req->send_wr.num_sge = 1; + + /* + * If only one SGL descriptor is required, it can be embedded directly in the command + * as a data block descriptor. + */ + if (num_sgl_desc == 1) { + /* The first element of this SGL is pointing at an + * spdk_nvmf_cmd object. For this particular command, + * we only need the first 64 bytes corresponding to + * the NVMe command. */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd); + + req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; + req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; + req->cmd.dptr.sgl1.keyed.length = req->payload_size; + req->cmd.dptr.sgl1.keyed.key = mr->rkey; + req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address; + } else { + /* + * Otherwise, The SGL descriptor embedded in the command must point to the list of + * SGL descriptors used to describe the operation. In that case it is a last segment descriptor. + */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd) + sizeof(struct + spdk_nvme_sgl_descriptor) * num_sgl_desc; + + req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT; + req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; + req->cmd.dptr.sgl1.unkeyed.length = num_sgl_desc * sizeof(struct spdk_nvme_sgl_descriptor); + req->cmd.dptr.sgl1.address = (uint64_t)0; + } + + return 0; +} + +/* + * Build inline SGL describing sgl payload buffer. 
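When more than one descriptor is needed, the descriptors ride in the capsule directly behind the 64-byte command and send_sgl[0].length grows accordingly. For example, with three keyed descriptors (16 bytes each, per the NVMe SGL descriptor format):

	/* Example: a 3-element payload ships 3 keyed descriptors after the 64-byte command. */
	rdma_req->send_wr.num_sge    = 1;				/* one SGE covers cmd + descriptor list   */
	rdma_req->send_sgl[0].length = 64 + 3 * 16;			/* = 112 bytes sent in the command capsule */
	cmd.dptr.sgl1.unkeyed.type   = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
	cmd.dptr.sgl1.unkeyed.length = 3 * 16;				/* byte length of the descriptor list      */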
+ */ +static int +nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair, + struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_request *req = rdma_req->req; + struct ibv_mr *mr; + uint32_t length; + uint64_t requested_size; + void *virt_addr; + int rc; + + assert(req->payload_size != 0); + assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); + assert(req->payload.reset_sgl_fn != NULL); + assert(req->payload.next_sge_fn != NULL); + req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); + + /* TODO: for now, we only support a single SGL entry */ + rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length); + if (rc) { + return -1; + } + + if (length < req->payload_size) { + SPDK_ERRLOG("multi-element SGL currently not supported for RDMA\n"); + return -1; + } + + requested_size = req->payload_size; + mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr, + &requested_size); + if (mr == NULL || requested_size < req->payload_size) { + return -1; + } + + /* The first element of this SGL is pointing at an + * spdk_nvmf_cmd object. For this particular command, + * we only need the first 64 bytes corresponding to + * the NVMe command. */ + rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd); + + rdma_req->send_sgl[1].addr = (uint64_t)virt_addr; + rdma_req->send_sgl[1].length = (uint32_t)req->payload_size; + rdma_req->send_sgl[1].lkey = mr->lkey; + + /* The RDMA SGL contains two elements. The first describes + * the NVMe command and the second describes the data + * payload. */ + rdma_req->send_wr.num_sge = 2; + + req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; + req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; + req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET; + req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size; + /* Inline only supported for icdoff == 0 currently. This function will + * not get called for controllers with other values. */ + req->cmd.dptr.sgl1.address = (uint64_t)0; + + return 0; +} + +static inline unsigned int +nvme_rdma_icdsz_bytes(struct spdk_nvme_ctrlr *ctrlr) +{ + return (ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd)); +} + +static int +nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req, + struct spdk_nvme_rdma_req *rdma_req) +{ + struct spdk_nvme_ctrlr *ctrlr = rqpair->qpair.ctrlr; + int rc; + + rdma_req->req = req; + req->cmd.cid = rdma_req->id; + + if (req->payload_size == 0) { + rc = nvme_rdma_build_null_request(rdma_req); + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { + /* + * Check if icdoff is non zero, to avoid interop conflicts with + * targets with non-zero icdoff. Both SPDK and the Linux kernel + * targets use icdoff = 0. For targets with non-zero icdoff, we + * will currently just not use inline data for now. 
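nvme_rdma_icdsz_bytes() above converts the controller's ioccsz (reported in 16-byte units) into usable in-capsule data bytes. For example, a target reporting ioccsz = 260 allows 260 * 16 - 64 = 4096 bytes, so a 4 KiB write qualifies for the inline path and anything larger falls back to a keyed SGL:

	/* Example: ioccsz = 260  ->  260 * 16 - sizeof(struct spdk_nvme_cmd) = 4160 - 64 = 4096 bytes. */
	assert(nvme_rdma_icdsz_bytes(ctrlr) == 4096);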
+ */ + if (req->cmd.opc == SPDK_NVME_OPC_WRITE && + req->payload_size <= nvme_rdma_icdsz_bytes(ctrlr) && + (ctrlr->cdata.nvmf_specific.icdoff == 0)) { + rc = nvme_rdma_build_contig_inline_request(rqpair, rdma_req); + } else { + rc = nvme_rdma_build_contig_request(rqpair, rdma_req); + } + } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { + if (req->cmd.opc == SPDK_NVME_OPC_WRITE && + req->payload_size <= nvme_rdma_icdsz_bytes(ctrlr) && + ctrlr->cdata.nvmf_specific.icdoff == 0) { + rc = nvme_rdma_build_sgl_inline_request(rqpair, rdma_req); + } else { + rc = nvme_rdma_build_sgl_request(rqpair, rdma_req); + } + } else { + rc = -1; + } + + if (rc) { + return rc; + } + + memcpy(&rqpair->cmds[rdma_req->id], &req->cmd, sizeof(req->cmd)); + return 0; +} + +static struct spdk_nvme_qpair * +nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, + uint16_t qid, uint32_t qsize, + enum spdk_nvme_qprio qprio, + uint32_t num_requests) +{ + struct nvme_rdma_qpair *rqpair; + struct spdk_nvme_qpair *qpair; + int rc; + + rqpair = calloc(1, sizeof(struct nvme_rdma_qpair)); + if (!rqpair) { + SPDK_ERRLOG("failed to get create rqpair\n"); + return NULL; + } + + rqpair->num_entries = qsize; + + qpair = &rqpair->qpair; + + rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests); + if (rc != 0) { + return NULL; + } + + rc = nvme_rdma_qpair_connect(rqpair); + if (rc < 0) { + nvme_rdma_qpair_destroy(qpair); + return NULL; + } + + return qpair; +} + +static int +nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + struct nvme_rdma_qpair *rqpair; + + if (!qpair) { + return -1; + } + nvme_qpair_deinit(qpair); + + rqpair = nvme_rdma_qpair(qpair); + + nvme_rdma_unregister_mem(rqpair); + nvme_rdma_free_reqs(rqpair); + nvme_rdma_free_rsps(rqpair); + + if (rqpair->cm_id) { + if (rqpair->cm_id->qp) { + rdma_destroy_qp(rqpair->cm_id); + } + rdma_destroy_id(rqpair->cm_id); + } + + if (rqpair->cq) { + ibv_destroy_cq(rqpair->cq); + } + + if (rqpair->cm_channel) { + rdma_destroy_event_channel(rqpair->cm_channel); + } + + free(rqpair); + + return 0; +} + +struct spdk_nvme_qpair * +nvme_rdma_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts) +{ + return nvme_rdma_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio, + opts->io_queue_requests); +} + +int +nvme_rdma_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) +{ + /* do nothing here */ + return 0; +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +int +nvme_rdma_ctrlr_scan(const struct spdk_nvme_transport_id *discovery_trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_remove_cb remove_cb, + bool direct_connect) +{ + struct spdk_nvme_ctrlr_opts discovery_opts; + struct spdk_nvme_ctrlr *discovery_ctrlr; + union spdk_nvme_cc_register cc; + int rc; + struct nvme_completion_poll_status status; + + if (strcmp(discovery_trid->subnqn, SPDK_NVMF_DISCOVERY_NQN) != 0) { + /* It is not a discovery_ctrlr info and try to directly connect it */ + rc = nvme_ctrlr_probe(discovery_trid, NULL, probe_cb, cb_ctx); + return rc; + } + + spdk_nvme_ctrlr_get_default_ctrlr_opts(&discovery_opts, sizeof(discovery_opts)); + /* For discovery_ctrlr set the timeout to 0 */ + discovery_opts.keep_alive_timeout_ms = 0; + + discovery_ctrlr = nvme_rdma_ctrlr_construct(discovery_trid, &discovery_opts, NULL); + if (discovery_ctrlr == NULL) { + return -1; + } + + /* TODO: this should be using the normal NVMe controller initialization process */ + cc.raw = 0; + 
cc.bits.en = 1; + cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ + cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ + rc = nvme_transport_ctrlr_set_reg_4(discovery_ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), + cc.raw); + if (rc < 0) { + SPDK_ERRLOG("Failed to set cc\n"); + nvme_ctrlr_destruct(discovery_ctrlr); + return -1; + } + + /* get the cdata info */ + rc = nvme_ctrlr_cmd_identify(discovery_ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, + &discovery_ctrlr->cdata, sizeof(discovery_ctrlr->cdata), + nvme_completion_poll_cb, &status); + if (rc != 0) { + SPDK_ERRLOG("Failed to identify cdata\n"); + return rc; + } + + if (spdk_nvme_wait_for_completion(discovery_ctrlr->adminq, &status)) { + SPDK_ERRLOG("nvme_identify_controller failed!\n"); + return -ENXIO; + } + + /* Direct attach through spdk_nvme_connect() API */ + if (direct_connect == true) { + /* Set the ready state to skip the normal init process */ + discovery_ctrlr->state = NVME_CTRLR_STATE_READY; + nvme_ctrlr_connected(discovery_ctrlr); + nvme_ctrlr_add_process(discovery_ctrlr, 0); + return 0; + } + + rc = nvme_fabric_ctrlr_discover(discovery_ctrlr, cb_ctx, probe_cb); + nvme_ctrlr_destruct(discovery_ctrlr); + return rc; +} + +struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + void *devhandle) +{ + struct nvme_rdma_ctrlr *rctrlr; + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + int rc; + + rctrlr = calloc(1, sizeof(struct nvme_rdma_ctrlr)); + if (rctrlr == NULL) { + SPDK_ERRLOG("could not allocate ctrlr\n"); + return NULL; + } + + rctrlr->ctrlr.trid.trtype = SPDK_NVME_TRANSPORT_RDMA; + rctrlr->ctrlr.opts = *opts; + memcpy(&rctrlr->ctrlr.trid, trid, sizeof(rctrlr->ctrlr.trid)); + + rc = nvme_ctrlr_construct(&rctrlr->ctrlr); + if (rc != 0) { + free(rctrlr); + return NULL; + } + + rctrlr->ctrlr.adminq = nvme_rdma_ctrlr_create_qpair(&rctrlr->ctrlr, 0, + SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES, 0, SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES); + if (!rctrlr->ctrlr.adminq) { + SPDK_ERRLOG("failed to create admin qpair\n"); + nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr); + return NULL; + } + + if (nvme_ctrlr_get_cap(&rctrlr->ctrlr, &cap)) { + SPDK_ERRLOG("get_cap() failed\n"); + nvme_ctrlr_destruct(&rctrlr->ctrlr); + return NULL; + } + + if (nvme_ctrlr_get_vs(&rctrlr->ctrlr, &vs)) { + SPDK_ERRLOG("get_vs() failed\n"); + nvme_ctrlr_destruct(&rctrlr->ctrlr); + return NULL; + } + + if (nvme_ctrlr_add_process(&rctrlr->ctrlr, 0) != 0) { + SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n"); + nvme_ctrlr_destruct(&rctrlr->ctrlr); + return NULL; + } + + nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs); + + SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n"); + return &rctrlr->ctrlr; +} + +int +nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr); + + if (ctrlr->adminq) { + nvme_rdma_qpair_destroy(ctrlr->adminq); + } + + nvme_ctrlr_destruct_finish(ctrlr); + + free(rctrlr); + + return 0; +} + +int +nvme_rdma_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) +{ + return nvme_fabric_ctrlr_set_reg_4(ctrlr, offset, value); +} + +int +nvme_rdma_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) +{ + return nvme_fabric_ctrlr_set_reg_8(ctrlr, offset, value); +} + +int +nvme_rdma_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) +{ + return nvme_fabric_ctrlr_get_reg_4(ctrlr, offset, value); 
+} + +int +nvme_rdma_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) +{ + return nvme_fabric_ctrlr_get_reg_8(ctrlr, offset, value); +} + +int +nvme_rdma_qpair_submit_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req) +{ + struct nvme_rdma_qpair *rqpair; + struct spdk_nvme_rdma_req *rdma_req; + struct ibv_send_wr *wr, *bad_wr = NULL; + int rc; + + rqpair = nvme_rdma_qpair(qpair); + assert(rqpair != NULL); + assert(req != NULL); + + rdma_req = nvme_rdma_req_get(rqpair); + if (!rdma_req) { + /* + * No rdma_req is available. Queue the request to be processed later. + */ + STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq); + return 0; + } + + if (nvme_rdma_req_init(rqpair, req, rdma_req)) { + SPDK_ERRLOG("nvme_rdma_req_init() failed\n"); + nvme_rdma_req_put(rqpair, rdma_req); + return -1; + } + + req->timed_out = false; + if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) { + req->submit_tick = spdk_get_ticks(); + } else { + req->submit_tick = 0; + } + + wr = &rdma_req->send_wr; + + nvme_rdma_trace_ibv_sge(wr->sg_list); + + rc = ibv_post_send(rqpair->cm_id->qp, wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma send for NVMf completion: %d (%s)\n", rc, spdk_strerror(rc)); + } + + return rc; +} + +int +nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + return nvme_rdma_qpair_destroy(qpair); +} + +int +nvme_rdma_ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + return nvme_rdma_qpair_connect(nvme_rdma_qpair(qpair)); +} + +int +nvme_rdma_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_reset(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_fail(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +static void +nvme_rdma_qpair_check_timeout(struct spdk_nvme_qpair *qpair) +{ + uint64_t t02; + struct spdk_nvme_rdma_req *rdma_req, *tmp; + struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvme_ctrlr_process *active_proc; + + /* Don't check timeouts during controller initialization. */ + if (ctrlr->state != NVME_CTRLR_STATE_READY) { + return; + } + + if (nvme_qpair_is_admin_queue(qpair)) { + active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr); + } else { + active_proc = qpair->active_proc; + } + + /* Only check timeouts if the current process has a timeout callback. */ + if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { + return; + } + + t02 = spdk_get_ticks(); + TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) { + assert(rdma_req->req != NULL); + + if (nvme_request_check_timeout(rdma_req->req, rdma_req->id, active_proc, t02)) { + /* + * The requests are in order, so as soon as one has not timed out, + * stop iterating. 
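nvme_rdma_qpair_check_timeout() only scans outstanding requests when the owning process has registered a timeout callback; an application opts in through the public helper (the signature below is assumed from spdk/nvme.h of this era, not defined in this file):

	/* Sketch (assumed public API shape): ask for a callback after 5 seconds of inactivity. */
	static void
	io_timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
		      struct spdk_nvme_qpair *qpair, uint16_t cid)
	{
		/* e.g. log the stuck command identified by cid and reset the controller */
	}

	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 5 * 1000 * 1000 /* us */,
						   io_timeout_cb, NULL);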
+ */ + break; + } + } +} + +#define MAX_COMPLETIONS_PER_POLL 128 + +int +nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair, + uint32_t max_completions) +{ + struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); + struct ibv_wc wc[MAX_COMPLETIONS_PER_POLL]; + int i, rc, batch_size; + uint32_t reaped; + struct ibv_cq *cq; + + if (max_completions == 0) { + max_completions = rqpair->num_entries; + } else { + max_completions = spdk_min(max_completions, rqpair->num_entries); + } + + cq = rqpair->cq; + + reaped = 0; + do { + batch_size = spdk_min((max_completions - reaped), + MAX_COMPLETIONS_PER_POLL); + rc = ibv_poll_cq(cq, batch_size, wc); + if (rc < 0) { + SPDK_ERRLOG("Error polling CQ! (%d): %s\n", + errno, spdk_strerror(errno)); + return -1; + } else if (rc == 0) { + /* Ran out of completions */ + break; + } + + for (i = 0; i < rc; i++) { + if (wc[i].status) { + SPDK_ERRLOG("CQ error on Queue Pair %p, Response Index %lu (%d): %s\n", + qpair, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status)); + return -1; + } + + switch (wc[i].opcode) { + case IBV_WC_RECV: + SPDK_DEBUGLOG(SPDK_LOG_NVME, "CQ recv completion\n"); + + reaped++; + + if (wc[i].byte_len < sizeof(struct spdk_nvme_cpl)) { + SPDK_ERRLOG("recv length %u less than expected response size\n", wc[i].byte_len); + return -1; + } + + if (nvme_rdma_recv(rqpair, wc[i].wr_id)) { + SPDK_ERRLOG("nvme_rdma_recv processing failure\n"); + return -1; + } + break; + + case IBV_WC_SEND: + break; + + default: + SPDK_ERRLOG("Received an unexpected opcode on the CQ: %d\n", wc[i].opcode); + return -1; + } + } + } while (reaped < max_completions); + + if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) { + nvme_rdma_qpair_check_timeout(qpair); + } + + return reaped; +} + +uint32_t +nvme_rdma_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) +{ + /* Todo, which should get from the NVMF target */ + return NVME_RDMA_RW_BUFFER_SIZE; +} + +uint16_t +nvme_rdma_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) +{ + return spdk_min(ctrlr->cdata.nvmf_specific.msdbd, NVME_RDMA_MAX_SGL_DESCRIPTORS); +} + +void * +nvme_rdma_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) +{ + return NULL; +} + +int +nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size) +{ + return 0; +} diff --git a/src/spdk/lib/nvme/nvme_transport.c b/src/spdk/lib/nvme/nvme_transport.c new file mode 100644 index 00000000..56052a0f --- /dev/null +++ b/src/spdk/lib/nvme/nvme_transport.c @@ -0,0 +1,219 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NVMe transport abstraction + */ + +#include "nvme_internal.h" + +#ifdef DEBUG +static __attribute__((noreturn)) void +nvme_transport_unknown(enum spdk_nvme_transport_type trtype) +{ + SPDK_ERRLOG("Unknown transport %d\n", (int)trtype); + abort(); +} +#define TRANSPORT_DEFAULT(trtype) default: nvme_transport_unknown(trtype); +#else +#define TRANSPORT_DEFAULT(trtype) +#endif + +#define TRANSPORT_PCIE(func_name, args) case SPDK_NVME_TRANSPORT_PCIE: return nvme_pcie_ ## func_name args; +#ifdef SPDK_CONFIG_RDMA +#define TRANSPORT_FABRICS_RDMA(func_name, args) case SPDK_NVME_TRANSPORT_RDMA: return nvme_rdma_ ## func_name args; +#define TRANSPORT_RDMA_AVAILABLE true +#else +#define TRANSPORT_FABRICS_RDMA(func_name, args) case SPDK_NVME_TRANSPORT_RDMA: SPDK_UNREACHABLE(); +#define TRANSPORT_RDMA_AVAILABLE false +#endif +#define TRANSPORT_FABRICS_FC(func_name, args) case SPDK_NVME_TRANSPORT_FC: SPDK_UNREACHABLE(); +#define NVME_TRANSPORT_CALL(trtype, func_name, args) \ + do { \ + switch (trtype) { \ + TRANSPORT_PCIE(func_name, args) \ + TRANSPORT_FABRICS_RDMA(func_name, args) \ + TRANSPORT_FABRICS_FC(func_name, args) \ + TRANSPORT_DEFAULT(trtype) \ + } \ + SPDK_UNREACHABLE(); \ + } while (0) + +bool +spdk_nvme_transport_available(enum spdk_nvme_transport_type trtype) +{ + switch (trtype) { + case SPDK_NVME_TRANSPORT_PCIE: + return true; + + case SPDK_NVME_TRANSPORT_RDMA: + return TRANSPORT_RDMA_AVAILABLE; + + case SPDK_NVME_TRANSPORT_FC: + return false; + } + + return false; +} + +struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(const struct spdk_nvme_transport_id *trid, + const struct spdk_nvme_ctrlr_opts *opts, + void *devhandle) +{ + NVME_TRANSPORT_CALL(trid->trtype, ctrlr_construct, (trid, opts, devhandle)); +} + +int +nvme_transport_ctrlr_scan(const struct spdk_nvme_transport_id *trid, + void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_remove_cb remove_cb, + bool direct_connect) +{ + NVME_TRANSPORT_CALL(trid->trtype, ctrlr_scan, (trid, cb_ctx, probe_cb, remove_cb, direct_connect)); +} + +int +nvme_transport_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_destruct, (ctrlr)); +} + +int +nvme_transport_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_enable, (ctrlr)); +} + +int +nvme_transport_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_set_reg_4, (ctrlr, offset, value)); +} + +int +nvme_transport_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_set_reg_8, (ctrlr, offset, value)); +} + +int 
+nvme_transport_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_get_reg_4, (ctrlr, offset, value)); +} + +int +nvme_transport_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_get_reg_8, (ctrlr, offset, value)); +} + +uint32_t +nvme_transport_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_get_max_xfer_size, (ctrlr)); +} + +uint16_t +nvme_transport_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_get_max_sges, (ctrlr)); +} + +void * +nvme_transport_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_alloc_cmb_io_buffer, (ctrlr, size)); +} + +int +nvme_transport_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_free_cmb_io_buffer, (ctrlr, buf, size)); +} + +struct spdk_nvme_qpair * +nvme_transport_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + const struct spdk_nvme_io_qpair_opts *opts) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_create_io_qpair, (ctrlr, qid, opts)); +} + +int +nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_delete_io_qpair, (ctrlr, qpair)); +} + +int +nvme_transport_ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(ctrlr->trid.trtype, ctrlr_reinit_io_qpair, (ctrlr, qpair)); +} + +int +nvme_transport_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_enable, (qpair)); +} + +int +nvme_transport_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_disable, (qpair)); +} + +int +nvme_transport_qpair_reset(struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_reset, (qpair)); +} + +int +nvme_transport_qpair_fail(struct spdk_nvme_qpair *qpair) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_fail, (qpair)); +} + +int +nvme_transport_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_submit_request, (qpair, req)); +} + +int32_t +nvme_transport_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) +{ + NVME_TRANSPORT_CALL(qpair->trtype, qpair_process_completions, (qpair, max_completions)); +} diff --git a/src/spdk/lib/nvme/nvme_uevent.c b/src/spdk/lib/nvme/nvme_uevent.c new file mode 100644 index 00000000..724cbc5c --- /dev/null +++ b/src/spdk/lib/nvme/nvme_uevent.c @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
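Each nvme_transport_* wrapper above is a single NVME_TRANSPORT_CALL invocation, which the preprocessor expands into a switch on the transport type. For instance, nvme_transport_qpair_submit_request() becomes roughly the following when RDMA support is compiled in (a sketch of the expansion, not literal preprocessor output):

	switch (qpair->trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return nvme_pcie_qpair_submit_request(qpair, req);
	case SPDK_NVME_TRANSPORT_RDMA:
		return nvme_rdma_qpair_submit_request(qpair, req);
	case SPDK_NVME_TRANSPORT_FC:
		SPDK_UNREACHABLE();
	default:
		nvme_transport_unknown(qpair->trtype);	/* DEBUG builds only */
	}
	SPDK_UNREACHABLE();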
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/string.h" + +#include "spdk/log.h" +#include "spdk/event.h" + +#include "nvme_uevent.h" + +#ifdef __linux__ + +#include + +#define SPDK_UEVENT_MSG_LEN 4096 + +int +spdk_uevent_connect(void) +{ + struct sockaddr_nl addr; + int netlink_fd; + int size = 64 * 1024; + int flag; + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + addr.nl_pid = getpid(); + addr.nl_groups = 0xffffffff; + + netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT); + if (netlink_fd < 0) { + return -1; + } + + setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size)); + + flag = fcntl(netlink_fd, F_GETFL); + if (fcntl(netlink_fd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", netlink_fd, + spdk_strerror(errno)); + close(netlink_fd); + return -1; + } + + if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + close(netlink_fd); + return -1; + } + return netlink_fd; +} + +/* Note: We only parse the event from uio subsystem and will ignore + * all the event from other subsystem. 
the event from uio subsystem + * as below: + * action: "add" or "remove" + * subsystem: "uio" + * dev_path: "/devices/pci0000:80/0000:80:01.0/0000:81:00.0/uio/uio0" + */ +static int +parse_event(const char *buf, struct spdk_uevent *event) +{ + char action[SPDK_UEVENT_MSG_LEN]; + char subsystem[SPDK_UEVENT_MSG_LEN]; + char dev_path[SPDK_UEVENT_MSG_LEN]; + char driver[SPDK_UEVENT_MSG_LEN]; + char vfio_pci_addr[SPDK_UEVENT_MSG_LEN]; + + memset(action, 0, SPDK_UEVENT_MSG_LEN); + memset(subsystem, 0, SPDK_UEVENT_MSG_LEN); + memset(dev_path, 0, SPDK_UEVENT_MSG_LEN); + memset(driver, 0, SPDK_UEVENT_MSG_LEN); + memset(vfio_pci_addr, 0, SPDK_UEVENT_MSG_LEN); + + while (*buf) { + if (!strncmp(buf, "ACTION=", 7)) { + buf += 7; + snprintf(action, sizeof(action), "%s", buf); + } else if (!strncmp(buf, "DEVPATH=", 8)) { + buf += 8; + snprintf(dev_path, sizeof(dev_path), "%s", buf); + } else if (!strncmp(buf, "SUBSYSTEM=", 10)) { + buf += 10; + snprintf(subsystem, sizeof(subsystem), "%s", buf); + } else if (!strncmp(buf, "DRIVER=", 7)) { + buf += 7; + snprintf(driver, sizeof(driver), "%s", buf); + } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) { + buf += 14; + snprintf(vfio_pci_addr, sizeof(vfio_pci_addr), "%s", buf); + } + while (*buf++) + ; + } + + if (!strncmp(subsystem, "uio", 3)) { + char *pci_address, *tmp; + struct spdk_pci_addr pci_addr; + + event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_UIO; + if (!strncmp(action, "add", 3)) { + event->action = SPDK_NVME_UEVENT_ADD; + } + if (!strncmp(action, "remove", 6)) { + event->action = SPDK_NVME_UEVENT_REMOVE; + } + tmp = strstr(dev_path, "/uio/"); + + memset(tmp, 0, SPDK_UEVENT_MSG_LEN - (tmp - dev_path)); + + pci_address = strrchr(dev_path, '/'); + pci_address++; + if (spdk_pci_addr_parse(&pci_addr, pci_address) != 0) { + SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", pci_address); + return -1; + } + spdk_pci_addr_fmt(event->traddr, sizeof(event->traddr), &pci_addr); + return 1; + } + if (!strncmp(driver, "vfio-pci", 8)) { + struct spdk_pci_addr pci_addr; + + event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_VFIO; + if (!strncmp(action, "add", 3)) { + event->action = SPDK_NVME_UEVENT_ADD; + } + if (!strncmp(action, "remove", 6)) { + event->action = SPDK_NVME_UEVENT_REMOVE; + } + if (spdk_pci_addr_parse(&pci_addr, vfio_pci_addr) != 0) { + SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", vfio_pci_addr); + return -1; + } + spdk_pci_addr_fmt(event->traddr, sizeof(event->traddr), &pci_addr); + return 1; + + } + return -1; +} + +int +spdk_get_uevent(int fd, struct spdk_uevent *uevent) +{ + int ret; + char buf[SPDK_UEVENT_MSG_LEN]; + + memset(uevent, 0, sizeof(struct spdk_uevent)); + memset(buf, 0, SPDK_UEVENT_MSG_LEN); + + ret = recv(fd, buf, SPDK_UEVENT_MSG_LEN - 1, MSG_DONTWAIT); + if (ret > 0) { + return parse_event(buf, uevent); + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } else { + SPDK_ERRLOG("Socket read error(%d): %s\n", errno, spdk_strerror(errno)); + return -1; + } + } + + /* connection closed */ + if (ret == 0) { + return -1; + } + return 0; +} + +#else /* Not Linux */ + +int +spdk_uevent_connect(void) +{ + return -1; +} + +int +spdk_get_uevent(int fd, struct spdk_uevent *uevent) +{ + return -1; +} +#endif diff --git a/src/spdk/lib/nvme/nvme_uevent.h b/src/spdk/lib/nvme/nvme_uevent.h new file mode 100644 index 00000000..7fe0ab7a --- /dev/null +++ b/src/spdk/lib/nvme/nvme_uevent.h @@ -0,0 +1,61 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * SPDK uevent + */ + +#include "spdk/env.h" +#include "spdk/nvmf_spec.h" + +#ifndef SPDK_UEVENT_H_ +#define SPDK_UEVENT_H_ + +#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 +#define SPDK_NVME_UEVENT_SUBSYSTEM_VFIO 2 + +enum spdk_nvme_uevent_action { + SPDK_NVME_UEVENT_ADD = 0, + SPDK_NVME_UEVENT_REMOVE = 1, +}; + +struct spdk_uevent { + enum spdk_nvme_uevent_action action; + int subsystem; + char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +}; + +int spdk_uevent_connect(void); +int spdk_get_uevent(int fd, struct spdk_uevent *uevent); + +#endif /* SPDK_UEVENT_H_ */ diff --git a/src/spdk/lib/nvmf/Makefile b/src/spdk/lib/nvmf/Makefile new file mode 100644 index 00000000..8f299a90 --- /dev/null +++ b/src/spdk/lib/nvmf/Makefile @@ -0,0 +1,63 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
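A consumer of this header is expected to open the netlink socket once and then poll it without blocking; a minimal hotplug loop built on the two declared functions might look like this (handler bodies are placeholders):

	/* Sketch of a hotplug poller using nvme_uevent.h. */
	int fd = spdk_uevent_connect();
	struct spdk_uevent ev;

	while (fd >= 0 && spdk_get_uevent(fd, &ev) > 0) {
		if (ev.action == SPDK_NVME_UEVENT_ADD) {
			/* probe the new controller at ev.traddr */
		} else if (ev.action == SPDK_NVME_UEVENT_REMOVE) {
			/* detach the controller at ev.traddr */
		}
	}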
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = ctrlr.c ctrlr_discovery.c ctrlr_bdev.c \ + subsystem.c nvmf.c \ + request.c transport.c + +C_SRCS-$(CONFIG_RDMA) += rdma.c +LIBNAME = nvmf +LOCAL_SYS_LIBS = -luuid +ifeq ($(CONFIG_RDMA),y) +LOCAL_SYS_LIBS += -libverbs -lrdmacm +#Attach only if FreeBSD and RDMA is specified with configure +ifeq ($(OS),FreeBSD) +# Mellanox - MLX4 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx4.*)","") +LOCAL_SYS_LIBS += -lmlx4 +endif +# Mellanox - MLX5 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx5.*)","") +LOCAL_SYS_LIBS += -lmlx5 +endif +# Chelsio HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libcxgb4.*)","") +LOCAL_SYS_LIBS += -lcxgb4 +endif +endif +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/nvmf/ctrlr.c b/src/spdk/lib/nvmf/ctrlr.c new file mode 100644 index 00000000..ed5e68f0 --- /dev/null +++ b/src/spdk/lib/nvmf/ctrlr.c @@ -0,0 +1,1773 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/bit_array.h" +#include "spdk/endian.h" +#include "spdk/thread.h" +#include "spdk/trace.h" +#include "spdk/nvme_spec.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/version.h" + +#include "spdk_internal/log.h" + +#define MIN_KEEP_ALIVE_TIMEOUT 10000 + +#define MODEL_NUMBER "SPDK bdev Controller" + +/* + * Report the SPDK version as the firmware revision. + * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts. + */ +#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING + +static inline void +spdk_nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp, + uint8_t iattr, uint16_t ipo) +{ + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + rsp->status_code_specific.invalid.iattr = iattr; + rsp->status_code_specific.invalid.ipo = ipo; +} + +#define SPDK_NVMF_INVALID_CONNECT_CMD(rsp, field) \ + spdk_nvmf_invalid_connect_response(rsp, 0, offsetof(struct spdk_nvmf_fabric_connect_cmd, field)) +#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \ + spdk_nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field)) + +static void +ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair, + struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvmf_fabric_connect_rsp *rsp) +{ + assert(ctrlr->admin_qpair->group->thread == spdk_get_thread()); + + /* check if we would exceed ctrlr connection limit */ + if (qpair->qid >= spdk_bit_array_capacity(ctrlr->qpair_mask)) { + SPDK_ERRLOG("Requested QID %u but Max QID is %u\n", + qpair->qid, spdk_bit_array_capacity(ctrlr->qpair_mask) - 1); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return; + } + + if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) { + SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return; + } + + qpair->ctrlr = ctrlr; + spdk_bit_array_set(ctrlr->qpair_mask, qpair->qid); + + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + rsp->status_code_specific.success.cntlid = ctrlr->cntlid; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "connect capsule response: cntlid = 0x%04x\n", + rsp->status_code_specific.success.cntlid); +} + +static void +_spdk_nvmf_request_complete(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + + spdk_nvmf_request_complete(req); +} + +static void +_spdk_nvmf_ctrlr_add_admin_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + ctrlr->admin_qpair = qpair; + ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp); + spdk_nvmf_request_complete(req); +} + +static void +_spdk_nvmf_subsystem_add_ctrlr(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + if (spdk_nvmf_subsystem_add_ctrlr(ctrlr->subsys, ctrlr)) { + SPDK_ERRLOG("Unable to add controller to subsystem\n"); + free(ctrlr); + qpair->ctrlr = NULL; + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_thread_send_msg(qpair->group->thread, 
_spdk_nvmf_request_complete, req); + return; + } + + spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_add_admin_qpair, req); +} + +static struct spdk_nvmf_ctrlr * +spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_request *req, + struct spdk_nvmf_fabric_connect_cmd *connect_cmd, + struct spdk_nvmf_fabric_connect_data *connect_data) +{ + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_transport *transport; + + ctrlr = calloc(1, sizeof(*ctrlr)); + if (ctrlr == NULL) { + SPDK_ERRLOG("Memory allocation failed\n"); + return NULL; + } + + req->qpair->ctrlr = ctrlr; + ctrlr->subsys = subsystem; + ctrlr->thread = req->qpair->group->thread; + + transport = req->qpair->transport; + ctrlr->qpair_mask = spdk_bit_array_create(transport->opts.max_qpairs_per_ctrlr); + if (!ctrlr->qpair_mask) { + SPDK_ERRLOG("Failed to allocate controller qpair mask\n"); + free(ctrlr); + return NULL; + } + + ctrlr->feat.keep_alive_timer.bits.kato = connect_cmd->kato; + ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1; + ctrlr->feat.volatile_write_cache.bits.wce = 1; + + /* Subtract 1 for admin queue, 1 for 0's based */ + ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 - + 1; + ctrlr->feat.number_of_queues.bits.nsqr = transport->opts.max_qpairs_per_ctrlr - 1 - + 1; + + memcpy(ctrlr->hostid, connect_data->hostid, sizeof(ctrlr->hostid)); + + ctrlr->vcprop.cap.raw = 0; + ctrlr->vcprop.cap.bits.cqr = 1; /* NVMe-oF specification required */ + ctrlr->vcprop.cap.bits.mqes = transport->opts.max_queue_depth - + 1; /* max queue depth */ + ctrlr->vcprop.cap.bits.ams = 0; /* optional arb mechanisms */ + ctrlr->vcprop.cap.bits.to = 1; /* ready timeout - 500 msec units */ + ctrlr->vcprop.cap.bits.dstrd = 0; /* fixed to 0 for NVMe-oF */ + ctrlr->vcprop.cap.bits.css = SPDK_NVME_CAP_CSS_NVM; /* NVM command set */ + ctrlr->vcprop.cap.bits.mpsmin = 0; /* 2 ^ (12 + mpsmin) == 4k */ + ctrlr->vcprop.cap.bits.mpsmax = 0; /* 2 ^ (12 + mpsmax) == 4k */ + + /* Version Supported: 1.3 */ + ctrlr->vcprop.vs.bits.mjr = 1; + ctrlr->vcprop.vs.bits.mnr = 3; + ctrlr->vcprop.vs.bits.ter = 0; + + ctrlr->vcprop.cc.raw = 0; + ctrlr->vcprop.cc.bits.en = 0; /* Init controller disabled */ + + ctrlr->vcprop.csts.raw = 0; + ctrlr->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */ + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cap 0x%" PRIx64 "\n", ctrlr->vcprop.cap.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "vs 0x%x\n", ctrlr->vcprop.vs.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cc 0x%x\n", ctrlr->vcprop.cc.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "csts 0x%x\n", ctrlr->vcprop.csts.raw); + + spdk_thread_send_msg(subsystem->thread, _spdk_nvmf_subsystem_add_ctrlr, req); + + return ctrlr; +} + +void +spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr) +{ + spdk_nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr); + + free(ctrlr); +} + +static void +spdk_nvmf_ctrlr_add_io_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + /* Unit test will check qpair->ctrlr after calling spdk_nvmf_ctrlr_connect. + * For error case, the value should be NULL. So set it to NULL at first. 
+ */ + qpair->ctrlr = NULL; + + if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + SPDK_ERRLOG("I/O connect not allowed on discovery controller\n"); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (!ctrlr->vcprop.cc.bits.en) { + SPDK_ERRLOG("Got I/O connect before ctrlr was enabled\n"); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (1u << ctrlr->vcprop.cc.bits.iosqes != sizeof(struct spdk_nvme_cmd)) { + SPDK_ERRLOG("Got I/O connect with invalid IOSQES %u\n", + ctrlr->vcprop.cc.bits.iosqes); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + if (1u << ctrlr->vcprop.cc.bits.iocqes != sizeof(struct spdk_nvme_cpl)) { + SPDK_ERRLOG("Got I/O connect with invalid IOCQES %u\n", + ctrlr->vcprop.cc.bits.iocqes); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, qid); + goto end; + } + + ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp); + +end: + spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req); +} + +static void +_spdk_nvmf_ctrlr_add_io_qpair(void *ctx) +{ + struct spdk_nvmf_request *req = ctx; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_fabric_connect_data *data = req->data; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_qpair *admin_qpair; + struct spdk_nvmf_tgt *tgt = qpair->transport->tgt; + struct spdk_nvmf_subsystem *subsystem; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect I/O Queue for controller id 0x%x\n", data->cntlid); + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, data->subnqn); + /* We already checked this in spdk_nvmf_ctrlr_connect */ + assert(subsystem != NULL); + + ctrlr = spdk_nvmf_subsystem_get_ctrlr(subsystem, data->cntlid); + if (ctrlr == NULL) { + SPDK_ERRLOG("Unknown controller ID 0x%x\n", data->cntlid); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid); + spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req); + return; + } + + admin_qpair = ctrlr->admin_qpair; + qpair->ctrlr = ctrlr; + spdk_thread_send_msg(admin_qpair->group->thread, spdk_nvmf_ctrlr_add_io_qpair, req); +} + +static int +spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_fabric_connect_data *data = req->data; + struct spdk_nvmf_fabric_connect_cmd *cmd = &req->cmd->connect_cmd; + struct spdk_nvmf_fabric_connect_rsp *rsp = &req->rsp->connect_rsp; + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_tgt *tgt = qpair->transport->tgt; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_subsystem *subsystem; + const char *subnqn, *hostnqn; + struct spdk_nvme_transport_id listen_trid = {}; + void *end; + + if (req->length < sizeof(struct spdk_nvmf_fabric_connect_data)) { + SPDK_ERRLOG("Connect command data length 0x%x too small\n", req->length); + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "recfmt 0x%x qid %u sqsize %u\n", + cmd->recfmt, cmd->qid, cmd->sqsize); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect data:\n"); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " cntlid: 0x%04x\n", data->cntlid); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostid: %08x-%04x-%04x-%02x%02x-%04x%08x ***\n", + ntohl(*(uint32_t *)&data->hostid[0]), + ntohs(*(uint16_t *)&data->hostid[4]), + ntohs(*(uint16_t *)&data->hostid[6]), + data->hostid[8], + data->hostid[9], + ntohs(*(uint16_t *)&data->hostid[10]), + ntohl(*(uint32_t *)&data->hostid[12])); + + if (cmd->recfmt != 0) { + SPDK_ERRLOG("Connect command unsupported RECFMT %u\n", cmd->recfmt); + 
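The I/O connect checks above tie the CC queue entry sizes to the fixed fabrics capsule layout: the target only accepts 64-byte submission entries and 16-byte completion entries, so a conforming host must have programmed CC.IOSQES = 6 and CC.IOCQES = 4 before issuing an I/O connect (1u << 6 == 64, 1u << 4 == 16). A compile-time restatement of that invariant, assuming SPDK_STATIC_ASSERT from spdk/assert.h and the public spec structures (illustrative, not part of the patch):

#include "spdk/assert.h"
#include "spdk/nvme_spec.h"

/* The only queue entry sizes spdk_nvmf_ctrlr_add_io_qpair() will accept. */
SPDK_STATIC_ASSERT((1u << 6) == sizeof(struct spdk_nvme_cmd), "SQE must be 64 bytes");
SPDK_STATIC_ASSERT((1u << 4) == sizeof(struct spdk_nvme_cpl), "CQE must be 16 bytes");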
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Ensure that subnqn is null terminated */ + end = memchr(data->subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1); + if (!end) { + SPDK_ERRLOG("Connect SUBNQN is not null terminated\n"); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + subnqn = data->subnqn; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " subnqn: \"%s\"\n", subnqn); + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, subnqn); + if (subsystem == NULL) { + SPDK_ERRLOG("Could not find subsystem '%s'\n", subnqn); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, subnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Ensure that hostnqn is null terminated */ + end = memchr(data->hostnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1); + if (!end) { + SPDK_ERRLOG("Connect HOSTNQN is not null terminated\n"); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, hostnqn); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + hostnqn = data->hostnqn; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, " hostnqn: \"%s\"\n", hostnqn); + + if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) { + SPDK_ERRLOG("Subsystem '%s' does not allow host '%s'\n", subnqn, hostnqn); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (spdk_nvmf_qpair_get_listen_trid(qpair, &listen_trid)) { + SPDK_ERRLOG("Subsystem '%s' is unable to enforce access control due to an internal error.\n", + subnqn); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (!spdk_nvmf_subsystem_listener_allowed(subsystem, &listen_trid)) { + SPDK_ERRLOG("Subsystem '%s' does not allow host '%s' to connect at this address.\n", subnqn, + hostnqn); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_HOST; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* + * SQSIZE is a 0-based value, so it must be at least 1 (minimum queue depth is 2) and + * strictly less than max_queue_depth. + */ + if (cmd->sqsize == 0 || cmd->sqsize >= qpair->transport->opts.max_queue_depth) { + SPDK_ERRLOG("Invalid SQSIZE %u (min 1, max %u)\n", + cmd->sqsize, qpair->transport->opts.max_queue_depth - 1); + SPDK_NVMF_INVALID_CONNECT_CMD(rsp, sqsize); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + qpair->sq_head_max = cmd->sqsize; + qpair->qid = cmd->qid; + + if (cmd->qid == 0) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Connect Admin Queue for controller ID 0x%x\n", data->cntlid); + + if (data->cntlid != 0xFFFF) { + /* This NVMf target only supports dynamic mode. 
*/ + SPDK_ERRLOG("The NVMf target only supports dynamic mode (CNTLID = 0x%x).\n", data->cntlid); + SPDK_NVMF_INVALID_CONNECT_DATA(rsp, cntlid); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* Establish a new ctrlr */ + ctrlr = spdk_nvmf_ctrlr_create(subsystem, req, cmd, data); + if (!ctrlr) { + SPDK_ERRLOG("spdk_nvmf_ctrlr_create() failed\n"); + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } else { + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + } else { + spdk_thread_send_msg(subsystem->thread, _spdk_nvmf_ctrlr_add_io_qpair, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } +} + +static uint64_t +nvmf_prop_get_cap(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.cap.raw; +} + +static uint64_t +nvmf_prop_get_vs(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.vs.raw; +} + +static uint64_t +nvmf_prop_get_cc(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.cc.raw; +} + +static bool +nvmf_prop_set_cc(struct spdk_nvmf_ctrlr *ctrlr, uint64_t value) +{ + union spdk_nvme_cc_register cc, diff; + + cc.raw = (uint32_t)value; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cur CC: 0x%08x\n", ctrlr->vcprop.cc.raw); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "new CC: 0x%08x\n", cc.raw); + + /* + * Calculate which bits changed between the current and new CC. + * Mark each bit as 0 once it is handled to determine if any unhandled bits were changed. + */ + diff.raw = cc.raw ^ ctrlr->vcprop.cc.raw; + + if (diff.bits.en) { + if (cc.bits.en) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Enable!\n"); + ctrlr->vcprop.cc.bits.en = 1; + ctrlr->vcprop.csts.bits.rdy = 1; + } else { + SPDK_ERRLOG("CC.EN transition from 1 to 0 (reset) not implemented!\n"); + + } + diff.bits.en = 0; + } + + if (diff.bits.shn) { + if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || + cc.bits.shn == SPDK_NVME_SHN_ABRUPT) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Property Set CC Shutdown %u%ub!\n", + cc.bits.shn >> 1, cc.bits.shn & 1); + ctrlr->vcprop.cc.bits.shn = cc.bits.shn; + ctrlr->vcprop.cc.bits.en = 0; + ctrlr->vcprop.csts.bits.rdy = 0; + ctrlr->vcprop.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; + } else if (cc.bits.shn == 0) { + ctrlr->vcprop.cc.bits.shn = 0; + } else { + SPDK_ERRLOG("Prop Set CC: Invalid SHN value %u%ub\n", + cc.bits.shn >> 1, cc.bits.shn & 1); + return false; + } + diff.bits.shn = 0; + } + + if (diff.bits.iosqes) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOSQES = %u (%u bytes)\n", + cc.bits.iosqes, 1u << cc.bits.iosqes); + ctrlr->vcprop.cc.bits.iosqes = cc.bits.iosqes; + diff.bits.iosqes = 0; + } + + if (diff.bits.iocqes) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Prop Set IOCQES = %u (%u bytes)\n", + cc.bits.iocqes, 1u << cc.bits.iocqes); + ctrlr->vcprop.cc.bits.iocqes = cc.bits.iocqes; + diff.bits.iocqes = 0; + } + + if (diff.raw != 0) { + SPDK_ERRLOG("Prop Set CC toggled reserved bits 0x%x!\n", diff.raw); + return false; + } + + return true; +} + +static uint64_t +nvmf_prop_get_csts(struct spdk_nvmf_ctrlr *ctrlr) +{ + return ctrlr->vcprop.csts.raw; +} + +struct nvmf_prop { + uint32_t ofst; + uint8_t size; + char name[11]; + uint64_t (*get_cb)(struct spdk_nvmf_ctrlr *ctrlr); + bool (*set_cb)(struct spdk_nvmf_ctrlr *ctrlr, uint64_t value); +}; + +#define PROP(field, size, get_cb, set_cb) \ + { \ + offsetof(struct spdk_nvme_registers, field), \ + SPDK_NVMF_PROP_SIZE_##size, \ + #field, \ + get_cb, set_cb \ + } + +static const struct nvmf_prop nvmf_props[] = { + PROP(cap, 8, nvmf_prop_get_cap, NULL), + PROP(vs, 4, nvmf_prop_get_vs, NULL), + 
PROP(cc, 4, nvmf_prop_get_cc, nvmf_prop_set_cc), + PROP(csts, 4, nvmf_prop_get_csts, NULL), +}; + +static const struct nvmf_prop * +find_prop(uint32_t ofst) +{ + size_t i; + + for (i = 0; i < SPDK_COUNTOF(nvmf_props); i++) { + const struct nvmf_prop *prop = &nvmf_props[i]; + + if (prop->ofst == ofst) { + return prop; + } + } + + return NULL; +} + +static int +spdk_nvmf_property_get(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_fabric_prop_get_cmd *cmd = &req->cmd->prop_get_cmd; + struct spdk_nvmf_fabric_prop_get_rsp *response = &req->rsp->prop_get_rsp; + const struct nvmf_prop *prop; + + response->status.sc = 0; + response->value.u64 = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x\n", + cmd->attrib.size, cmd->ofst); + + if (cmd->attrib.size != SPDK_NVMF_PROP_SIZE_4 && + cmd->attrib.size != SPDK_NVMF_PROP_SIZE_8) { + SPDK_ERRLOG("Invalid size value %d\n", cmd->attrib.size); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + prop = find_prop(cmd->ofst); + if (prop == NULL || prop->get_cb == NULL) { + /* Reserved properties return 0 when read */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name); + if (cmd->attrib.size != prop->size) { + SPDK_ERRLOG("offset 0x%x size mismatch: cmd %u, prop %u\n", + cmd->ofst, cmd->attrib.size, prop->size); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + response->value.u64 = prop->get_cb(ctrlr); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "response value: 0x%" PRIx64 "\n", response->value.u64); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_property_set(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + const struct nvmf_prop *prop; + uint64_t value; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "size %d, offset 0x%x, value 0x%" PRIx64 "\n", + cmd->attrib.size, cmd->ofst, cmd->value.u64); + + prop = find_prop(cmd->ofst); + if (prop == NULL || prop->set_cb == NULL) { + SPDK_ERRLOG("Invalid offset 0x%x\n", cmd->ofst); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "name: %s\n", prop->name); + if (cmd->attrib.size != prop->size) { + SPDK_ERRLOG("offset 0x%x size mismatch: cmd %u, prop %u\n", + cmd->ofst, cmd->attrib.size, prop->size); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + value = cmd->value.u64; + if (prop->size == SPDK_NVMF_PROP_SIZE_4) { + value = (uint32_t)value; + } + + if (!prop->set_cb(ctrlr, value)) { + SPDK_ERRLOG("prop set_cb failed\n"); + response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_arbitration(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = 
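find_prop() keys the table above on byte offsets into struct spdk_nvme_registers, so a fabrics Property Get or Set names a controller register exactly as a PCIe host would, by its offset in the standard register map. Only CAP, VS, CC and CSTS are served; anything else reads back as zero on get and is rejected on set. A small standalone check of the offsets the PROP() entries resolve to (illustrative; the expected output follows the NVMe register layout):

#include <stdio.h>
#include <stddef.h>

#include "spdk/nvme_spec.h"

int
main(void)
{
	/* Expected: cap=0x0 vs=0x8 cc=0x14 csts=0x1c */
	printf("cap=0x%zx vs=0x%zx cc=0x%zx csts=0x%zx\n",
	       offsetof(struct spdk_nvme_registers, cap),
	       offsetof(struct spdk_nvme_registers, vs),
	       offsetof(struct spdk_nvme_registers, cc),
	       offsetof(struct spdk_nvme_registers, csts));
	return 0;
}

A Property Get of CSTS therefore carries OFST = 0x1C with ATTRIB.SIZE = SPDK_NVMF_PROP_SIZE_4, and the value returned is ctrlr->vcprop.csts.raw via nvmf_prop_get_csts().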
&req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Arbitration (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.arbitration.raw = cmd->cdw11; + ctrlr->feat.arbitration.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_power_management(struct spdk_nvmf_request *req) +{ + union spdk_nvme_feat_power_management opts; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Power Management (cdw11 = 0x%0x)\n", cmd->cdw11); + opts.raw = cmd->cdw11; + + /* Only PS = 0 is allowed, since we report NPSS = 0 */ + if (opts.bits.ps != 0) { + SPDK_ERRLOG("Invalid power state %u\n", opts.bits.ps); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ctrlr->feat.power_management.raw = cmd->cdw11; + ctrlr->feat.power_management.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static bool +temp_threshold_opts_valid(const union spdk_nvme_feat_temperature_threshold *opts) +{ + /* + * Valid TMPSEL values: + * 0000b - 1000b: temperature sensors + * 1111b: set all implemented temperature sensors + */ + if (opts->bits.tmpsel >= 9 && opts->bits.tmpsel != 15) { + /* 1001b - 1110b: reserved */ + SPDK_ERRLOG("Invalid TMPSEL %u\n", opts->bits.tmpsel); + return false; + } + + /* + * Valid THSEL values: + * 00b: over temperature threshold + * 01b: under temperature threshold + */ + if (opts->bits.thsel > 1) { + /* 10b - 11b: reserved */ + SPDK_ERRLOG("Invalid THSEL %u\n", opts->bits.thsel); + return false; + } + + return true; +} + +static int +spdk_nvmf_ctrlr_set_features_temperature_threshold(struct spdk_nvmf_request *req) +{ + union spdk_nvme_feat_temperature_threshold opts; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11); + opts.raw = cmd->cdw11; + + if (!temp_threshold_opts_valid(&opts)) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* TODO: no sensors implemented - ignore new values */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_get_features_temperature_threshold(struct spdk_nvmf_request *req) +{ + union spdk_nvme_feat_temperature_threshold opts; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Temperature Threshold (cdw11 = 0x%0x)\n", cmd->cdw11); + opts.raw = cmd->cdw11; + + if (!temp_threshold_opts_valid(&opts)) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + /* TODO: no sensors implemented - return 0 for all thresholds */ + rsp->cdw0 = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_error_recovery(struct spdk_nvmf_request *req) +{ + union spdk_nvme_feat_error_recovery opts; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Error Recovery (cdw11 = 0x%0x)\n", 
cmd->cdw11); + opts.raw = cmd->cdw11; + + if (opts.bits.dulbe) { + /* + * Host is not allowed to set this bit, since we don't advertise it in + * Identify Namespace. + */ + SPDK_ERRLOG("Host set unsupported DULBE bit\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ctrlr->feat.error_recovery.raw = cmd->cdw11; + ctrlr->feat.error_recovery.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_volatile_write_cache(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.volatile_write_cache.raw = cmd->cdw11; + ctrlr->feat.volatile_write_cache.bits.reserved = 0; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Volatile Write Cache %s\n", + ctrlr->feat.volatile_write_cache.bits.wce ? "Enabled" : "Disabled"); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_write_atomicity(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Write Atomicity (cdw11 = 0x%0x)\n", cmd->cdw11); + + ctrlr->feat.write_atomicity.raw = cmd->cdw11; + ctrlr->feat.write_atomicity.bits.reserved = 0; + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_host_identifier(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + SPDK_ERRLOG("Set Features - Host Identifier not allowed\n"); + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_get_features_host_identifier(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + union spdk_nvme_feat_host_identifier opts; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get Features - Host Identifier\n"); + + opts.raw = cmd->cdw11; + if (!opts.bits.exhid) { + /* NVMe over Fabrics requires EXHID=1 (128-bit/16-byte host ID) */ + SPDK_ERRLOG("Get Features - Host Identifier with EXHID=0 not allowed\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (req->data == NULL || req->length < sizeof(ctrlr->hostid)) { + SPDK_ERRLOG("Invalid data buffer for Get Features - Host Identifier\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + memcpy(req->data, ctrlr->hostid, sizeof(ctrlr->hostid)); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11); + + if (cmd->cdw11 == 0) { + rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID; + } else if (cmd->cdw11 < MIN_KEEP_ALIVE_TIMEOUT) { + ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT; + } else { + ctrlr->feat.keep_alive_timer.bits.kato = cmd->cdw11; + } + + 
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n", + ctrlr->feat.keep_alive_timer.bits.kato); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint32_t count; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Number of Queues, cdw11 0x%x\n", + req->cmd->nvme_cmd.cdw11); + + count = spdk_bit_array_count_set(ctrlr->qpair_mask); + /* verify that the controller is ready to process commands */ + if (count > 1) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Queue pairs already active!\n"); + rsp->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + } else { + /* + * Ignore the value requested by the host - + * always return the pre-configured value based on max_qpairs_allowed. + */ + rsp->cdw0 = ctrlr->feat.number_of_queues.raw; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_set_features_async_event_configuration(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Async Event Configuration, cdw11 0x%08x\n", + cmd->cdw11); + ctrlr->feat.async_event_configuration.raw = cmd->cdw11; + ctrlr->feat.async_event_configuration.bits.reserved = 0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n"); + + /* Only one asynchronous event is supported for now */ + if (ctrlr->aer_req != NULL) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "AERL exceeded\n"); + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (ctrlr->notice_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) { + rsp->cdw0 = ctrlr->notice_event.raw; + ctrlr->notice_event.raw = 0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ctrlr->aer_req = req; + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static void +spdk_nvmf_get_firmware_slot_log_page(void *buffer, uint64_t offset, uint32_t length) +{ + struct spdk_nvme_firmware_page fw_page; + size_t copy_len; + + memset(&fw_page, 0, sizeof(fw_page)); + fw_page.afi.active_slot = 1; + fw_page.afi.next_reset_slot = 0; + spdk_strcpy_pad(fw_page.revision[0], FW_VERSION, sizeof(fw_page.revision[0]), ' '); + + if (offset < sizeof(fw_page)) { + copy_len = spdk_min(sizeof(fw_page) - offset, length); + if (copy_len > 0) { + memcpy(buffer, (const char *)&fw_page + offset, copy_len); + } + } +} + +void +spdk_nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid) +{ + uint16_t max_changes = SPDK_COUNTOF(ctrlr->changed_ns_list.ns_list); + uint16_t i; + bool found = false; + + for (i = 0; i < ctrlr->changed_ns_list_count; i++) { + if (ctrlr->changed_ns_list.ns_list[i] == nsid) { + /* nsid is already in the list */ + found = true; + break; + } + } + + if (!found) { + if (ctrlr->changed_ns_list_count == max_changes) { + /* Out of space - set first entry to FFFFFFFFh and zero-fill the rest. 
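Two of the Set Features handlers above substitute target policy for the host's request. Keep Alive Timer clamps low values: with MIN_KEEP_ALIVE_TIMEOUT = 10000, a requested KATO of 5000 ms is stored as 10000 ms, 120000 ms is stored unchanged, and a KATO of 0 completes with SPDK_NVME_SC_KEEP_ALIVE_INVALID. Number of Queues ignores the host's NCQR/NSQR entirely and returns the pair pre-computed at connect time (failing with a Command Sequence Error if I/O queue pairs are already connected). As a worked example with an assumed transport configuration of max_qpairs_per_ctrlr = 4: ncqr = nsqr = 4 - 1 (admin qpair) - 1 (0's based) = 2, so the completion dword reads back as 0x00020002 (NSQA in the low word, NCQA in the high word) and the host may create up to three I/O submission/completion queue pairs.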
*/ + ctrlr->changed_ns_list.ns_list[0] = 0xFFFFFFFFu; + for (i = 1; i < max_changes; i++) { + ctrlr->changed_ns_list.ns_list[i] = 0; + } + } else { + ctrlr->changed_ns_list.ns_list[ctrlr->changed_ns_list_count++] = nsid; + } + } + + spdk_nvmf_ctrlr_async_event_ns_notice(ctrlr); +} + +static void +spdk_nvmf_get_changed_ns_list_log_page(struct spdk_nvmf_ctrlr *ctrlr, + void *buffer, uint64_t offset, uint32_t length) +{ + size_t copy_length; + + if (offset < sizeof(ctrlr->changed_ns_list)) { + copy_length = spdk_min(length, sizeof(ctrlr->changed_ns_list) - offset); + if (copy_length) { + memcpy(buffer, (char *)&ctrlr->changed_ns_list + offset, copy_length); + } + } + + /* Clear log page each time it is read */ + ctrlr->changed_ns_list_count = 0; + memset(&ctrlr->changed_ns_list, 0, sizeof(ctrlr->changed_ns_list)); +} + +/* The structure can be modified if we provide support for other commands in future */ +static const struct spdk_nvme_cmds_and_effect_log_page g_cmds_and_effect_log_page = { + .admin_cmds_supported = { + /* CSUPP, LBCC, NCC, NIC, CCC, CSE */ + /* Get Log Page */ + [SPDK_NVME_OPC_GET_LOG_PAGE] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Identify */ + [SPDK_NVME_OPC_IDENTIFY] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Abort */ + [SPDK_NVME_OPC_ABORT] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Set Features */ + [SPDK_NVME_OPC_SET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Get Features */ + [SPDK_NVME_OPC_GET_FEATURES] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Async Event Request */ + [SPDK_NVME_OPC_ASYNC_EVENT_REQUEST] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* Keep Alive */ + [SPDK_NVME_OPC_KEEP_ALIVE] = {1, 0, 0, 0, 0, 0, 0, 0}, + }, + .io_cmds_supported = { + /* FLUSH */ + [SPDK_NVME_OPC_FLUSH] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* WRITE */ + [SPDK_NVME_OPC_WRITE] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* READ */ + [SPDK_NVME_OPC_READ] = {1, 0, 0, 0, 0, 0, 0, 0}, + /* WRITE ZEROES */ + [SPDK_NVME_OPC_WRITE_ZEROES] = {1, 1, 0, 0, 0, 0, 0, 0}, + /* DATASET MANAGEMENT */ + [SPDK_NVME_OPC_DATASET_MANAGEMENT] = {1, 1, 0, 0, 0, 0, 0, 0}, + }, +}; + +static void +spdk_nvmf_get_cmds_and_effects_log_page(void *buffer, + uint64_t offset, uint32_t length) +{ + uint32_t page_size = sizeof(struct spdk_nvme_cmds_and_effect_log_page); + size_t copy_len = 0; + size_t zero_len = length; + + if (offset < page_size) { + copy_len = spdk_min(page_size - offset, length); + zero_len -= copy_len; + memcpy(buffer, (char *)(&g_cmds_and_effect_log_page) + offset, copy_len); + } + + if (zero_len) { + memset((char *)buffer + copy_len, 0, zero_len); + } +} + +static int +spdk_nvmf_ctrlr_get_log_page(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + uint64_t offset, len; + uint32_t numdl, numdu; + uint8_t lid; + + if (req->data == NULL) { + SPDK_ERRLOG("get log command with no buffer\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + offset = (uint64_t)cmd->cdw12 | ((uint64_t)cmd->cdw13 << 32); + if (offset & 3) { + SPDK_ERRLOG("Invalid log page offset 0x%" PRIx64 "\n", offset); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + numdl = (cmd->cdw10 >> 16) & 0xFFFFu; + numdu = (cmd->cdw11) & 0xFFFFu; + len = ((numdu << 16) + numdl + (uint64_t)1) * 4; + if 
(len > req->length) { + SPDK_ERRLOG("Get log page: len (%" PRIu64 ") > buf size (%u)\n", + len, req->length); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + lid = cmd->cdw10 & 0xFF; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Get log page: LID=0x%02X offset=0x%" PRIx64 " len=0x%" PRIx64 "\n", + lid, offset, len); + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + switch (lid) { + case SPDK_NVME_LOG_DISCOVERY: + spdk_nvmf_get_discovery_log_page(subsystem->tgt, req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + default: + goto invalid_log_page; + } + } else { + switch (lid) { + case SPDK_NVME_LOG_ERROR: + case SPDK_NVME_LOG_HEALTH_INFORMATION: + /* TODO: actually fill out log page data */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_FIRMWARE_SLOT: + spdk_nvmf_get_firmware_slot_log_page(req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_COMMAND_EFFECTS_LOG: + spdk_nvmf_get_cmds_and_effects_log_page(req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + case SPDK_NVME_LOG_CHANGED_NS_LIST: + spdk_nvmf_get_changed_ns_list_log_page(ctrlr, req->data, offset, len); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + default: + goto invalid_log_page; + } + } + +invalid_log_page: + SPDK_ERRLOG("Unsupported Get Log Page 0x%02X\n", lid); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + struct spdk_nvme_ns_data *nsdata) +{ + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvmf_ns *ns; + uint32_t max_num_blocks; + + if (cmd->nsid == 0 || cmd->nsid > subsystem->max_nsid) { + SPDK_ERRLOG("Identify Namespace for invalid NSID %u\n", cmd->nsid); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _spdk_nvmf_subsystem_get_ns(subsystem, cmd->nsid); + if (ns == NULL || ns->bdev == NULL) { + /* + * Inactive namespaces should return a zero filled data structure. + * The data buffer is already zeroed by spdk_nvmf_ctrlr_process_admin_cmd(), + * so we can just return early here. 
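The NUMD handling above reassembles the 0's-based dword count that the host split across CDW10 (NUMDL in the upper 16 bits) and CDW11 (NUMDU in the lower 16 bits), while the byte offset comes from CDW12/CDW13; both are then checked against the buffer the transport actually delivered. Worked through for a host reading the first 4 KiB of a log page, i.e. NUMDU = 0, NUMDL = 0x3FF, offset = 0:

	len = ((0 << 16) + 0x3FF + 1) * 4 = 1024 dwords = 4096 bytes

which the handler accepts as long as req->length is at least 4096 and the offset is dword-aligned.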
+ */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Identify Namespace for inactive NSID %u\n", cmd->nsid); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + spdk_nvmf_bdev_ctrlr_identify_ns(ns, nsdata); + + /* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */ + max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size / + (1U << nsdata->lbaf[nsdata->flbas.format].lbads); + if (nsdata->noiob > max_num_blocks) { + nsdata->noiob = max_num_blocks; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata) +{ + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport; + + /* + * Common fields for discovery and NVM subsystems + */ + spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' '); + assert((transport->opts.max_io_size % 4096) == 0); + cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096); + cdata->cntlid = ctrlr->cntlid; + cdata->ver = ctrlr->vcprop.vs; + cdata->lpa.edlp = 1; + cdata->elpe = 127; + cdata->maxcmd = transport->opts.max_queue_depth; + cdata->sgls.supported = 1; + cdata->sgls.keyed_sgl = 1; + cdata->sgls.sgl_offset = 1; + spdk_strcpy_pad(cdata->subnqn, subsystem->subnqn, sizeof(cdata->subnqn), '\0'); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr data: maxcmd 0x%x\n", cdata->maxcmd); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "sgls data: 0x%x\n", from_le32(&cdata->sgls)); + + /* + * NVM subsystem fields (reserved for discovery subsystems) + */ + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) { + spdk_strcpy_pad(cdata->mn, MODEL_NUMBER, sizeof(cdata->mn), ' '); + spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' '); + cdata->kas = 10; + + cdata->rab = 6; + cdata->cmic.multi_port = 1; + cdata->cmic.multi_host = 1; + cdata->oaes.ns_attribute_notices = 1; + cdata->ctratt.host_id_exhid_supported = 1; + cdata->aerl = 0; + cdata->frmw.slot1_ro = 1; + cdata->frmw.num_slots = 1; + + cdata->lpa.celp = 1; /* Command Effects log page supported */ + + cdata->sqes.min = 6; + cdata->sqes.max = 6; + cdata->cqes.min = 4; + cdata->cqes.max = 4; + cdata->nn = subsystem->max_nsid; + cdata->vwc.present = 1; + cdata->vwc.flush_broadcast = SPDK_NVME_FLUSH_BROADCAST_NOT_SUPPORTED; + + cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16; + cdata->nvmf_specific.iorcsz = sizeof(struct spdk_nvme_cpl) / 16; + cdata->nvmf_specific.icdoff = 0; /* offset starts directly after SQE */ + cdata->nvmf_specific.ctrattr.ctrlr_model = SPDK_NVMF_CTRLR_MODEL_DYNAMIC; + cdata->nvmf_specific.msdbd = 1; /* target supports single SGL in capsule */ + + /* TODO: this should be set by the transport */ + cdata->nvmf_specific.ioccsz += transport->opts.in_capsule_data_size / 16; + + cdata->oncs.dsm = spdk_nvmf_ctrlr_dsm_supported(ctrlr); + cdata->oncs.write_zeroes = spdk_nvmf_ctrlr_write_zeroes_supported(ctrlr); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ioccsz 0x%x\n", + cdata->nvmf_specific.ioccsz); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: iorcsz 0x%x\n", + cdata->nvmf_specific.iorcsz); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: icdoff 0x%x\n", + cdata->nvmf_specific.icdoff); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: ctrattr 0x%x\n", + *(uint8_t *)&cdata->nvmf_specific.ctrattr); + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ext ctrlr data: msdbd 0x%x\n", + 
cdata->nvmf_specific.msdbd); + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_identify_active_ns_list(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + struct spdk_nvme_ns_list *ns_list) +{ + struct spdk_nvmf_ns *ns; + uint32_t count = 0; + + if (cmd->nsid >= 0xfffffffeUL) { + SPDK_ERRLOG("Identify Active Namespace List with invalid NSID %u\n", cmd->nsid); + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + if (ns->opts.nsid <= cmd->nsid) { + continue; + } + + ns_list->ns_list[count++] = ns->opts.nsid; + if (count == SPDK_COUNTOF(ns_list->ns_list)) { + break; + } + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static void +_add_ns_id_desc(void **buf_ptr, size_t *buf_remain, + enum spdk_nvme_nidt type, + const void *data, size_t data_size) +{ + struct spdk_nvme_ns_id_desc *desc; + size_t desc_size = sizeof(*desc) + data_size; + + /* + * These should never fail in practice, since all valid NS ID descriptors + * should be defined so that they fit in the available 4096-byte buffer. + */ + assert(data_size > 0); + assert(data_size <= UINT8_MAX); + assert(desc_size < *buf_remain); + if (data_size == 0 || data_size > UINT8_MAX || desc_size > *buf_remain) { + return; + } + + desc = *buf_ptr; + desc->nidt = type; + desc->nidl = data_size; + memcpy(desc->nid, data, data_size); + + *buf_ptr += desc_size; + *buf_remain -= desc_size; +} + +static int +spdk_nvmf_ctrlr_identify_ns_id_descriptor_list( + struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_cmd *cmd, + struct spdk_nvme_cpl *rsp, + void *id_desc_list, size_t id_desc_list_size) +{ + struct spdk_nvmf_ns *ns; + size_t buf_remain = id_desc_list_size; + void *buf_ptr = id_desc_list; + + ns = _spdk_nvmf_subsystem_get_ns(subsystem, cmd->nsid); + if (ns == NULL || ns->bdev == NULL) { + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + +#define ADD_ID_DESC(type, data, size) \ + do { \ + if (!spdk_mem_all_zero(data, size)) { \ + _add_ns_id_desc(&buf_ptr, &buf_remain, type, data, size); \ + } \ + } while (0) + + ADD_ID_DESC(SPDK_NVME_NIDT_EUI64, ns->opts.eui64, sizeof(ns->opts.eui64)); + ADD_ID_DESC(SPDK_NVME_NIDT_NGUID, ns->opts.nguid, sizeof(ns->opts.nguid)); + ADD_ID_DESC(SPDK_NVME_NIDT_UUID, &ns->opts.uuid, sizeof(ns->opts.uuid)); + + /* + * The list is automatically 0-terminated because controller to host buffers in + * admin commands always get zeroed in spdk_nvmf_ctrlr_process_admin_cmd(). 
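Several of the Identify Controller fields filled in above are derived from the transport options rather than being constants, so the advertised limits track the configuration. As a worked example, assuming transport options max_io_size = 131072 and in_capsule_data_size = 4096:

	mdts   = spdk_u32log2(131072 / 4096) = 5, i.e. up to 2^5 * 4 KiB = 128 KiB per command
	         (the unit is the CAP.MPSMIN page size, 4 KiB here);
	ioccsz = sizeof(struct spdk_nvme_cmd) / 16 + 4096 / 16 = 4 + 256 = 260 sixteen-byte units;
	iorcsz = sizeof(struct spdk_nvme_cpl) / 16 = 1;
	maxcmd = max_queue_depth as configured.

The assert that max_io_size is a multiple of 4096 is what keeps the mdts computation exact.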
+ */ + +#undef ADD_ID_DESC + + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_identify(struct spdk_nvmf_request *req) +{ + uint8_t cns; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; + + if (req->data == NULL || req->length < 4096) { + SPDK_ERRLOG("identify command with invalid buffer\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + cns = cmd->cdw10 & 0xFF; + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY && + cns != SPDK_NVME_IDENTIFY_CTRLR) { + /* Discovery controllers only support Identify Controller */ + goto invalid_cns; + } + + switch (cns) { + case SPDK_NVME_IDENTIFY_NS: + return spdk_nvmf_ctrlr_identify_ns(ctrlr, cmd, rsp, req->data); + case SPDK_NVME_IDENTIFY_CTRLR: + return spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, req->data); + case SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST: + return spdk_nvmf_ctrlr_identify_active_ns_list(subsystem, cmd, rsp, req->data); + case SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST: + return spdk_nvmf_ctrlr_identify_ns_id_descriptor_list(subsystem, cmd, rsp, req->data, req->length); + default: + goto invalid_cns; + } + +invalid_cns: + SPDK_ERRLOG("Identify command with unsupported CNS 0x%02x\n", cns); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + + +static struct spdk_nvmf_request * +spdk_nvmf_qpair_abort(struct spdk_nvmf_qpair *qpair, uint16_t cid) +{ + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + struct spdk_nvmf_request *req; + + if (spdk_nvmf_qpair_is_admin_queue(qpair)) { + if (ctrlr->aer_req && ctrlr->aer_req->cmd->nvme_cmd.cid == cid) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Aborting AER request\n"); + req = ctrlr->aer_req; + ctrlr->aer_req = NULL; + return req; + } + } + + /* TODO: track list of outstanding requests in qpair? 
*/ + return NULL; +} + +static void +spdk_nvmf_ctrlr_abort_done(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i); + + spdk_nvmf_request_complete(req); +} + +static void +spdk_nvmf_ctrlr_abort_on_pg(struct spdk_io_channel_iter *i) +{ + struct spdk_nvmf_request *req = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch); + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + uint16_t sqid = cmd->cdw10 & 0xFFFFu; + struct spdk_nvmf_qpair *qpair; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if (qpair->ctrlr == req->qpair->ctrlr && qpair->qid == sqid) { + struct spdk_nvmf_request *req_to_abort; + uint16_t cid = cmd->cdw10 >> 16; + + /* Found the qpair */ + + req_to_abort = spdk_nvmf_qpair_abort(qpair, cid); + if (req_to_abort == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "cid %u not found\n", cid); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD; + spdk_for_each_channel_continue(i, -EINVAL); + return; + } + + /* Complete the request with aborted status */ + req_to_abort->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req_to_abort->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + spdk_nvmf_request_complete(req_to_abort); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "abort ctrlr=%p req=%p sqid=%u cid=%u successful\n", + qpair->ctrlr, req_to_abort, sqid, cid); + rsp->cdw0 = 0; /* Command successfully aborted */ + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_SUCCESS; + /* Return -1 for the status so the iteration across threads stops. */ + spdk_for_each_channel_continue(i, -1); + + } + } + + spdk_for_each_channel_continue(i, 0); +} + +static int +spdk_nvmf_ctrlr_abort(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = 1; /* Command not aborted */ + rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + + /* Send a message to each poll group, searching for this ctrlr, sqid, and command. 
*/ + spdk_for_each_channel(req->qpair->ctrlr->subsys->tgt, + spdk_nvmf_ctrlr_abort_on_pg, + req, + spdk_nvmf_ctrlr_abort_done + ); + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static int +get_features_generic(struct spdk_nvmf_request *req, uint32_t cdw0) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = cdw0; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +spdk_nvmf_ctrlr_get_features(struct spdk_nvmf_request *req) +{ + uint8_t feature; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + feature = cmd->cdw10 & 0xff; /* mask out the FID value */ + switch (feature) { + case SPDK_NVME_FEAT_ARBITRATION: + return get_features_generic(req, ctrlr->feat.arbitration.raw); + case SPDK_NVME_FEAT_POWER_MANAGEMENT: + return get_features_generic(req, ctrlr->feat.power_management.raw); + case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD: + return spdk_nvmf_ctrlr_get_features_temperature_threshold(req); + case SPDK_NVME_FEAT_ERROR_RECOVERY: + return get_features_generic(req, ctrlr->feat.error_recovery.raw); + case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE: + return get_features_generic(req, ctrlr->feat.volatile_write_cache.raw); + case SPDK_NVME_FEAT_NUMBER_OF_QUEUES: + return get_features_generic(req, ctrlr->feat.number_of_queues.raw); + case SPDK_NVME_FEAT_WRITE_ATOMICITY: + return get_features_generic(req, ctrlr->feat.write_atomicity.raw); + case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION: + return get_features_generic(req, ctrlr->feat.async_event_configuration.raw); + case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER: + return get_features_generic(req, ctrlr->feat.keep_alive_timer.raw); + case SPDK_NVME_FEAT_HOST_IDENTIFIER: + return spdk_nvmf_ctrlr_get_features_host_identifier(req); + default: + SPDK_ERRLOG("Get Features command with unsupported feature ID 0x%02x\n", feature); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } +} + +static int +spdk_nvmf_ctrlr_set_features(struct spdk_nvmf_request *req) +{ + uint8_t feature; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + feature = cmd->cdw10 & 0xff; /* mask out the FID value */ + switch (feature) { + case SPDK_NVME_FEAT_ARBITRATION: + return spdk_nvmf_ctrlr_set_features_arbitration(req); + case SPDK_NVME_FEAT_POWER_MANAGEMENT: + return spdk_nvmf_ctrlr_set_features_power_management(req); + case SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD: + return spdk_nvmf_ctrlr_set_features_temperature_threshold(req); + case SPDK_NVME_FEAT_ERROR_RECOVERY: + return spdk_nvmf_ctrlr_set_features_error_recovery(req); + case SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE: + return spdk_nvmf_ctrlr_set_features_volatile_write_cache(req); + case SPDK_NVME_FEAT_NUMBER_OF_QUEUES: + return spdk_nvmf_ctrlr_set_features_number_of_queues(req); + case SPDK_NVME_FEAT_WRITE_ATOMICITY: + return spdk_nvmf_ctrlr_set_features_write_atomicity(req); + case SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION: + return spdk_nvmf_ctrlr_set_features_async_event_configuration(req); + case SPDK_NVME_FEAT_KEEP_ALIVE_TIMER: + return spdk_nvmf_ctrlr_set_features_keep_alive_timer(req); + case SPDK_NVME_FEAT_HOST_IDENTIFIER: + return spdk_nvmf_ctrlr_set_features_host_identifier(req); + default: + SPDK_ERRLOG("Set Features command with unsupported feature ID 0x%02x\n", feature); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; 
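The Abort path above pulls SQID from the low 16 bits of CDW10 and CID from the high 16 bits, then walks every poll group looking for the owning queue pair; CDW0 bit 0 of the completion follows the NVMe convention, so the handler pre-loads 1 ("command not aborted") and only clears it when a matching request is found and completed with SPDK_NVME_SC_ABORTED_BY_REQUEST. For illustration, a hypothetical host-side encoding that mirrors this decode (not part of the patch):

#include <stdint.h>
#include <string.h>

#include "spdk/nvme_spec.h"

/* Build an Abort admin command for the given submission queue and command ID,
 * matching the decode above: sqid = cdw10 & 0xFFFF, cid = cdw10 >> 16. */
static void
build_abort_cmd(struct spdk_nvme_cmd *cmd, uint16_t sqid, uint16_t cid)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->opc = SPDK_NVME_OPC_ABORT;
	cmd->cdw10 = ((uint32_t)cid << 16) | sqid;
}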
+ } +} + +static int +spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req) +{ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n"); + /* + * To handle keep alive just clear or reset the + * ctrlr based keep alive duration counter. + * When added, a separate timer based process + * will monitor if the time since last recorded + * keep alive has exceeded the max duration and + * take appropriate action. + */ + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +int +spdk_nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + if (ctrlr == NULL) { + SPDK_ERRLOG("Admin command sent before CONNECT\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (ctrlr->vcprop.cc.bits.en != 1) { + SPDK_ERRLOG("Admin command sent to disabled controller\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (req->data && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + memset(req->data, 0, req->length); + } + + if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + /* Discovery controllers only support Get Log Page and Identify */ + switch (cmd->opc) { + case SPDK_NVME_OPC_IDENTIFY: + case SPDK_NVME_OPC_GET_LOG_PAGE: + break; + default: + goto invalid_opcode; + } + } + + switch (cmd->opc) { + case SPDK_NVME_OPC_GET_LOG_PAGE: + return spdk_nvmf_ctrlr_get_log_page(req); + case SPDK_NVME_OPC_IDENTIFY: + return spdk_nvmf_ctrlr_identify(req); + case SPDK_NVME_OPC_ABORT: + return spdk_nvmf_ctrlr_abort(req); + case SPDK_NVME_OPC_GET_FEATURES: + return spdk_nvmf_ctrlr_get_features(req); + case SPDK_NVME_OPC_SET_FEATURES: + return spdk_nvmf_ctrlr_set_features(req); + case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST: + return spdk_nvmf_ctrlr_async_event_request(req); + case SPDK_NVME_OPC_KEEP_ALIVE: + return spdk_nvmf_ctrlr_keep_alive(req); + + case SPDK_NVME_OPC_CREATE_IO_SQ: + case SPDK_NVME_OPC_CREATE_IO_CQ: + case SPDK_NVME_OPC_DELETE_IO_SQ: + case SPDK_NVME_OPC_DELETE_IO_CQ: + /* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */ + goto invalid_opcode; + + default: + goto invalid_opcode; + } + +invalid_opcode: + SPDK_ERRLOG("Unsupported admin opcode 0x%x\n", cmd->opc); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +int +spdk_nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + struct spdk_nvmf_capsule_cmd *cap_hdr; + + cap_hdr = &req->cmd->nvmf_cmd; + + if (qpair->ctrlr == NULL) { + /* No ctrlr established yet; the only valid command is Connect */ + if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) { + return spdk_nvmf_ctrlr_connect(req); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Got fctype 0x%x, expected Connect\n", + cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + } else if (spdk_nvmf_qpair_is_admin_queue(qpair)) { + /* + * Controller session is established, and this is an admin queue. + * Disallow Connect and allow other fabrics commands. 
+ */ + switch (cap_hdr->fctype) { + case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET: + return spdk_nvmf_property_set(req); + case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET: + return spdk_nvmf_property_get(req); + default: + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "unknown fctype 0x%02x\n", + cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + } else { + /* Controller session is established, and this is an I/O queue */ + /* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */ + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype); + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } +} + +int +spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr) +{ + struct spdk_nvmf_request *req; + struct spdk_nvme_cpl *rsp; + union spdk_nvme_async_event_completion event = {0}; + + /* Users may disable the event notification */ + if (!ctrlr->feat.async_event_configuration.bits.ns_attr_notice) { + return 0; + } + + event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE; + event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED; + event.bits.log_page_identifier = SPDK_NVME_LOG_CHANGED_NS_LIST; + + /* If there is no outstanding AER request, queue the event. Then + * if an AER is later submitted, this event can be sent as a + * response. + */ + if (!ctrlr->aer_req) { + if (ctrlr->notice_event.bits.async_event_type == + SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) { + return 0; + } + + ctrlr->notice_event.raw = event.raw; + return 0; + } + + req = ctrlr->aer_req; + rsp = &req->rsp->nvme_cpl; + + rsp->cdw0 = event.raw; + + spdk_nvmf_request_complete(req); + ctrlr->aer_req = NULL; + + return 0; +} + +void +spdk_nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + if (!spdk_nvmf_qpair_is_admin_queue(qpair)) { + return; + } + + if (ctrlr->aer_req != NULL) { + spdk_nvmf_request_free(ctrlr->aer_req); + ctrlr->aer_req = NULL; + } +} + +void +spdk_nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr) +{ + if (!ctrlr->aer_req) { + return; + } + + spdk_nvmf_request_complete(ctrlr->aer_req); + ctrlr->aer_req = NULL; +} diff --git a/src/spdk/lib/nvmf/ctrlr_bdev.c b/src/spdk/lib/nvmf/ctrlr_bdev.c new file mode 100644 index 00000000..7eb4f19a --- /dev/null +++ b/src/spdk/lib/nvmf/ctrlr_bdev.c @@ -0,0 +1,531 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" + +#include "spdk/bdev.h" +#include "spdk/endian.h" +#include "spdk/thread.h" +#include "spdk/likely.h" +#include "spdk/nvme.h" +#include "spdk/nvmf_spec.h" +#include "spdk/trace.h" +#include "spdk/scsi_spec.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +static bool +spdk_nvmf_subsystem_bdev_io_type_supported(struct spdk_nvmf_subsystem *subsystem, + enum spdk_bdev_io_type io_type) +{ + struct spdk_nvmf_ns *ns; + + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + if (ns->bdev == NULL) { + continue; + } + + if (!spdk_bdev_io_type_supported(ns->bdev, io_type)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, + "Subsystem %s namespace %u (%s) does not support io_type %d\n", + spdk_nvmf_subsystem_get_nqn(subsystem), + ns->opts.nsid, spdk_bdev_get_name(ns->bdev), (int)io_type); + return false; + } + } + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "All devices in Subsystem %s support io_type %d\n", + spdk_nvmf_subsystem_get_nqn(subsystem), (int)io_type); + return true; +} + +bool +spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr) +{ + return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_UNMAP); +} + +bool +spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr) +{ + return spdk_nvmf_subsystem_bdev_io_type_supported(ctrlr->subsys, SPDK_BDEV_IO_TYPE_WRITE_ZEROES); +} + +static void +nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + struct spdk_nvmf_request *req = cb_arg; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int sc, sct; + + spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc); + response->status.sc = sc; + response->status.sct = sct; + + spdk_nvmf_request_complete(req); + spdk_bdev_free_io(bdev_io); +} + +void +spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata) +{ + struct spdk_bdev *bdev = ns->bdev; + uint64_t num_blocks; + + num_blocks = spdk_bdev_get_num_blocks(bdev); + + nsdata->nsze = num_blocks; + nsdata->ncap = num_blocks; + nsdata->nuse = num_blocks; + nsdata->nlbaf = 0; + nsdata->flbas.format = 0; + nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(bdev)); + nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev); + nsdata->nmic.can_share = 1; + + SPDK_STATIC_ASSERT(sizeof(nsdata->nguid) == sizeof(ns->opts.nguid), "size mismatch"); + memcpy(nsdata->nguid, ns->opts.nguid, sizeof(nsdata->nguid)); + + SPDK_STATIC_ASSERT(sizeof(nsdata->eui64) == sizeof(ns->opts.eui64), "size mismatch"); + memcpy(&nsdata->eui64, ns->opts.eui64, sizeof(nsdata->eui64)); +} + +static void 
+nvmf_bdev_ctrlr_get_rw_params(const struct spdk_nvme_cmd *cmd, uint64_t *start_lba, + uint64_t *num_blocks) +{ + /* SLBA: CDW10 and CDW11 */ + *start_lba = from_le64(&cmd->cdw10); + + /* NLB: CDW12 bits 15:00, 0's based */ + *num_blocks = (from_le32(&cmd->cdw12) & 0xFFFFu) + 1; +} + +static bool +nvmf_bdev_ctrlr_lba_in_range(uint64_t bdev_num_blocks, uint64_t io_start_lba, + uint64_t io_num_blocks) +{ + if (io_start_lba + io_num_blocks > bdev_num_blocks || + io_start_lba + io_num_blocks < io_start_lba) { + return false; + } + + return true; +} + +static void +spdk_nvmf_ctrlr_process_io_cmd_resubmit(void *arg) +{ + struct spdk_nvmf_request *req = arg; + + spdk_nvmf_ctrlr_process_io_cmd(req); +} + +static void +nvmf_bdev_ctrl_queue_io(struct spdk_nvmf_request *req, struct spdk_bdev *bdev, + struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn, void *cb_arg) +{ + int rc; + + req->bdev_io_wait.bdev = bdev; + req->bdev_io_wait.cb_fn = cb_fn; + req->bdev_io_wait.cb_arg = cb_arg; + + rc = spdk_bdev_queue_io_wait(bdev, ch, &req->bdev_io_wait); + if (rc != 0) { + assert(false); + } +} + +static int +nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req) +{ + uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev); + uint32_t block_size = spdk_bdev_get_block_size(bdev); + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint64_t start_lba; + uint64_t num_blocks; + int rc; + + nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks); + + if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) { + SPDK_ERRLOG("end of media\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (spdk_unlikely(num_blocks * block_size > req->length)) { + SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n", + num_blocks, block_size, req->length); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks, + nvmf_bdev_ctrlr_complete_cmd, req); + if (spdk_unlikely(rc)) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static int +nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req) +{ + uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev); + uint32_t block_size = spdk_bdev_get_block_size(bdev); + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint64_t start_lba; + uint64_t num_blocks; + int rc; + + nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks); + + if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) { + SPDK_ERRLOG("end of media\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (spdk_unlikely(num_blocks * 
block_size > req->length)) { + SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n", + num_blocks, block_size, req->length); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks, + nvmf_bdev_ctrlr_complete_cmd, req); + if (spdk_unlikely(rc)) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static int +nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req) +{ + uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev); + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + uint64_t start_lba; + uint64_t num_blocks; + int rc; + + nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks); + + if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) { + SPDK_ERRLOG("end of media\n"); + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + rc = spdk_bdev_write_zeroes_blocks(desc, ch, start_lba, num_blocks, + nvmf_bdev_ctrlr_complete_cmd, req); + if (spdk_unlikely(rc)) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + rsp->status.sct = SPDK_NVME_SCT_GENERIC; + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +static int +nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int rc; + + /* For an NVMe-oF controller, SPDK always sets the volatile write + * cache bit to 1, so return success for block devices + * that cannot support the FLUSH command.
+ */ + if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) { + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_SUCCESS; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(bdev), + nvmf_bdev_ctrlr_complete_cmd, req); + if (spdk_unlikely(rc)) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +struct nvmf_virtual_ctrlr_unmap { + struct spdk_nvmf_request *req; + uint32_t count; + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + struct spdk_io_channel *ch; +}; + +static void +nvmf_virtual_ctrlr_dsm_cpl(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + struct nvmf_virtual_ctrlr_unmap *unmap_ctx = cb_arg; + struct spdk_nvmf_request *req = unmap_ctx->req; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int sc, sct; + + unmap_ctx->count--; + + if (response->status.sct == SPDK_NVME_SCT_GENERIC && + response->status.sc == SPDK_NVME_SC_SUCCESS) { + spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc); + response->status.sc = sc; + response->status.sct = sct; + } + + if (unmap_ctx->count == 0) { + spdk_nvmf_request_complete(req); + free(unmap_ctx); + } + spdk_bdev_free_io(bdev_io); +} + +static int +nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req, + struct nvmf_virtual_ctrlr_unmap *unmap_ctx); +static void +nvmf_bdev_ctrlr_dsm_cmd_resubmit(void *arg) +{ + struct nvmf_virtual_ctrlr_unmap *unmap_ctx = arg; + struct spdk_nvmf_request *req = unmap_ctx->req; + struct spdk_bdev_desc *desc = unmap_ctx->desc; + struct spdk_bdev *bdev = unmap_ctx->bdev; + struct spdk_io_channel *ch = unmap_ctx->ch; + + nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req, unmap_ctx); +} + +static int +nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req, + struct nvmf_virtual_ctrlr_unmap *unmap_ctx) +{ + uint32_t attribute; + uint16_t nr, i; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int rc; + + nr = ((cmd->cdw10 & 0x000000ff) + 1); + if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) { + SPDK_ERRLOG("Dataset Management number of ranges > SGL length\n"); + response->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + attribute = cmd->cdw11 & 0x00000007; + if (attribute & SPDK_NVME_DSM_ATTR_DEALLOCATE) { + struct spdk_nvme_dsm_range *dsm_range; + uint64_t lba; + uint32_t lba_count; + + if (unmap_ctx == NULL) { + unmap_ctx = calloc(1, sizeof(*unmap_ctx)); + if (!unmap_ctx) { + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + unmap_ctx->req = req; + unmap_ctx->desc = desc; + unmap_ctx->ch = ch; + } + + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_SUCCESS; + + dsm_range = (struct spdk_nvme_dsm_range *)req->data; + for (i = unmap_ctx->count; i < nr; i++) { + lba = dsm_range[i].starting_lba; + lba_count = dsm_range[i].length; + + unmap_ctx->count++; + + rc = spdk_bdev_unmap_blocks(desc, ch, lba, lba_count, + 
nvmf_virtual_ctrlr_dsm_cpl, unmap_ctx); + if (rc) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_bdev_ctrlr_dsm_cmd_resubmit, unmap_ctx); + /* Unmap was not yet submitted to bdev */ + unmap_ctx->count--; + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + unmap_ctx->count--; + /* We can't return here - we may have to wait for any other + * unmaps already sent to complete */ + break; + } + } + + if (unmap_ctx->count == 0) { + free(unmap_ctx); + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_SUCCESS; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; +} + +static int +nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, struct spdk_nvmf_request *req) +{ + int rc; + + rc = spdk_bdev_nvme_io_passthru(desc, ch, &req->cmd->nvme_cmd, req->data, req->length, + nvmf_bdev_ctrlr_complete_cmd, req); + if (spdk_unlikely(rc)) { + if (rc == -ENOMEM) { + nvmf_bdev_ctrl_queue_io(req, bdev, ch, spdk_nvmf_ctrlr_process_io_cmd_resubmit, req); + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; + } + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; +} + +int +spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req) +{ + uint32_t nsid; + struct spdk_nvmf_ns *ns; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group = req->qpair->group; + struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; + struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + + /* pre-set response details for this command */ + response->status.sc = SPDK_NVME_SC_SUCCESS; + nsid = cmd->nsid; + + if (spdk_unlikely(ctrlr == NULL)) { + SPDK_ERRLOG("I/O command sent before CONNECT\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) { + SPDK_ERRLOG("I/O command sent to disabled controller\n"); + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + ns = _spdk_nvmf_subsystem_get_ns(ctrlr->subsys, nsid); + if (ns == NULL || ns->bdev == NULL) { + SPDK_ERRLOG("Unsuccessful query for nsid %u\n", cmd->nsid); + response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + response->status.dnr = 1; + return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; + } + + bdev = ns->bdev; + desc = ns->desc; + ch = group->sgroups[ctrlr->subsys->id].channels[nsid - 1]; + switch (cmd->opc) { + case SPDK_NVME_OPC_READ: + return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_WRITE: + return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_WRITE_ZEROES: + return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_FLUSH: + return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req); + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req, NULL); + default: + return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, 
req); + } +} diff --git a/src/spdk/lib/nvmf/ctrlr_discovery.c b/src/spdk/lib/nvmf/ctrlr_discovery.c new file mode 100644 index 00000000..305a6076 --- /dev/null +++ b/src/spdk/lib/nvmf/ctrlr_discovery.c @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NVMe over Fabrics discovery service + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/event.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/nvmf_spec.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +static void +nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt) +{ + uint64_t numrec = 0; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_listener *listener; + struct spdk_nvmf_discovery_log_page_entry *entry; + struct spdk_nvmf_discovery_log_page *disc_log; + size_t cur_size; + uint32_t sid; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Generating log page for genctr %" PRIu64 "\n", + tgt->discovery_genctr); + + cur_size = sizeof(struct spdk_nvmf_discovery_log_page); + disc_log = calloc(1, cur_size); + if (disc_log == NULL) { + SPDK_ERRLOG("Discovery log page memory allocation error\n"); + return; + } + + for (sid = 0; sid < tgt->opts.max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem == NULL) { + continue; + } + + if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) { + continue; + } + + for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL; + listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) { + size_t new_size = cur_size + sizeof(*entry); + void *new_log_page = realloc(disc_log, new_size); + + if (new_log_page == NULL) { + SPDK_ERRLOG("Discovery log page memory allocation error\n"); + break; + } + + disc_log = new_log_page; + cur_size = new_size; + + entry = &disc_log->entries[numrec]; + memset(entry, 0, sizeof(*entry)); + entry->portid = numrec; + entry->cntlid = 0xffff; + entry->asqsz = listener->transport->opts.max_aq_depth; + entry->subtype = subsystem->subtype; + snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn); + + spdk_nvmf_transport_listener_discover(listener->transport, &listener->trid, entry); + + numrec++; + } + } + + disc_log->numrec = numrec; + disc_log->genctr = tgt->discovery_genctr; + + free(tgt->discovery_log_page); + + tgt->discovery_log_page = disc_log; + tgt->discovery_log_page_size = cur_size; +} + +void +spdk_nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, void *buffer, + uint64_t offset, uint32_t length) +{ + size_t copy_len = 0; + size_t zero_len = length; + + if (tgt->discovery_log_page == NULL || + tgt->discovery_log_page->genctr != tgt->discovery_genctr) { + nvmf_update_discovery_log(tgt); + } + + /* Copy the valid part of the discovery log page, if any */ + if (tgt->discovery_log_page && offset < tgt->discovery_log_page_size) { + copy_len = spdk_min(tgt->discovery_log_page_size - offset, length); + zero_len -= copy_len; + memcpy(buffer, (char *)tgt->discovery_log_page + offset, copy_len); + } + + /* Zero out the rest of the buffer */ + if (zero_len) { + memset((char *)buffer + copy_len, 0, zero_len); + } + + /* We should have copied or zeroed every byte of the output buffer. */ + assert(copy_len + zero_len == length); +} diff --git a/src/spdk/lib/nvmf/nvmf.c b/src/spdk/lib/nvmf/nvmf.c new file mode 100644 index 00000000..32539f53 --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf.c @@ -0,0 +1,1173 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/bit_array.h" +#include "spdk/conf.h" +#include "spdk/thread.h" +#include "spdk/nvmf.h" +#include "spdk/trace.h" +#include "spdk/endian.h" +#include "spdk/string.h" + +#include "spdk_internal/log.h" + +#include "nvmf_internal.h" +#include "transport.h" + +SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF) + +#define SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH 128 +#define SPDK_NVMF_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 +#define SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 +#define SPDK_NVMF_DEFAULT_MAX_IO_SIZE 131072 +#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024 +#define SPDK_NVMF_DEFAULT_IO_UNIT_SIZE 131072 + +typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status); +static void spdk_nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf); + +/* supplied to a single call to nvmf_qpair_disconnect */ +struct nvmf_qpair_disconnect_ctx { + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_ctrlr *ctrlr; + nvmf_qpair_disconnect_cb cb_fn; + struct spdk_thread *thread; + void *ctx; + uint16_t qid; +}; + +/* + * There are several times when we need to iterate through the list of all qpairs and selectively delete them. + * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from + * to enable calling nvmf_qpair_disconnect on the next desired qpair. 
+ */ +struct nvmf_qpair_disconnect_many_ctx { + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + spdk_nvmf_poll_group_mod_done cpl_fn; + void *cpl_ctx; +}; + +static void +spdk_nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair, + enum spdk_nvmf_qpair_state state) +{ + assert(qpair != NULL); + assert(qpair->group->thread == spdk_get_thread()); + + qpair->state = state; +} + +void +spdk_nvmf_tgt_opts_init(struct spdk_nvmf_tgt_opts *opts) +{ + opts->max_queue_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH; + opts->max_qpairs_per_ctrlr = SPDK_NVMF_DEFAULT_MAX_QPAIRS_PER_CTRLR; + opts->in_capsule_data_size = SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE; + opts->max_io_size = SPDK_NVMF_DEFAULT_MAX_IO_SIZE; + opts->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS; + opts->io_unit_size = SPDK_NVMF_DEFAULT_IO_UNIT_SIZE; +} + +static int +spdk_nvmf_poll_group_poll(void *ctx) +{ + struct spdk_nvmf_poll_group *group = ctx; + int rc; + int count = 0; + struct spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + rc = spdk_nvmf_transport_poll_group_poll(tgroup); + if (rc < 0) { + return -1; + } + count += rc; + } + + return count; +} + +static int +spdk_nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_tgt *tgt = io_device; + struct spdk_nvmf_poll_group *group = ctx_buf; + struct spdk_nvmf_transport *transport; + uint32_t sid; + + TAILQ_INIT(&group->tgroups); + TAILQ_INIT(&group->qpairs); + + TAILQ_FOREACH(transport, &tgt->transports, link) { + spdk_nvmf_poll_group_add_transport(group, transport); + } + + group->num_sgroups = tgt->opts.max_subsystems; + group->sgroups = calloc(tgt->opts.max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group)); + if (!group->sgroups) { + return -1; + } + + for (sid = 0; sid < tgt->opts.max_subsystems; sid++) { + struct spdk_nvmf_subsystem *subsystem; + + subsystem = tgt->subsystems[sid]; + if (!subsystem) { + continue; + } + + if (spdk_nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) { + spdk_nvmf_tgt_destroy_poll_group(io_device, ctx_buf); + return -1; + } + } + + group->poller = spdk_poller_register(spdk_nvmf_poll_group_poll, group, 0); + group->thread = spdk_get_thread(); + + return 0; +} + +static void +spdk_nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_poll_group *group = ctx_buf; + struct spdk_nvmf_transport_poll_group *tgroup, *tmp; + struct spdk_nvmf_subsystem_poll_group *sgroup; + uint32_t sid, nsid; + + TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) { + TAILQ_REMOVE(&group->tgroups, tgroup, link); + spdk_nvmf_transport_poll_group_destroy(tgroup); + } + + for (sid = 0; sid < group->num_sgroups; sid++) { + sgroup = &group->sgroups[sid]; + + for (nsid = 0; nsid < sgroup->num_channels; nsid++) { + if (sgroup->channels[nsid]) { + spdk_put_io_channel(sgroup->channels[nsid]); + sgroup->channels[nsid] = NULL; + } + } + + free(sgroup->channels); + } + + free(group->sgroups); +} + +static void +_nvmf_tgt_disconnect_next_qpair(void *ctx) +{ + struct spdk_nvmf_qpair *qpair; + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_poll_group *group = qpair_ctx->group; + struct spdk_io_channel *ch; + int rc = 0; + + qpair = TAILQ_FIRST(&group->qpairs); + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx); + } + + if (!qpair || rc != 0) { + /* When the refcount from the channels reaches 0, spdk_nvmf_tgt_destroy_poll_group will be called. 
*/ + ch = spdk_io_channel_from_ctx(group); + spdk_put_io_channel(ch); + free(qpair_ctx); + } +} + +static void +spdk_nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group) +{ + struct nvmf_qpair_disconnect_many_ctx *ctx; + + ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx)); + + if (!ctx) { + SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n"); + return; + } + + spdk_poller_unregister(&group->poller); + + ctx->group = group; + _nvmf_tgt_disconnect_next_qpair(ctx); +} + +struct spdk_nvmf_tgt * +spdk_nvmf_tgt_create(struct spdk_nvmf_tgt_opts *opts) +{ + struct spdk_nvmf_tgt *tgt; + + tgt = calloc(1, sizeof(*tgt)); + if (!tgt) { + return NULL; + } + + if (!opts) { + spdk_nvmf_tgt_opts_init(&tgt->opts); + } else { + tgt->opts = *opts; + } + + tgt->discovery_genctr = 0; + tgt->discovery_log_page = NULL; + tgt->discovery_log_page_size = 0; + TAILQ_INIT(&tgt->transports); + + tgt->subsystems = calloc(tgt->opts.max_subsystems, sizeof(struct spdk_nvmf_subsystem *)); + if (!tgt->subsystems) { + free(tgt); + return NULL; + } + + spdk_io_device_register(tgt, + spdk_nvmf_tgt_create_poll_group, + spdk_nvmf_tgt_destroy_poll_group, + sizeof(struct spdk_nvmf_poll_group), + "nvmf_tgt"); + + return tgt; +} + +static void +spdk_nvmf_tgt_destroy_cb(void *io_device) +{ + struct spdk_nvmf_tgt *tgt = io_device; + struct spdk_nvmf_transport *transport, *transport_tmp; + spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; + void *destroy_cb_arg; + uint32_t i; + + if (tgt->discovery_log_page) { + free(tgt->discovery_log_page); + } + + if (tgt->subsystems) { + for (i = 0; i < tgt->opts.max_subsystems; i++) { + if (tgt->subsystems[i]) { + spdk_nvmf_subsystem_destroy(tgt->subsystems[i]); + } + } + free(tgt->subsystems); + } + + TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, transport_tmp) { + TAILQ_REMOVE(&tgt->transports, transport, link); + spdk_nvmf_transport_destroy(transport); + } + + destroy_cb_fn = tgt->destroy_cb_fn; + destroy_cb_arg = tgt->destroy_cb_arg; + + free(tgt); + + if (destroy_cb_fn) { + destroy_cb_fn(destroy_cb_arg, 0); + } +} + +void +spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt, + spdk_nvmf_tgt_destroy_done_fn cb_fn, + void *cb_arg) +{ + tgt->destroy_cb_fn = cb_fn; + tgt->destroy_cb_arg = cb_arg; + + spdk_io_device_unregister(tgt, spdk_nvmf_tgt_destroy_cb); +} + +static void +spdk_nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w, + struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_host *host; + struct spdk_nvmf_listener *listener; + const struct spdk_nvme_transport_id *trid; + struct spdk_nvmf_ns *ns; + struct spdk_nvmf_ns_opts ns_opts; + uint32_t max_namespaces; + char uuid_str[SPDK_UUID_STRING_LEN]; + const char *trtype; + const char *adrfam; + + if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) { + return; + } + + /* { */ + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_create"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem)); + spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem)); + + max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem); + if (max_namespaces != 0) { + spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces); + } + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } 
*/ + spdk_json_write_object_end(w); + + for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL; + listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) { + trid = spdk_nvmf_listener_get_trid(listener); + + trtype = spdk_nvme_transport_id_trtype_str(trid->trtype); + adrfam = spdk_nvme_transport_id_adrfam_str(trid->adrfam); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + + /* "listen_address" : { */ + spdk_json_write_named_object_begin(w, "listen_address"); + + spdk_json_write_named_string(w, "trtype", trtype); + if (adrfam) { + spdk_json_write_named_string(w, "adrfam", adrfam); + } + + spdk_json_write_named_string(w, "traddr", trid->traddr); + spdk_json_write_named_string(w, "trsvcid", trid->trsvcid); + /* } "listen_address" */ + spdk_json_write_object_end(w); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } + + for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL; + host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) { + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host)); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } + + for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL; + ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) { + spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts)); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns"); + + /* "params" : { */ + spdk_json_write_named_object_begin(w, "params"); + + spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem)); + + /* "namespace" : { */ + spdk_json_write_named_object_begin(w, "namespace"); + + spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns)); + spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns))); + + if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) { + SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch"); + spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]), + from_be64(&ns_opts.nguid[8])); + } + + if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) { + SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch"); + spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64)); + } + + if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) { + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); + } + + /* "namespace" */ + spdk_json_write_object_end(w); + + /* } "params" */ + spdk_json_write_object_end(w); + + /* } */ + spdk_json_write_object_end(w); + } +} + +void +spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_transport *transport; + + spdk_json_write_object_begin(w); + 
spdk_json_write_named_string(w, "method", "set_nvmf_target_options"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_uint32(w, "max_queue_depth", tgt->opts.max_queue_depth); + spdk_json_write_named_uint32(w, "max_qpairs_per_ctrlr", tgt->opts.max_qpairs_per_ctrlr); + spdk_json_write_named_uint32(w, "in_capsule_data_size", tgt->opts.in_capsule_data_size); + spdk_json_write_named_uint32(w, "max_io_size", tgt->opts.max_io_size); + spdk_json_write_named_uint32(w, "max_subsystems", tgt->opts.max_subsystems); + spdk_json_write_named_uint32(w, "io_unit_size", tgt->opts.io_unit_size); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + /* write transports */ + TAILQ_FOREACH(transport, &tgt->transports, link) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "nvmf_create_transport"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "trtype", spdk_nvme_transport_id_trtype_str(transport->ops->type)); + spdk_json_write_named_uint32(w, "max_queue_depth", transport->opts.max_queue_depth); + spdk_json_write_named_uint32(w, "max_qpairs_per_ctrlr", transport->opts.max_qpairs_per_ctrlr); + spdk_json_write_named_uint32(w, "in_capsule_data_size", transport->opts.in_capsule_data_size); + spdk_json_write_named_uint32(w, "max_io_size", transport->opts.max_io_size); + spdk_json_write_named_uint32(w, "io_unit_size", transport->opts.io_unit_size); + spdk_json_write_named_uint32(w, "max_aq_depth", transport->opts.max_aq_depth); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + subsystem = spdk_nvmf_subsystem_get_first(tgt); + while (subsystem) { + spdk_nvmf_write_subsystem_config_json(w, subsystem); + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + } +} + +void +spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt, + struct spdk_nvme_transport_id *trid, + spdk_nvmf_tgt_listen_done_fn cb_fn, + void *cb_arg) +{ + struct spdk_nvmf_transport *transport; + int rc; + bool propagate = false; + + transport = spdk_nvmf_tgt_get_transport(tgt, trid->trtype); + if (!transport) { + struct spdk_nvmf_transport_opts opts; + + opts.max_queue_depth = tgt->opts.max_queue_depth; + opts.max_qpairs_per_ctrlr = tgt->opts.max_qpairs_per_ctrlr; + opts.in_capsule_data_size = tgt->opts.in_capsule_data_size; + opts.max_io_size = tgt->opts.max_io_size; + opts.io_unit_size = tgt->opts.io_unit_size; + /* use max_queue_depth since tgt opts
doesn't have max_aq_depth */ + opts.max_aq_depth = tgt->opts.max_queue_depth; + + transport = spdk_nvmf_transport_create(trid->trtype, &opts); + if (!transport) { + SPDK_ERRLOG("Transport initialization failed\n"); + cb_fn(cb_arg, -EINVAL); + return; + } + + propagate = true; + } + + rc = spdk_nvmf_transport_listen(transport, trid); + if (rc < 0) { + SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr); + cb_fn(cb_arg, rc); + return; + } + + tgt->discovery_genctr++; + + if (propagate) { + spdk_nvmf_tgt_add_transport(tgt, transport, cb_fn, cb_arg); + } else { + cb_fn(cb_arg, 0); + } +} + +struct spdk_nvmf_tgt_add_transport_ctx { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_transport *transport; + spdk_nvmf_tgt_add_transport_done_fn cb_fn; + void *cb_arg; +}; + +static void +_spdk_nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + ctx->cb_fn(ctx->cb_arg, status); + + free(ctx); +} + +static void +_spdk_nvmf_tgt_add_transport(struct spdk_io_channel_iter *i) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch); + int rc; + + rc = spdk_nvmf_poll_group_add_transport(group, ctx->transport); + spdk_for_each_channel_continue(i, rc); +} + +void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt, + struct spdk_nvmf_transport *transport, + spdk_nvmf_tgt_add_transport_done_fn cb_fn, + void *cb_arg) +{ + struct spdk_nvmf_tgt_add_transport_ctx *ctx; + + if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->type)) { + cb_fn(cb_arg, -EEXIST); + return; /* transport already created */ + } + + transport->tgt = tgt; + TAILQ_INSERT_TAIL(&tgt->transports, transport, link); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->tgt = tgt; + ctx->transport = transport; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_for_each_channel(tgt, + _spdk_nvmf_tgt_add_transport, + ctx, + _spdk_nvmf_tgt_add_transport_done); +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + if (!subnqn) { + return NULL; + } + + for (sid = 0; sid < tgt->opts.max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem == NULL) { + continue; + } + + if (strcmp(subnqn, subsystem->subnqn) == 0) { + return subsystem; + } + } + + return NULL; +} + +struct spdk_nvmf_transport * +spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, enum spdk_nvme_transport_type type) +{ + struct spdk_nvmf_transport *transport; + + TAILQ_FOREACH(transport, &tgt->transports, link) { + if (transport->ops->type == type) { + return transport; + } + } + + return NULL; +} + +void +spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt, new_qpair_fn cb_fn) +{ + struct spdk_nvmf_transport *transport, *tmp; + + TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) { + spdk_nvmf_transport_accept(transport, cb_fn); + } +} + +struct spdk_nvmf_poll_group * +spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt) +{ + struct spdk_io_channel *ch; + + ch = spdk_get_io_channel(tgt); + if (!ch) { + SPDK_ERRLOG("Unable to get I/O channel for target\n"); + return NULL; + } + + return spdk_io_channel_get_ctx(ch); +} + +void +spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group) +{ + /* This function will 
put the io_channel associated with this poll group */ + spdk_nvmf_tgt_destroy_poll_group_qpairs(group); +} + +int +spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + int rc = -1; + struct spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_INIT(&qpair->outstanding); + qpair->group = group; + spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVATING); + + TAILQ_INSERT_TAIL(&group->qpairs, qpair, link); + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + if (tgroup->transport == qpair->transport) { + rc = spdk_nvmf_transport_poll_group_add(tgroup, qpair); + break; + } + } + + if (rc == 0) { + spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE); + } else { + spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_INACTIVE); + } + + return rc; +} + +static +void _nvmf_ctrlr_destruct(void *ctx) +{ + struct spdk_nvmf_ctrlr *ctrlr = ctx; + + spdk_nvmf_ctrlr_destruct(ctrlr); +} + +static void +_spdk_nvmf_ctrlr_free_from_qpair(void *ctx) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; + struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr; + uint32_t count; + + spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid); + count = spdk_bit_array_count_set(ctrlr->qpair_mask); + if (count == 0) { + spdk_bit_array_free(&ctrlr->qpair_mask); + + spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr); + } + + if (qpair_ctx->cb_fn) { + spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx); + } + free(qpair_ctx); +} + +static void +_spdk_nvmf_qpair_destroy(void *ctx, int status) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; + struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair; + struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; + + assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING); + spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_INACTIVE); + qpair_ctx->qid = qpair->qid; + + TAILQ_REMOVE(&qpair->group->qpairs, qpair, link); + qpair->group = NULL; + + spdk_nvmf_transport_qpair_fini(qpair); + + if (!ctrlr || !ctrlr->thread) { + if (qpair_ctx->cb_fn) { + spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx); + } + free(qpair_ctx); + return; + } + + qpair_ctx->ctrlr = ctrlr; + spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_free_from_qpair, qpair_ctx); + +} + +int +spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx; + + /* If we get a qpair in the uninitialized state, we can just destroy it immediately */ + if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) { + spdk_nvmf_transport_qpair_fini(qpair); + if (cb_fn) { + cb_fn(ctx); + } + return 0; + } + + /* The queue pair must be disconnected from the thread that owns it */ + assert(qpair->group->thread == spdk_get_thread()); + + if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING || + qpair->state == SPDK_NVMF_QPAIR_INACTIVE) { + /* This can occur if the connection is killed by the target, + * which results in a notification that the connection + * died. Send a message to defer the processing of this + * callback. This allows the stack to unwind in the case + * where a bunch of connections are disconnected in + * a loop. 
*/ + if (cb_fn) { + spdk_thread_send_msg(qpair->group->thread, cb_fn, ctx); + } + return 0; + } + + assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE); + spdk_nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING); + + qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx)); + if (!qpair_ctx) { + SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n"); + return -ENOMEM; + } + + qpair_ctx->qpair = qpair; + qpair_ctx->cb_fn = cb_fn; + qpair_ctx->thread = qpair->group->thread; + qpair_ctx->ctx = ctx; + + /* Check for outstanding I/O */ + if (!TAILQ_EMPTY(&qpair->outstanding)) { + qpair->state_cb = _spdk_nvmf_qpair_destroy; + qpair->state_cb_arg = qpair_ctx; + spdk_nvmf_qpair_free_aer(qpair); + return 0; + } + + _spdk_nvmf_qpair_destroy(qpair_ctx, 0); + + return 0; +} + +int +spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return spdk_nvmf_transport_qpair_get_peer_trid(qpair, trid); +} + +int +spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return spdk_nvmf_transport_qpair_get_local_trid(qpair, trid); +} + +int +spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return spdk_nvmf_transport_qpair_get_listen_trid(qpair, trid); +} + +int +spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_transport_poll_group *tgroup; + + TAILQ_FOREACH(tgroup, &group->tgroups, link) { + if (tgroup->transport == transport) { + /* Transport already in the poll group */ + return 0; + } + } + + tgroup = spdk_nvmf_transport_poll_group_create(transport); + if (!tgroup) { + SPDK_ERRLOG("Unable to create poll group for transport\n"); + return -1; + } + + TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link); + + return 0; +} + +static int +poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_subsystem_poll_group *sgroup; + uint32_t new_num_channels, old_num_channels; + uint32_t i; + struct spdk_nvmf_ns *ns; + + /* Make sure our poll group has memory for this subsystem allocated */ + if (subsystem->id >= group->num_sgroups) { + return -ENOMEM; + } + + sgroup = &group->sgroups[subsystem->id]; + + /* Make sure the array of channels is the correct size */ + new_num_channels = subsystem->max_nsid; + old_num_channels = sgroup->num_channels; + + if (old_num_channels == 0) { + if (new_num_channels > 0) { + /* First allocation */ + sgroup->channels = calloc(new_num_channels, sizeof(sgroup->channels[0])); + if (!sgroup->channels) { + return -ENOMEM; + } + } + } else if (new_num_channels > old_num_channels) { + void *buf; + + /* Make the array larger */ + buf = realloc(sgroup->channels, new_num_channels * sizeof(sgroup->channels[0])); + if (!buf) { + return -ENOMEM; + } + + sgroup->channels = buf; + + /* Null out the new channels slots */ + for (i = old_num_channels; i < new_num_channels; i++) { + sgroup->channels[i] = NULL; + } + } else if (new_num_channels < old_num_channels) { + void *buf; + + /* Free the extra I/O channels */ + for (i = new_num_channels; i < old_num_channels; i++) { + if (sgroup->channels[i]) { + spdk_put_io_channel(sgroup->channels[i]); + sgroup->channels[i] = NULL; + } + } + + /* Make the array smaller */ + if (new_num_channels > 0) { + buf = realloc(sgroup->channels, new_num_channels * sizeof(sgroup->channels[0])); + if (!buf) { + return -ENOMEM; + } + sgroup->channels = 
buf; + } else { + free(sgroup->channels); + sgroup->channels = NULL; + } + } + + sgroup->num_channels = new_num_channels; + + /* Detect bdevs that were added or removed */ + for (i = 0; i < sgroup->num_channels; i++) { + ns = subsystem->ns[i]; + if (ns == NULL && sgroup->channels[i] == NULL) { + /* Both NULL. Leave empty */ + } else if (ns == NULL && sgroup->channels[i] != NULL) { + /* There was a channel here, but the namespace is gone. */ + spdk_put_io_channel(sgroup->channels[i]); + sgroup->channels[i] = NULL; + } else if (ns != NULL && sgroup->channels[i] == NULL) { + /* A namespace appeared but there is no channel yet */ + sgroup->channels[i] = spdk_bdev_get_io_channel(ns->desc); + if (sgroup->channels[i] == NULL) { + SPDK_ERRLOG("Could not allocate I/O channel.\n"); + return -ENOMEM; + } + } else { + /* A namespace was present before and didn't change. */ + } + } + + return 0; +} + +int +spdk_nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem) +{ + return poll_group_update_subsystem(group, subsystem); +} + +int +spdk_nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + int rc = 0; + struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id]; + + TAILQ_INIT(&sgroup->queued); + + rc = poll_group_update_subsystem(group, subsystem); + if (rc) { + spdk_nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL); + goto fini; + } + + sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } + + return rc; +} + +static void +_nvmf_poll_group_remove_subsystem_cb(void *ctx, int status) +{ + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + struct spdk_nvmf_subsystem_poll_group *sgroup; + spdk_nvmf_poll_group_mod_done cpl_fn = NULL; + void *cpl_ctx = NULL; + uint32_t nsid; + + group = qpair_ctx->group; + subsystem = qpair_ctx->subsystem; + cpl_fn = qpair_ctx->cpl_fn; + cpl_ctx = qpair_ctx->cpl_ctx; + sgroup = &group->sgroups[subsystem->id]; + + if (status) { + goto fini; + } + + for (nsid = 0; nsid < sgroup->num_channels; nsid++) { + if (sgroup->channels[nsid]) { + spdk_put_io_channel(sgroup->channels[nsid]); + sgroup->channels[nsid] = NULL; + } + } + + sgroup->num_channels = 0; + free(sgroup->channels); + sgroup->channels = NULL; +fini: + free(qpair_ctx); + if (cpl_fn) { + cpl_fn(cpl_ctx, status); + } +} + +static void +_nvmf_subsystem_disconnect_next_qpair(void *ctx) +{ + struct spdk_nvmf_qpair *qpair; + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_poll_group *group; + int rc = 0; + + group = qpair_ctx->group; + subsystem = qpair_ctx->subsystem; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if (qpair->ctrlr->subsys == subsystem) { + break; + } + } + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, qpair_ctx); + } + + if (!qpair || rc != 0) { + _nvmf_poll_group_remove_subsystem_cb(ctx, rc); + } + return; +} + +void +spdk_nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_subsystem_poll_group *sgroup; + struct nvmf_qpair_disconnect_many_ctx *ctx; + int rc = 0; + + ctx = calloc(1, sizeof(struct 
nvmf_qpair_disconnect_many_ctx)); + + if (!ctx) { + SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n"); + goto fini; + } + + ctx->group = group; + ctx->subsystem = subsystem; + ctx->cpl_fn = cb_fn; + ctx->cpl_ctx = cb_arg; + + sgroup = &group->sgroups[subsystem->id]; + sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + + TAILQ_FOREACH(qpair, &group->qpairs, link) { + if (qpair->ctrlr->subsys == subsystem) { + break; + } + } + + if (qpair) { + rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_subsystem_disconnect_next_qpair, ctx); + } else { + /* call the callback immediately. It will handle any channel iteration */ + _nvmf_poll_group_remove_subsystem_cb(ctx, 0); + } + + if (rc != 0) { + free(ctx); + goto fini; + } + + return; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} + +void +spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc = 0; + + if (subsystem->id >= group->num_sgroups) { + rc = -1; + goto fini; + } + + sgroup = &group->sgroups[subsystem->id]; + if (sgroup == NULL) { + rc = -1; + goto fini; + } + + assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE); + /* TODO: This currently does not quiesce I/O */ + sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED; +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} + +void +spdk_nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg) +{ + struct spdk_nvmf_request *req, *tmp; + struct spdk_nvmf_subsystem_poll_group *sgroup; + int rc = 0; + + if (subsystem->id >= group->num_sgroups) { + rc = -1; + goto fini; + } + + sgroup = &group->sgroups[subsystem->id]; + + assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED); + + rc = poll_group_update_subsystem(group, subsystem); + if (rc) { + goto fini; + } + + sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + + /* Release all queued requests */ + TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) { + TAILQ_REMOVE(&sgroup->queued, req, link); + spdk_nvmf_request_exec(req); + } +fini: + if (cb_fn) { + cb_fn(cb_arg, rc); + } +} diff --git a/src/spdk/lib/nvmf/nvmf_fc.h b/src/spdk/lib/nvmf/nvmf_fc.h new file mode 100644 index 00000000..bf086831 --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf_fc.h @@ -0,0 +1,871 @@ +/* + * BSD LICENSE + * + * Copyright (c) 2018 Broadcom. All Rights Reserved. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __NVMF_FC_H__ +#define __NVMF_FC_H__ + +#include "spdk/nvmf.h" +#include "spdk/assert.h" +#include "spdk/nvme_spec.h" +#include "spdk/nvmf_fc_spec.h" +#include "spdk/event.h" +#include "spdk/io_channel.h" +#include "nvmf_internal.h" + +#define SPDK_NVMF_FC_TR_ADDR_LEN 64 + +/* + * FC HW port states. + */ +enum spdk_fc_port_state { + SPDK_FC_PORT_OFFLINE = 0, + SPDK_FC_PORT_ONLINE = 1, + SPDK_FC_PORT_QUIESCED = 2, +}; + +enum spdk_fc_hwqp_state { + SPDK_FC_HWQP_OFFLINE = 0, + SPDK_FC_HWQP_ONLINE = 1, +}; + +/* + * NVMF BCM FC Object state + * Add all the generic states of the object here. + * Specific object states can be added separately + */ +enum spdk_nvmf_fc_object_state { + SPDK_NVMF_FC_OBJECT_CREATED = 0, + SPDK_NVMF_FC_OBJECT_TO_BE_DELETED = 1, + SPDK_NVMF_FC_OBJECT_ZOMBIE = 2, /* Partial Create or Delete */ +}; + +/* + * FC request state + */ +enum spdk_nvmf_fc_request_state { + SPDK_NVMF_FC_REQ_INIT = 0, + SPDK_NVMF_FC_REQ_READ_BDEV, + SPDK_NVMF_FC_REQ_READ_XFER, + SPDK_NVMF_FC_REQ_READ_RSP, + SPDK_NVMF_FC_REQ_WRITE_BUFFS, + SPDK_NVMF_FC_REQ_WRITE_XFER, + SPDK_NVMF_FC_REQ_WRITE_BDEV, + SPDK_NVMF_FC_REQ_WRITE_RSP, + SPDK_NVMF_FC_REQ_NONE_BDEV, + SPDK_NVMF_FC_REQ_NONE_RSP, + SPDK_NVMF_FC_REQ_SUCCESS, + SPDK_NVMF_FC_REQ_FAILED, + SPDK_NVMF_FC_REQ_ABORTED, + SPDK_NVMF_FC_REQ_PENDING, + SPDK_NVMF_FC_REQ_MAX_STATE, +}; + +/* + * FC HWQP pointer + */ +typedef void *spdk_nvmf_fc_lld_hwqp_t; + +/* + * FC World Wide Name + */ +struct spdk_nvmf_fc_wwn { + union { + uint64_t wwn; /* World Wide Names consist of eight bytes */ + uint8_t octets[sizeof(uint64_t)]; + } u; +}; + +/* + * Generic DMA buffer descriptor + */ +struct spdk_nvmf_fc_buffer_desc { + void *virt; + uint64_t phys; + size_t len; + + /* Internal */ + uint32_t buf_index; +}; + +/* + * ABTS hadling context + */ +struct spdk_nvmf_fc_abts_ctx { + bool handled; + uint16_t hwqps_responded; + uint16_t rpi; + uint16_t oxid; + uint16_t rxid; + struct spdk_nvmf_fc_nport *nport; + uint16_t nport_hdl; + uint8_t port_hdl; + void *abts_poller_args; + void *sync_poller_args; + int num_hwqps; + bool queue_synced; + uint64_t u_id; + struct spdk_nvmf_fc_hwqp *ls_hwqp; + uint16_t fcp_rq_id; +}; + +/* + * NVME FC transport errors + */ +struct spdk_nvmf_fc_errors { + uint32_t no_xri; + uint32_t nport_invalid; + uint32_t unknown_frame; + uint32_t wqe_cmplt_err; + uint32_t wqe_write_err; + uint32_t rq_status_err; + uint32_t rq_buf_len_err; + uint32_t rq_id_err; + uint32_t rq_index_err; + uint32_t invalid_cq_type; + uint32_t invalid_cq_id; + uint32_t fc_req_buf_err; + uint32_t aq_buf_alloc_err; + uint32_t write_buf_alloc_err; + uint32_t read_buf_alloc_err; + uint32_t unexpected_err; + uint32_t nvme_cmd_iu_err; + uint32_t nvme_cmd_xfer_err; + uint32_t queue_entry_invalid; + uint32_t invalid_conn_err; + uint32_t fcp_rsp_failure; + uint32_t write_failed; + uint32_t read_failed; + uint32_t rport_invalid; + uint32_t num_aborted; + uint32_t num_abts_sent; +}; + +/* + * Send Single Request/Response Sequence. 
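+ * (for example, the disconnect request an association sends to its initiator via snd_disconn_bufs).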
+ */ +struct spdk_nvmf_fc_send_srsr { + struct spdk_nvmf_fc_buffer_desc rqst; + struct spdk_nvmf_fc_buffer_desc rsp; + struct spdk_nvmf_fc_buffer_desc sgl; /* Note: Len = (2 * bcm_sge_t) */ + uint16_t rpi; +}; + +/* + * Struct representing a nport + */ +struct spdk_nvmf_fc_nport { + + uint16_t nport_hdl; + uint8_t port_hdl; + uint32_t d_id; + enum spdk_nvmf_fc_object_state nport_state; + struct spdk_nvmf_fc_wwn fc_nodename; + struct spdk_nvmf_fc_wwn fc_portname; + + /* list of remote ports (i.e. initiators) connected to nport */ + TAILQ_HEAD(, spdk_nvmf_fc_remote_port_info) rem_port_list; + uint32_t rport_count; + + void *vendor_data; /* available for vendor use */ + + /* list of associations to nport */ + TAILQ_HEAD(, spdk_nvmf_fc_association) fc_associations; + uint32_t assoc_count; + struct spdk_nvmf_fc_port *fc_port; + TAILQ_ENTRY(spdk_nvmf_fc_nport) link; /* list of nports on a hw port. */ +}; + +/* + * NVMF FC Connection + */ +struct spdk_nvmf_fc_conn { + struct spdk_nvmf_qpair qpair; + + uint64_t conn_id; + struct spdk_nvmf_fc_hwqp *hwqp; + uint16_t esrp_ratio; + uint16_t rsp_count; + uint32_t rsn; + + /* The maximum number of I/O outstanding on this connection at one time */ + uint16_t max_queue_depth; + uint16_t max_rw_depth; + /* The current number of I/O outstanding on this connection. This number + * includes all I/O from the time the capsule is first received until it is + * completed. + */ + uint16_t cur_queue_depth; + + /* number of read/write requests that are outstanding */ + uint16_t cur_fc_rw_depth; + + /* requests that are waiting to obtain xri/buffer */ + TAILQ_HEAD(, spdk_nvmf_fc_request) pending_queue; + + struct spdk_nvmf_fc_association *fc_assoc; + + /* additional FC info here - TBD */ + uint16_t rpi; + + /* for association's connection list */ + TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_link; + + /* for assocations's available connection list */ + TAILQ_ENTRY(spdk_nvmf_fc_conn) assoc_avail_link; + + /* for hwqp's connection list */ + TAILQ_ENTRY(spdk_nvmf_fc_conn) link; +}; + +/* + * Structure for maintaining the XRI's + */ +struct spdk_nvmf_fc_xri { + uint32_t xri; /* The actual xri value */ + /* Internal */ + TAILQ_ENTRY(spdk_nvmf_fc_xri) link; + bool is_active; +}; + +struct spdk_nvmf_fc_poll_group; + +/* + * HWQP poller structure passed from Master thread + */ +struct spdk_nvmf_fc_hwqp { + uint32_t lcore_id; /* core hwqp is running on (for tracing purposes only) */ + struct spdk_thread *thread; /* thread hwqp is running on */ + uint32_t hwqp_id; /* A unique id (per physical port) for a hwqp */ + uint32_t rq_size; /* receive queue size */ + spdk_nvmf_fc_lld_hwqp_t queues; /* vendor HW queue set */ + struct spdk_nvmf_fc_port *fc_port; /* HW port structure for these queues */ + struct spdk_nvmf_fc_poll_group *poll_group; + + void *context; /* Vendor Context */ + + TAILQ_HEAD(, spdk_nvmf_fc_conn) connection_list; + uint32_t num_conns; /* number of connections to queue */ + uint16_t cid_cnt; /* used to generate unique conn. id for RQ */ + uint32_t free_q_slots; /* free q slots available for connections */ + enum spdk_fc_hwqp_state state; /* Poller state (e.g. online, offline) */ + + /* Internal */ + struct spdk_mempool *fc_request_pool; + TAILQ_HEAD(, spdk_nvmf_fc_request) in_use_reqs; + + TAILQ_HEAD(, spdk_nvmf_fc_xri) pending_xri_list; + + struct spdk_nvmf_fc_errors counters; + uint32_t send_frame_xri; + uint8_t send_frame_seqid; + + /* Pending LS request waiting for XRI. 
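+ * See spdk_nvmf_fc_process_pending_ls_rqst().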
*/ + TAILQ_HEAD(, spdk_nvmf_fc_ls_rqst) ls_pending_queue; + + /* Sync req list */ + TAILQ_HEAD(, spdk_nvmf_fc_poller_api_queue_sync_args) sync_cbs; + + TAILQ_ENTRY(spdk_nvmf_fc_hwqp) link; +}; + +struct spdk_nvmf_fc_ls_rsrc_pool { + void *assocs_mptr; + uint32_t assocs_count; + TAILQ_HEAD(, spdk_nvmf_fc_association) assoc_free_list; + + void *conns_mptr; + uint32_t conns_count; + TAILQ_HEAD(, spdk_nvmf_fc_conn) fc_conn_free_list; +}; + +/* + * FC HW port. + */ +struct spdk_nvmf_fc_port { + uint8_t port_hdl; + enum spdk_fc_port_state hw_port_status; + uint32_t xri_base; + uint32_t xri_count; + uint16_t fcp_rq_id; + struct spdk_ring *xri_ring; + struct spdk_nvmf_fc_hwqp ls_queue; + uint32_t num_io_queues; + struct spdk_nvmf_fc_hwqp *io_queues; + /* + * List of nports on this HW port. + */ + TAILQ_HEAD(, spdk_nvmf_fc_nport)nport_list; + int num_nports; + TAILQ_ENTRY(spdk_nvmf_fc_port) link; + + struct spdk_nvmf_fc_ls_rsrc_pool ls_rsrc_pool; + struct spdk_mempool *io_rsrc_pool; /* Pools to store bdev_io's for this port */ + void *port_ctx; +}; + +/* + * NVMF FC Request + */ +struct spdk_nvmf_fc_request { + struct spdk_nvmf_request req; + struct spdk_nvmf_fc_ersp_iu ersp; + uint32_t poller_lcore; /* for tracing purposes only */ + struct spdk_thread *poller_thread; + uint16_t buf_index; + struct spdk_nvmf_fc_xri *xri; + uint16_t oxid; + uint16_t rpi; + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_hwqp *hwqp; + int state; + uint32_t transfered_len; + bool is_aborted; + uint32_t magic; + uint32_t s_id; + uint32_t d_id; + TAILQ_ENTRY(spdk_nvmf_fc_request) link; + TAILQ_ENTRY(spdk_nvmf_fc_request) pending_link; + TAILQ_HEAD(, spdk_nvmf_fc_caller_ctx) abort_cbs; +}; + +SPDK_STATIC_ASSERT(!offsetof(struct spdk_nvmf_fc_request, req), + "FC request and NVMF request address don't match."); + +/* + * NVMF FC Association + */ +struct spdk_nvmf_fc_association { + uint64_t assoc_id; + uint32_t s_id; + struct spdk_nvmf_fc_nport *tgtport; + struct spdk_nvmf_fc_remote_port_info *rport; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_host *host; + enum spdk_nvmf_fc_object_state assoc_state; + + char host_id[FCNVME_ASSOC_HOSTID_LEN]; + char host_nqn[FCNVME_ASSOC_HOSTNQN_LEN]; + char sub_nqn[FCNVME_ASSOC_HOSTNQN_LEN]; + + struct spdk_nvmf_fc_conn *aq_conn; /* connection for admin queue */ + + uint16_t conn_count; + TAILQ_HEAD(, spdk_nvmf_fc_conn) fc_conns; + + void *conns_buf; + TAILQ_HEAD(, spdk_nvmf_fc_conn) avail_fc_conns; + + TAILQ_ENTRY(spdk_nvmf_fc_association) link; + + /* for port's association free list */ + TAILQ_ENTRY(spdk_nvmf_fc_association) port_free_assoc_list_link; + + void *ls_del_op_ctx; /* delete assoc. 
callback list */ + + /* req/resp buffers used to send disconnect to initiator */ + struct spdk_nvmf_fc_send_srsr snd_disconn_bufs; +}; + +/* + * FC Remote Port + */ +struct spdk_nvmf_fc_remote_port_info { + uint32_t s_id; + uint32_t rpi; + uint32_t assoc_count; + struct spdk_nvmf_fc_wwn fc_nodename; + struct spdk_nvmf_fc_wwn fc_portname; + enum spdk_nvmf_fc_object_state rport_state; + TAILQ_ENTRY(spdk_nvmf_fc_remote_port_info) link; +}; + +/* + * Poller API error codes + */ +enum spdk_nvmf_fc_poller_api_ret { + SPDK_NVMF_FC_POLLER_API_SUCCESS = 0, + SPDK_NVMF_FC_POLLER_API_ERROR, + SPDK_NVMF_FC_POLLER_API_INVALID_ARG, + SPDK_NVMF_FC_POLLER_API_NO_CONN_ID, + SPDK_NVMF_FC_POLLER_API_DUP_CONN_ID, + SPDK_NVMF_FC_POLLER_API_OXID_NOT_FOUND, +}; + +/* + * Poller API definitions + */ +enum spdk_nvmf_fc_poller_api { + SPDK_NVMF_FC_POLLER_API_ADD_CONNECTION, + SPDK_NVMF_FC_POLLER_API_DEL_CONNECTION, + SPDK_NVMF_FC_POLLER_API_QUIESCE_QUEUE, + SPDK_NVMF_FC_POLLER_API_ACTIVATE_QUEUE, + SPDK_NVMF_FC_POLLER_API_ABTS_RECEIVED, + SPDK_NVMF_FC_POLLER_API_ADAPTER_EVENT, + SPDK_NVMF_FC_POLLER_API_AEN, + SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC, + SPDK_NVMF_FC_POLLER_API_QUEUE_SYNC_DONE, +}; + +/* + * Poller API callback function proto + */ +typedef void (*spdk_nvmf_fc_poller_api_cb)(void *cb_data, enum spdk_nvmf_fc_poller_api_ret ret); + +/* + * Poller API callback data + */ +struct spdk_nvmf_fc_poller_api_cb_info { + spdk_nvmf_fc_poller_api_cb cb_func; + void *cb_data; + enum spdk_nvmf_fc_poller_api_ret ret; +}; + +/* + * Poller API structures + */ +struct spdk_nvmf_fc_poller_api_add_connection_args { + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; +}; + +struct spdk_nvmf_fc_poller_api_del_connection_args { + struct spdk_nvmf_fc_conn *fc_conn; + struct spdk_nvmf_fc_hwqp *hwqp; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; + bool send_abts; + /* internal */ + int fc_request_cnt; +}; + +struct spdk_nvmf_fc_poller_api_quiesce_queue_args { + void *ctx; + struct spdk_nvmf_fc_hwqp *hwqp; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; +}; + +struct spdk_nvmf_fc_poller_api_activate_queue_args { + struct spdk_nvmf_fc_hwqp *hwqp; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; +}; + +struct spdk_nvmf_fc_poller_api_abts_recvd_args { + struct spdk_nvmf_fc_abts_ctx *ctx; + struct spdk_nvmf_fc_hwqp *hwqp; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; +}; + +struct spdk_nvmf_fc_poller_api_queue_sync_done_args { + struct spdk_nvmf_fc_hwqp *hwqp; + uint64_t tag; +}; + +/* + * NVMF LS request structure + */ +struct spdk_nvmf_fc_ls_rqst { + struct spdk_nvmf_fc_buffer_desc rqstbuf; + struct spdk_nvmf_fc_buffer_desc rspbuf; + uint32_t rqst_len; + uint32_t rsp_len; + uint32_t rpi; + struct spdk_nvmf_fc_xri *xri; + uint16_t oxid; + void *private_data; /* for LLD only (LS does not touch) */ + TAILQ_ENTRY(spdk_nvmf_fc_ls_rqst) ls_pending_link; + uint32_t s_id; + uint32_t d_id; + struct spdk_nvmf_fc_nport *nport; + struct spdk_nvmf_fc_remote_port_info *rport; + struct spdk_nvmf_tgt *nvmf_tgt; +}; + +/* + * RQ Buffer LS Overlay Structure + */ +#define FCNVME_LS_RSVD_SIZE (FCNVME_MAX_LS_BUFFER_SIZE - \ + (sizeof(struct spdk_nvmf_fc_ls_rqst) + FCNVME_MAX_LS_REQ_SIZE + FCNVME_MAX_LS_RSP_SIZE)) + +struct __attribute__((__packed__)) spdk_nvmf_fc_rq_buf_ls_request { + uint8_t rqst[FCNVME_MAX_LS_REQ_SIZE]; + uint8_t resp[FCNVME_MAX_LS_RSP_SIZE]; + struct spdk_nvmf_fc_ls_rqst ls_rqst; + uint8_t rsvd[FCNVME_LS_RSVD_SIZE]; +}; + +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fc_rq_buf_ls_request) == + 
FCNVME_MAX_LS_BUFFER_SIZE, "LS RQ Buffer overflow"); + + +struct spdk_nvmf_fc_poller_api_queue_sync_args { + uint64_t u_id; + struct spdk_nvmf_fc_hwqp *hwqp; + struct spdk_nvmf_fc_poller_api_cb_info cb_info; + + /* Used internally by poller */ + TAILQ_ENTRY(spdk_nvmf_fc_poller_api_queue_sync_args) link; +}; + +/* + * dump info + */ +struct spdk_nvmf_fc_queue_dump_info { + char *buffer; + int offset; +}; +#define SPDK_FC_HW_DUMP_BUF_SIZE (10 * 4096) + +static inline void +spdk_nvmf_fc_dump_buf_print(struct spdk_nvmf_fc_queue_dump_info *dump_info, char *fmt, ...) +{ + uint64_t buffer_size = SPDK_FC_HW_DUMP_BUF_SIZE; + int32_t avail = (int32_t)(buffer_size - dump_info->offset); + + if (avail > 0) { + va_list ap; + int32_t written; + + va_start(ap, fmt); + written = vsnprintf(dump_info->buffer + dump_info->offset, avail, fmt, ap); + if (written >= avail) { + dump_info->offset += avail; + } else { + dump_info->offset += written; + } + va_end(ap); + } +} + +/* + * NVMF FC caller callback definitions + */ +typedef void (*spdk_nvmf_fc_caller_cb)(void *hwqp, int32_t status, void *args); + +struct spdk_nvmf_fc_caller_ctx { + void *ctx; + spdk_nvmf_fc_caller_cb cb; + void *cb_args; + TAILQ_ENTRY(spdk_nvmf_fc_caller_ctx) link; +}; + +/* + * Low level FC driver function table (functions provided by vendor FC device driver) + */ +struct spdk_nvmf_fc_ll_drvr_ops { + + /* initialize the low level driver */ + int (*lld_init)(void); + + /* low level driver finish */ + void (*lld_fini)(void); + + /* initialize hw queues */ + int (*init_q)(struct spdk_nvmf_fc_hwqp *hwqp); + + void (*reinit_q)(spdk_nvmf_fc_lld_hwqp_t queues_prev, + spdk_nvmf_fc_lld_hwqp_t queues_curr); + + /* initialize hw queue buffers */ + int (*init_q_buffers)(struct spdk_nvmf_fc_hwqp *hwqp); + + /* poll the hw queues for requests */ + uint32_t (*poll_queue)(struct spdk_nvmf_fc_hwqp *hwqp); + + /* receive data (for data-in requests) */ + int (*recv_data)(struct spdk_nvmf_fc_request *fc_req); + + /* send data (for data-out requests) */ + int (*send_data)(struct spdk_nvmf_fc_request *fc_req); + + /* release hw queust buffer */ + void (*q_buffer_release)(struct spdk_nvmf_fc_hwqp *hwqp, uint16_t buff_idx); + + /* transmist nvme response */ + int (*xmt_rsp)(struct spdk_nvmf_fc_request *fc_req, uint8_t *ersp_buf, uint32_t ersp_len); + + /* transmist LS response */ + int (*xmt_ls_rsp)(struct spdk_nvmf_fc_nport *tgtport, struct spdk_nvmf_fc_ls_rqst *ls_rqst); + + /* issue abts */ + int (*issue_abort)(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_xri *xri, + bool send_abts, spdk_nvmf_fc_caller_cb cb, void *cb_args); + + /* transmit abts response */ + int (*xmt_bls_rsp)(struct spdk_nvmf_fc_hwqp *hwqp, uint16_t ox_id, uint16_t rx_id, uint16_t rpi, + bool rjt, uint8_t rjt_exp, spdk_nvmf_fc_caller_cb cb, void *cb_args); + + /* transmit single request - single response */ + int (*xmt_srsr_req)(struct spdk_nvmf_fc_hwqp *hwqp, struct spdk_nvmf_fc_send_srsr *srsr, + spdk_nvmf_fc_caller_cb cb, void *cb_args); + + /* issue queue marker (abts processing) */ + int (*issue_q_marker)(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t u_id, uint16_t skip_rq); + + /* assign a new connection to a hwqp (return connection ID) */ + struct spdk_nvmf_fc_hwqp *(*assign_conn_to_hwqp)( + struct spdk_nvmf_fc_hwqp *queues, uint32_t num_queues, + uint64_t *conn_id, uint32_t sq_size, bool for_aq); + + /* get the hwqp from the given connection id */ + struct spdk_nvmf_fc_hwqp *(*get_hwqp_from_conn_id)(struct spdk_nvmf_fc_hwqp *hwqp, + uint32_t num_queues, uint64_t conn_id); + + /* 
release connection ID (done with using it) */ + void (*release_conn)(struct spdk_nvmf_fc_hwqp *hwqp, uint64_t conn_id, uint32_t sq_size); + + /* dump all queue info into dump_info */ + void (*dump_all_queues)(struct spdk_nvmf_fc_hwqp *ls_queues, + struct spdk_nvmf_fc_hwqp *io_queues, + uint32_t num_queues, + struct spdk_nvmf_fc_queue_dump_info *dump_info); +}; + +extern struct spdk_nvmf_fc_ll_drvr_ops spdk_nvmf_fc_lld_ops; + +/* + * NVMF FC inline and function prototypes + */ + +static inline struct spdk_nvmf_fc_request * +spdk_nvmf_fc_get_fc_req(struct spdk_nvmf_request *req) +{ + return (struct spdk_nvmf_fc_request *) + ((uintptr_t)req - offsetof(struct spdk_nvmf_fc_request, req)); +} + +static inline bool +spdk_nvmf_fc_is_port_dead(struct spdk_nvmf_fc_hwqp *hwqp) +{ + switch (hwqp->fc_port->hw_port_status) { + case SPDK_FC_PORT_QUIESCED: + return true; + default: + return false; + } +} + +static inline bool +spdk_nvmf_fc_req_in_xfer(struct spdk_nvmf_fc_request *fc_req) +{ + switch (fc_req->state) { + case SPDK_NVMF_FC_REQ_READ_XFER: + case SPDK_NVMF_FC_REQ_READ_RSP: + case SPDK_NVMF_FC_REQ_WRITE_XFER: + case SPDK_NVMF_FC_REQ_WRITE_RSP: + case SPDK_NVMF_FC_REQ_NONE_RSP: + return true; + default: + return false; + } +} + +typedef void (*spdk_nvmf_fc_del_assoc_cb)(void *arg, uint32_t err); +int spdk_nvmf_fc_delete_association(struct spdk_nvmf_fc_nport *tgtport, + uint64_t assoc_id, bool send_abts, + spdk_nvmf_fc_del_assoc_cb del_assoc_cb, + void *cb_data); + +void spdk_nvmf_fc_ls_init(struct spdk_nvmf_fc_port *fc_port); + +void spdk_nvmf_fc_ls_fini(struct spdk_nvmf_fc_port *fc_port); + +struct spdk_nvmf_fc_port *spdk_nvmf_fc_port_list_get(uint8_t port_hdl); + +int spdk_nvmf_fc_nport_set_state(struct spdk_nvmf_fc_nport *nport, + enum spdk_nvmf_fc_object_state state); + +int spdk_nvmf_fc_assoc_set_state(struct spdk_nvmf_fc_association *assoc, + enum spdk_nvmf_fc_object_state state); + +bool spdk_nvmf_fc_nport_add_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port); + +bool spdk_nvmf_fc_nport_remove_rem_port(struct spdk_nvmf_fc_nport *nport, + struct spdk_nvmf_fc_remote_port_info *rem_port); + +void spdk_nvmf_fc_init_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp); + +void spdk_nvmf_fc_reinit_poller_queues(struct spdk_nvmf_fc_hwqp *hwqp, + void *queues_curr); + +void spdk_nvmf_fc_init_poller(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_hwqp *hwqp); + +void spdk_nvmf_fc_add_hwqp_to_poller(struct spdk_nvmf_fc_hwqp *hwqp, bool admin_q); + +void spdk_nvmf_fc_remove_hwqp_from_poller(struct spdk_nvmf_fc_hwqp *hwqp); + +bool spdk_nvmf_fc_port_is_offline(struct spdk_nvmf_fc_port *fc_port); + +int spdk_nvmf_fc_port_set_offline(struct spdk_nvmf_fc_port *fc_port); + +bool spdk_nvmf_fc_port_is_online(struct spdk_nvmf_fc_port *fc_port); + +int spdk_nvmf_fc_port_set_online(struct spdk_nvmf_fc_port *fc_port); + +int spdk_nvmf_fc_hwqp_port_set_online(struct spdk_nvmf_fc_hwqp *hwqp); + +int spdk_nvmf_fc_hwqp_port_set_offline(struct spdk_nvmf_fc_hwqp *hwqp); + +int spdk_nvmf_fc_rport_set_state(struct spdk_nvmf_fc_remote_port_info *rport, + enum spdk_nvmf_fc_object_state state); + +void spdk_nvmf_fc_port_list_add(struct spdk_nvmf_fc_port *fc_port); + +struct spdk_nvmf_fc_nport *spdk_nvmf_fc_nport_get(uint8_t port_hdl, uint16_t nport_hdl); + +int spdk_nvmf_fc_port_add_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport); + +uint32_t spdk_nvmf_fc_nport_get_association_count(struct spdk_nvmf_fc_nport *nport); + +int 
spdk_nvmf_fc_port_remove_nport(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_nport *nport); + +uint32_t spdk_nvmf_fc_get_prli_service_params(void); + +bool spdk_nvmf_fc_nport_is_rport_empty(struct spdk_nvmf_fc_nport *nport); + +void spdk_nvmf_fc_handle_abts_frame(struct spdk_nvmf_fc_nport *nport, + uint16_t rpi, uint16_t oxid, + uint16_t rxid); + +void spdk_nvmf_fc_dump_all_queues(struct spdk_nvmf_fc_port *fc_port, + struct spdk_nvmf_fc_queue_dump_info *dump_info); + +void spdk_nvmf_fc_handle_ls_rqst(struct spdk_nvmf_fc_ls_rqst *ls_rqst); + +int spdk_nvmf_fc_xmt_ls_rsp(struct spdk_nvmf_fc_nport *tgtport, + struct spdk_nvmf_fc_ls_rqst *ls_rqst); + +struct spdk_nvmf_fc_nport *spdk_nvmf_bcm_req_fc_nport_get(struct spdk_nvmf_request *req); + +struct spdk_nvmf_fc_association *spdk_nvmf_fc_get_ctrlr_assoc(struct spdk_nvmf_ctrlr *ctrlr); + +bool spdk_nvmf_fc_nport_is_association_empty(struct spdk_nvmf_fc_nport *nport); + +int spdk_nvmf_fc_xmt_srsr_req(struct spdk_nvmf_fc_hwqp *hwqp, + struct spdk_nvmf_fc_send_srsr *srsr, + spdk_nvmf_fc_caller_cb cb, void *cb_args); + +uint32_t spdk_nvmf_fc_get_num_nport_ctrlrs_in_subsystem(uint8_t port_hdl, uint16_t nport_hdl, + struct spdk_nvmf_subsystem *subsys); + +bool spdk_nvmf_fc_is_spdk_ctrlr_on_nport(uint8_t port_hdl, uint16_t nport_hdl, + struct spdk_nvmf_ctrlr *ctrlr); + +int spdk_nvmf_fc_get_ctrlr_init_traddr(char *traddr, struct spdk_nvmf_ctrlr *ctrlr); + +uint32_t spdk_nvmf_fc_get_hwqp_id(struct spdk_nvmf_request *req); + +void spdk_nvmf_fc_req_abort(struct spdk_nvmf_fc_request *fc_req, + bool send_abts, spdk_nvmf_fc_caller_cb cb, + void *cb_args); + +int spdk_nvmf_fc_add_port_listen(void *arg1, void *arg2); + +int spdk_nvmf_fc_remove_port_listen(void *arg1, void *arg2); + +void spdk_nvmf_fc_subsys_connect_cb(void *cb_ctx, + struct spdk_nvmf_request *req); + +void spdk_nvmf_fc_subsys_disconnect_cb(void *cb_ctx, + struct spdk_nvmf_qpair *qpair); + +uint32_t spdk_nvmf_fc_get_master_lcore(void); + +struct spdk_thread *spdk_nvmf_fc_get_master_thread(void); + +/* + * These functions are used by low level FC driver + */ + +static inline struct spdk_nvmf_fc_conn * +spdk_nvmf_fc_get_conn(struct spdk_nvmf_qpair *qpair) +{ + return (struct spdk_nvmf_fc_conn *) + ((uintptr_t)qpair - offsetof(struct spdk_nvmf_fc_conn, qpair)); +} + +static inline uint16_t +spdk_nvmf_fc_advance_conn_sqhead(struct spdk_nvmf_qpair *qpair) +{ + /* advance sq_head pointer - wrap if needed */ + qpair->sq_head = (qpair->sq_head == qpair->sq_head_max) ? + 0 : (qpair->sq_head + 1); + return qpair->sq_head; +} + +static inline bool +spdk_nvmf_fc_use_send_frame(struct spdk_nvmf_request *req) +{ + /* For now use for only keepalives. 
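+ * i.e. Keep Alive commands arriving on the admin queue (qid 0), as checked below.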
*/ + if (req->qpair->qid == 0 && + (req->cmd->nvme_cmd.opc == SPDK_NVME_OPC_KEEP_ALIVE)) { + return true; + } + return false; +} + +enum spdk_nvmf_fc_poller_api_ret spdk_nvmf_fc_poller_api_func( + struct spdk_nvmf_fc_hwqp *hwqp, + enum spdk_nvmf_fc_poller_api api, + void *api_args); + +int spdk_nvmf_fc_process_frame(struct spdk_nvmf_fc_hwqp *hwqp, uint32_t buff_idx, + struct spdk_nvmf_fc_frame_hdr *frame, + struct spdk_nvmf_fc_buffer_desc *buffer, uint32_t plen); + +void spdk_nvmf_fc_process_pending_req(struct spdk_nvmf_fc_hwqp *hwqp); + +void spdk_nvmf_fc_process_pending_ls_rqst(struct spdk_nvmf_fc_hwqp *hwqp); + +void spdk_nvmf_fc_req_set_state(struct spdk_nvmf_fc_request *fc_req, + enum spdk_nvmf_fc_request_state state); + +void spdk_nvmf_fc_free_req(struct spdk_nvmf_fc_request *fc_req); + +void spdk_nvmf_fc_req_abort_complete(void *arg1); + +bool spdk_nvmf_fc_send_ersp_required(struct spdk_nvmf_fc_request *fc_req, + uint32_t rsp_cnt, uint32_t xfer_len); + +struct spdk_nvmf_fc_xri *spdk_nvmf_fc_get_xri(struct spdk_nvmf_fc_hwqp *hwqp); + +int spdk_nvmf_fc_put_xri(struct spdk_nvmf_fc_hwqp *hwqp, + struct spdk_nvmf_fc_xri *xri); + +void spdk_nvmf_fc_release_xri(struct spdk_nvmf_fc_hwqp *hwqp, + struct spdk_nvmf_fc_xri *xri, bool xb, bool abts); + +int spdk_nvmf_fc_handle_rsp(struct spdk_nvmf_fc_request *req); +#endif diff --git a/src/spdk/lib/nvmf/nvmf_internal.h b/src/spdk/lib/nvmf/nvmf_internal.h new file mode 100644 index 00000000..c9c7bf36 --- /dev/null +++ b/src/spdk/lib/nvmf/nvmf_internal.h @@ -0,0 +1,333 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __NVMF_INTERNAL_H__ +#define __NVMF_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/likely.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_spec.h" +#include "spdk/assert.h" +#include "spdk/bdev.h" +#include "spdk/queue.h" +#include "spdk/util.h" +#include "spdk/thread.h" + +#define SPDK_NVMF_MAX_SGL_ENTRIES 16 + +enum spdk_nvmf_subsystem_state { + SPDK_NVMF_SUBSYSTEM_INACTIVE = 0, + SPDK_NVMF_SUBSYSTEM_ACTIVATING, + SPDK_NVMF_SUBSYSTEM_ACTIVE, + SPDK_NVMF_SUBSYSTEM_PAUSING, + SPDK_NVMF_SUBSYSTEM_PAUSED, + SPDK_NVMF_SUBSYSTEM_RESUMING, + SPDK_NVMF_SUBSYSTEM_DEACTIVATING, +}; + +enum spdk_nvmf_qpair_state { + SPDK_NVMF_QPAIR_UNINITIALIZED = 0, + SPDK_NVMF_QPAIR_INACTIVE, + SPDK_NVMF_QPAIR_ACTIVATING, + SPDK_NVMF_QPAIR_ACTIVE, + SPDK_NVMF_QPAIR_DEACTIVATING, + SPDK_NVMF_QPAIR_ERROR, +}; + +typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status); + +struct spdk_nvmf_tgt { + struct spdk_nvmf_tgt_opts opts; + + uint64_t discovery_genctr; + + /* Array of subsystem pointers of size max_subsystems indexed by sid */ + struct spdk_nvmf_subsystem **subsystems; + + struct spdk_nvmf_discovery_log_page *discovery_log_page; + size_t discovery_log_page_size; + TAILQ_HEAD(, spdk_nvmf_transport) transports; + + spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; + void *destroy_cb_arg; +}; + +struct spdk_nvmf_host { + char *nqn; + TAILQ_ENTRY(spdk_nvmf_host) link; +}; + +struct spdk_nvmf_listener { + struct spdk_nvme_transport_id trid; + struct spdk_nvmf_transport *transport; + TAILQ_ENTRY(spdk_nvmf_listener) link; +}; + +struct spdk_nvmf_transport_poll_group { + struct spdk_nvmf_transport *transport; + TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link; +}; + +struct spdk_nvmf_subsystem_poll_group { + /* Array of channels for each namespace indexed by nsid - 1 */ + struct spdk_io_channel **channels; + uint32_t num_channels; + + enum spdk_nvmf_subsystem_state state; + + TAILQ_HEAD(, spdk_nvmf_request) queued; +}; + +struct spdk_nvmf_poll_group { + struct spdk_thread *thread; + struct spdk_poller *poller; + + TAILQ_HEAD(, spdk_nvmf_transport_poll_group) tgroups; + + /* Array of poll groups indexed by subsystem id (sid) */ + struct spdk_nvmf_subsystem_poll_group *sgroups; + uint32_t num_sgroups; + + /* All of the queue pairs that belong to this poll group */ + TAILQ_HEAD(, spdk_nvmf_qpair) qpairs; +}; + +typedef enum _spdk_nvmf_request_exec_status { + SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE, + SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS, +} spdk_nvmf_request_exec_status; + +union nvmf_h2c_msg { + struct spdk_nvmf_capsule_cmd nvmf_cmd; + struct spdk_nvme_cmd nvme_cmd; + struct spdk_nvmf_fabric_prop_set_cmd prop_set_cmd; + struct spdk_nvmf_fabric_prop_get_cmd prop_get_cmd; + struct spdk_nvmf_fabric_connect_cmd connect_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size"); + +union nvmf_c2h_msg { + struct spdk_nvme_cpl nvme_cpl; + struct spdk_nvmf_fabric_prop_get_rsp prop_get_rsp; + struct spdk_nvmf_fabric_connect_rsp connect_rsp; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size"); + +struct spdk_nvmf_request { + struct spdk_nvmf_qpair *qpair; + uint32_t length; + enum spdk_nvme_data_transfer xfer; + void *data; + union nvmf_h2c_msg *cmd; + union nvmf_c2h_msg *rsp; + struct iovec iov[SPDK_NVMF_MAX_SGL_ENTRIES]; + uint32_t iovcnt; + struct spdk_bdev_io_wait_entry bdev_io_wait; + + TAILQ_ENTRY(spdk_nvmf_request) link; +}; + +struct spdk_nvmf_ns { + struct spdk_nvmf_subsystem *subsystem; + struct spdk_bdev *bdev; + struct 
spdk_bdev_desc *desc; + struct spdk_nvmf_ns_opts opts; +}; + +struct spdk_nvmf_qpair { + enum spdk_nvmf_qpair_state state; + spdk_nvmf_state_change_done state_cb; + void *state_cb_arg; + + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_ctrlr *ctrlr; + struct spdk_nvmf_poll_group *group; + + uint16_t qid; + uint16_t sq_head; + uint16_t sq_head_max; + + TAILQ_HEAD(, spdk_nvmf_request) outstanding; + TAILQ_ENTRY(spdk_nvmf_qpair) link; +}; + +struct spdk_nvmf_ctrlr_feat { + union spdk_nvme_feat_arbitration arbitration; + union spdk_nvme_feat_power_management power_management; + union spdk_nvme_feat_error_recovery error_recovery; + union spdk_nvme_feat_volatile_write_cache volatile_write_cache; + union spdk_nvme_feat_number_of_queues number_of_queues; + union spdk_nvme_feat_write_atomicity write_atomicity; + union spdk_nvme_feat_async_event_configuration async_event_configuration; + union spdk_nvme_feat_keep_alive_timer keep_alive_timer; +}; + +/* + * This structure represents an NVMe-oF controller, + * which is like a "session" in networking terms. + */ +struct spdk_nvmf_ctrlr { + uint16_t cntlid; + struct spdk_nvmf_subsystem *subsys; + + struct { + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + union spdk_nvme_cc_register cc; + union spdk_nvme_csts_register csts; + } vcprop; /* virtual controller properties */ + + struct spdk_nvmf_ctrlr_feat feat; + + struct spdk_nvmf_qpair *admin_qpair; + struct spdk_thread *thread; + struct spdk_bit_array *qpair_mask; + + struct spdk_nvmf_request *aer_req; + union spdk_nvme_async_event_completion notice_event; + uint8_t hostid[16]; + + uint16_t changed_ns_list_count; + struct spdk_nvme_ns_list changed_ns_list; + + TAILQ_ENTRY(spdk_nvmf_ctrlr) link; +}; + +struct spdk_nvmf_subsystem { + struct spdk_thread *thread; + uint32_t id; + enum spdk_nvmf_subsystem_state state; + + char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; + enum spdk_nvmf_subtype subtype; + uint16_t next_cntlid; + bool allow_any_host; + + struct spdk_nvmf_tgt *tgt; + + char sn[SPDK_NVME_CTRLR_SN_LEN + 1]; + + /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */ + struct spdk_nvmf_ns **ns; + uint32_t max_nsid; + /* This is the maximum allowed nsid to a subsystem */ + uint32_t max_allowed_nsid; + + TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs; + + TAILQ_HEAD(, spdk_nvmf_host) hosts; + + TAILQ_HEAD(, spdk_nvmf_listener) listeners; + + TAILQ_ENTRY(spdk_nvmf_subsystem) entries; +}; + +typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status); + +struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, + enum spdk_nvme_transport_type); + +int spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_transport *transport); +int spdk_nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem); +int spdk_nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void spdk_nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void spdk_nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, + struct spdk_nvmf_subsystem *subsystem, 
spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); +void spdk_nvmf_request_exec(struct spdk_nvmf_request *req); +int spdk_nvmf_request_free(struct spdk_nvmf_request *req); +int spdk_nvmf_request_complete(struct spdk_nvmf_request *req); + +void spdk_nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, + void *buffer, uint64_t offset, + uint32_t length); + +void spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr); +int spdk_nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req); +int spdk_nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req); +int spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req); +bool spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr); +bool spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr); +void spdk_nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid); + +void spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata); + +int spdk_nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr); +void spdk_nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr); +struct spdk_nvmf_ctrlr *spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, + uint16_t cntlid); +int spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr); + +/* + * Abort aer is sent on a per controller basis and sends a completion for the aer to the host. + * This function should be called when attempting to recover in error paths when it is OK for + * the host to send a subsequent AER. + */ +void spdk_nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr); + +/* + * Free aer simply frees the rdma resources for the aer without informing the host. + * This function should be called when deleting a qpair when one wants to make sure + * the qpair is completely empty before freeing the request. The reason we free the + * AER without sending a completion is to prevent the host from sending another AER. + */ +void spdk_nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair); + +static inline struct spdk_nvmf_ns * +_spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + /* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */ + if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) { + return NULL; + } + + return subsystem->ns[nsid - 1]; +} + +static inline bool +spdk_nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair) +{ + return qpair->qid == 0; +} + +#endif /* __NVMF_INTERNAL_H__ */ diff --git a/src/spdk/lib/nvmf/rdma.c b/src/spdk/lib/nvmf/rdma.c new file mode 100644 index 00000000..333e703f --- /dev/null +++ b/src/spdk/lib/nvmf/rdma.c @@ -0,0 +1,2930 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include +#include +#include + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/config.h" +#include "spdk/assert.h" +#include "spdk/thread.h" +#include "spdk/nvmf.h" +#include "spdk/nvmf_spec.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +/* + RDMA Connection Resource Defaults + */ +#define NVMF_DEFAULT_TX_SGE 1 +#define NVMF_DEFAULT_RX_SGE 2 +#define NVMF_DEFAULT_DATA_SGE 16 + +/* The RDMA completion queue size */ +#define NVMF_RDMA_CQ_SIZE 4096 + +/* AIO backend requires block size aligned data buffers, + * extra 4KiB aligned data buffer should work for most devices. + */ +#define SHIFT_4KB 12 +#define NVMF_DATA_BUFFER_ALIGNMENT (1 << SHIFT_4KB) +#define NVMF_DATA_BUFFER_MASK (NVMF_DATA_BUFFER_ALIGNMENT - 1) + +enum spdk_nvmf_rdma_request_state { + /* The request is not currently in use */ + RDMA_REQUEST_STATE_FREE = 0, + + /* Initial state when request first received */ + RDMA_REQUEST_STATE_NEW, + + /* The request is queued until a data buffer is available. */ + RDMA_REQUEST_STATE_NEED_BUFFER, + + /* The request is waiting on RDMA queue depth availability + * to transfer data between the host and the controller. + */ + RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING, + + /* The request is currently transferring data from the host to the controller. */ + RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + + /* The request is ready to execute at the block device */ + RDMA_REQUEST_STATE_READY_TO_EXECUTE, + + /* The request is currently executing at the block device */ + RDMA_REQUEST_STATE_EXECUTING, + + /* The request finished executing at the block device */ + RDMA_REQUEST_STATE_EXECUTED, + + /* The request is ready to send a completion */ + RDMA_REQUEST_STATE_READY_TO_COMPLETE, + + /* The request is currently transferring data from the controller to the host. */ + RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + + /* The request currently has an outstanding completion without an + * associated data transfer. + */ + RDMA_REQUEST_STATE_COMPLETING, + + /* The request completed and can be marked free. 
*/ + RDMA_REQUEST_STATE_COMPLETED, + + /* Terminator */ + RDMA_REQUEST_NUM_STATES, +}; + +#define OBJECT_NVMF_RDMA_IO 0x40 + +#define TRACE_GROUP_NVMF_RDMA 0x4 +#define TRACE_RDMA_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x0) +#define TRACE_RDMA_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x1) +#define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x2) +#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x3) +#define TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x4) +#define TRACE_RDMA_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x5) +#define TRACE_RDMA_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x6) +#define TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x7) +#define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x8) +#define TRACE_RDMA_REQUEST_STATE_COMPLETING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x9) +#define TRACE_RDMA_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xA) +#define TRACE_RDMA_QP_CREATE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xB) +#define TRACE_RDMA_IBV_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xC) +#define TRACE_RDMA_CM_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xD) +#define TRACE_RDMA_QP_STATE_CHANGE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xE) +#define TRACE_RDMA_QP_DISCONNECT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xF) +#define TRACE_RDMA_QP_DESTROY SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x10) + +SPDK_TRACE_REGISTER_FN(nvmf_trace) +{ + spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r'); + spdk_trace_register_description("RDMA_REQ_NEW", "", + TRACE_RDMA_REQUEST_STATE_NEW, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 1, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_NEED_BUFFER", "", + TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_TX_PENDING_H_TO_C", "", + TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_TX_H_TO_C", "", + TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_RDY_TO_EXECUTE", "", + TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_EXECUTING", "", + TRACE_RDMA_REQUEST_STATE_EXECUTING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_EXECUTED", "", + TRACE_RDMA_REQUEST_STATE_EXECUTED, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPLETE", "", + TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETING_CONTROLLER_TO_HOST", "", + TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETING_INCAPSULE", "", + TRACE_RDMA_REQUEST_STATE_COMPLETING, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + spdk_trace_register_description("RDMA_REQ_COMPLETED", "", + TRACE_RDMA_REQUEST_STATE_COMPLETED, + OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); + + 
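+ /* The remaining descriptions cover queue pair lifecycle and async event trace points rather than per-request state transitions. */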
spdk_trace_register_description("RDMA_QP_CREATE", "", TRACE_RDMA_QP_CREATE, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("RDMA_IBV_ASYNC_EVENT", "", TRACE_RDMA_IBV_ASYNC_EVENT, + OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); + spdk_trace_register_description("RDMA_CM_ASYNC_EVENT", "", TRACE_RDMA_CM_ASYNC_EVENT, + OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); + spdk_trace_register_description("RDMA_QP_STATE_CHANGE", "", TRACE_RDMA_QP_STATE_CHANGE, + OWNER_NONE, OBJECT_NONE, 0, 1, "state: "); + spdk_trace_register_description("RDMA_QP_DISCONNECT", "", TRACE_RDMA_QP_DISCONNECT, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); + spdk_trace_register_description("RDMA_QP_DESTROY", "", TRACE_RDMA_QP_DESTROY, + OWNER_NONE, OBJECT_NONE, 0, 0, ""); +} + +/* This structure holds commands as they are received off the wire. + * It must be dynamically paired with a full request object + * (spdk_nvmf_rdma_request) to service a request. It is separate + * from the request because RDMA does not appear to order + * completions, so occasionally we'll get a new incoming + * command when there aren't any free request objects. + */ +struct spdk_nvmf_rdma_recv { + struct ibv_recv_wr wr; + struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE]; + + struct spdk_nvmf_rdma_qpair *qpair; + + /* In-capsule data buffer */ + uint8_t *buf; + + TAILQ_ENTRY(spdk_nvmf_rdma_recv) link; +}; + +struct spdk_nvmf_rdma_request { + struct spdk_nvmf_request req; + bool data_from_pool; + + enum spdk_nvmf_rdma_request_state state; + + struct spdk_nvmf_rdma_recv *recv; + + struct { + struct ibv_send_wr wr; + struct ibv_sge sgl[NVMF_DEFAULT_TX_SGE]; + } rsp; + + struct { + struct ibv_send_wr wr; + struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES]; + void *buffers[SPDK_NVMF_MAX_SGL_ENTRIES]; + } data; + + TAILQ_ENTRY(spdk_nvmf_rdma_request) link; + TAILQ_ENTRY(spdk_nvmf_rdma_request) state_link; +}; + +struct spdk_nvmf_rdma_qpair { + struct spdk_nvmf_qpair qpair; + + struct spdk_nvmf_rdma_port *port; + struct spdk_nvmf_rdma_poller *poller; + + struct rdma_cm_id *cm_id; + struct rdma_cm_id *listen_id; + + /* The maximum number of I/O outstanding on this connection at one time */ + uint16_t max_queue_depth; + + /* The maximum number of active RDMA READ and WRITE operations at one time */ + uint16_t max_rw_depth; + + /* Receives that are waiting for a request object */ + TAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue; + + /* Queues to track the requests in all states */ + TAILQ_HEAD(, spdk_nvmf_rdma_request) state_queue[RDMA_REQUEST_NUM_STATES]; + + /* Number of requests in each state */ + uint32_t state_cntr[RDMA_REQUEST_NUM_STATES]; + + int max_sge; + + /* Array of size "max_queue_depth" containing RDMA requests. */ + struct spdk_nvmf_rdma_request *reqs; + + /* Array of size "max_queue_depth" containing RDMA recvs. */ + struct spdk_nvmf_rdma_recv *recvs; + + /* Array of size "max_queue_depth" containing 64 byte capsules + * used for receive. + */ + union nvmf_h2c_msg *cmds; + struct ibv_mr *cmds_mr; + + /* Array of size "max_queue_depth" containing 16 byte completions + * to be sent back to the user. + */ + union nvmf_c2h_msg *cpls; + struct ibv_mr *cpls_mr; + + /* Array of size "max_queue_depth * InCapsuleDataSize" containing + * buffers to be used for in capsule data. 
+ */ + void *bufs; + struct ibv_mr *bufs_mr; + + TAILQ_ENTRY(spdk_nvmf_rdma_qpair) link; + + /* Mgmt channel */ + struct spdk_io_channel *mgmt_channel; + struct spdk_nvmf_rdma_mgmt_channel *ch; + + /* IBV queue pair attributes: they are used to manage + * qp state and recover from errors. + */ + struct ibv_qp_init_attr ibv_init_attr; + struct ibv_qp_attr ibv_attr; + + bool qpair_disconnected; + + /* Reference counter for how many unprocessed messages + * from other threads are currently outstanding. The + * qpair cannot be destroyed until this is 0. This is + * atomically incremented from any thread, but only + * decremented and read from the thread that owns this + * qpair. + */ + uint32_t refcnt; +}; + +struct spdk_nvmf_rdma_poller { + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poll_group *group; + + struct ibv_cq *cq; + + TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs; + + TAILQ_ENTRY(spdk_nvmf_rdma_poller) link; +}; + +struct spdk_nvmf_rdma_poll_group { + struct spdk_nvmf_transport_poll_group group; + + TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers; +}; + +/* Assuming rdma_cm uses just one protection domain per ibv_context. */ +struct spdk_nvmf_rdma_device { + struct ibv_device_attr attr; + struct ibv_context *context; + + struct spdk_mem_map *map; + struct ibv_pd *pd; + + TAILQ_ENTRY(spdk_nvmf_rdma_device) link; +}; + +struct spdk_nvmf_rdma_port { + struct spdk_nvme_transport_id trid; + struct rdma_cm_id *id; + struct spdk_nvmf_rdma_device *device; + uint32_t ref; + TAILQ_ENTRY(spdk_nvmf_rdma_port) link; +}; + +struct spdk_nvmf_rdma_transport { + struct spdk_nvmf_transport transport; + + struct rdma_event_channel *event_channel; + + struct spdk_mempool *data_buf_pool; + + pthread_mutex_t lock; + + /* fields used to poll RDMA/IB events */ + nfds_t npoll_fds; + struct pollfd *poll_fds; + + TAILQ_HEAD(, spdk_nvmf_rdma_device) devices; + TAILQ_HEAD(, spdk_nvmf_rdma_port) ports; +}; + +struct spdk_nvmf_rdma_mgmt_channel { + /* Requests that are waiting to obtain a data buffer */ + TAILQ_HEAD(, spdk_nvmf_rdma_request) pending_data_buf_queue; +}; + +static inline void +spdk_nvmf_rdma_qpair_inc_refcnt(struct spdk_nvmf_rdma_qpair *rqpair) +{ + __sync_fetch_and_add(&rqpair->refcnt, 1); +} + +static inline uint32_t +spdk_nvmf_rdma_qpair_dec_refcnt(struct spdk_nvmf_rdma_qpair *rqpair) +{ + uint32_t old_refcnt, new_refcnt; + + do { + old_refcnt = rqpair->refcnt; + assert(old_refcnt > 0); + new_refcnt = old_refcnt - 1; + } while (__sync_bool_compare_and_swap(&rqpair->refcnt, old_refcnt, new_refcnt) == false); + + return new_refcnt; +} + +/* API to IBV QueuePair */ +static const char *str_ibv_qp_state[] = { + "IBV_QPS_RESET", + "IBV_QPS_INIT", + "IBV_QPS_RTR", + "IBV_QPS_RTS", + "IBV_QPS_SQD", + "IBV_QPS_SQE", + "IBV_QPS_ERR" +}; + +static enum ibv_qp_state +spdk_nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) { + enum ibv_qp_state old_state, new_state; + int rc; + + /* All the attributes needed for recovery */ + static int spdk_nvmf_ibv_attr_mask = + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER | + IBV_QP_SQ_PSN | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_MAX_QP_RD_ATOMIC; + + old_state = rqpair->ibv_attr.qp_state; + rc = ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr, + spdk_nvmf_ibv_attr_mask, &rqpair->ibv_init_attr); + + if (rc) + { + SPDK_ERRLOG("Failed to get updated RDMA queue 
pair state!\n"); + assert(false); + } + + new_state = rqpair->ibv_attr.qp_state; + if (old_state != new_state) + { + spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0, + (uintptr_t)rqpair->cm_id, new_state); + } + return new_state; +} + +static int +spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair, + enum ibv_qp_state new_state) +{ + int rc; + enum ibv_qp_state state; + static int attr_mask_rc[] = { + [IBV_QPS_RESET] = IBV_QP_STATE, + [IBV_QPS_INIT] = (IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS), + [IBV_QPS_RTR] = (IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER), + [IBV_QPS_RTS] = (IBV_QP_STATE | + IBV_QP_SQ_PSN | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_MAX_QP_RD_ATOMIC), + [IBV_QPS_SQD] = IBV_QP_STATE, + [IBV_QPS_SQE] = IBV_QP_STATE, + [IBV_QPS_ERR] = IBV_QP_STATE, + }; + + switch (new_state) { + case IBV_QPS_RESET: + case IBV_QPS_INIT: + case IBV_QPS_RTR: + case IBV_QPS_RTS: + case IBV_QPS_SQD: + case IBV_QPS_SQE: + case IBV_QPS_ERR: + break; + default: + SPDK_ERRLOG("QP#%d: bad state requested: %u\n", + rqpair->qpair.qid, new_state); + return -1; + } + rqpair->ibv_attr.cur_qp_state = rqpair->ibv_attr.qp_state; + rqpair->ibv_attr.qp_state = new_state; + rqpair->ibv_attr.ah_attr.port_num = rqpair->ibv_attr.port_num; + + rc = ibv_modify_qp(rqpair->cm_id->qp, &rqpair->ibv_attr, + attr_mask_rc[new_state]); + + if (rc) { + SPDK_ERRLOG("QP#%d: failed to set state to: %s, %d (%s)\n", + rqpair->qpair.qid, str_ibv_qp_state[new_state], errno, strerror(errno)); + return rc; + } + + state = spdk_nvmf_rdma_update_ibv_state(rqpair); + + if (state != new_state) { + SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n", + rqpair->qpair.qid, str_ibv_qp_state[new_state], + str_ibv_qp_state[state]); + return -1; + } + SPDK_NOTICELOG("IBV QP#%u changed to: %s\n", rqpair->qpair.qid, + str_ibv_qp_state[state]); + return 0; +} + +static void +spdk_nvmf_rdma_request_set_state(struct spdk_nvmf_rdma_request *rdma_req, + enum spdk_nvmf_rdma_request_state state) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + + qpair = rdma_req->req.qpair; + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + TAILQ_REMOVE(&rqpair->state_queue[rdma_req->state], rdma_req, state_link); + rqpair->state_cntr[rdma_req->state]--; + + rdma_req->state = state; + + TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link); + rqpair->state_cntr[rdma_req->state]++; +} + +static int +spdk_nvmf_rdma_mgmt_channel_create(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_rdma_mgmt_channel *ch = ctx_buf; + + TAILQ_INIT(&ch->pending_data_buf_queue); + return 0; +} + +static void +spdk_nvmf_rdma_mgmt_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_nvmf_rdma_mgmt_channel *ch = ctx_buf; + + if (!TAILQ_EMPTY(&ch->pending_data_buf_queue)) { + SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n"); + } +} + +static int +spdk_nvmf_rdma_cur_rw_depth(struct spdk_nvmf_rdma_qpair *rqpair) +{ + return rqpair->state_cntr[RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER] + + rqpair->state_cntr[RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST]; +} + +static int +spdk_nvmf_rdma_cur_queue_depth(struct spdk_nvmf_rdma_qpair *rqpair) +{ + return rqpair->max_queue_depth - + rqpair->state_cntr[RDMA_REQUEST_STATE_FREE]; +} + +static void +spdk_nvmf_rdma_qpair_destroy(struct 
spdk_nvmf_rdma_qpair *rqpair) +{ + spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0); + + if (spdk_nvmf_rdma_cur_queue_depth(rqpair)) { + rqpair->qpair_disconnected = true; + return; + } + + if (rqpair->refcnt > 0) { + return; + } + + if (rqpair->poller) { + TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link); + } + + if (rqpair->cmds_mr) { + ibv_dereg_mr(rqpair->cmds_mr); + } + + if (rqpair->cpls_mr) { + ibv_dereg_mr(rqpair->cpls_mr); + } + + if (rqpair->bufs_mr) { + ibv_dereg_mr(rqpair->bufs_mr); + } + + if (rqpair->cm_id) { + rdma_destroy_qp(rqpair->cm_id); + rdma_destroy_id(rqpair->cm_id); + } + + if (rqpair->mgmt_channel) { + spdk_put_io_channel(rqpair->mgmt_channel); + } + + /* Free all memory */ + spdk_dma_free(rqpair->cmds); + spdk_dma_free(rqpair->cpls); + spdk_dma_free(rqpair->bufs); + free(rqpair->reqs); + free(rqpair->recvs); + free(rqpair); +} + +static int +spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_qpair *rqpair; + int rc, i; + struct spdk_nvmf_rdma_recv *rdma_recv; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_transport *transport; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport); + transport = &rtransport->transport; + + memset(&rqpair->ibv_init_attr, 0, sizeof(struct ibv_qp_init_attr)); + rqpair->ibv_init_attr.qp_context = rqpair; + rqpair->ibv_init_attr.qp_type = IBV_QPT_RC; + rqpair->ibv_init_attr.send_cq = rqpair->poller->cq; + rqpair->ibv_init_attr.recv_cq = rqpair->poller->cq; + rqpair->ibv_init_attr.cap.max_send_wr = rqpair->max_queue_depth * + 2; /* SEND, READ, and WRITE operations */ + rqpair->ibv_init_attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */ + rqpair->ibv_init_attr.cap.max_send_sge = rqpair->max_sge; + rqpair->ibv_init_attr.cap.max_recv_sge = NVMF_DEFAULT_RX_SGE; + + rc = rdma_create_qp(rqpair->cm_id, rqpair->port->device->pd, &rqpair->ibv_init_attr); + if (rc) { + SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno)); + rdma_destroy_id(rqpair->cm_id); + rqpair->cm_id = NULL; + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + + spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair); + + rqpair->reqs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->reqs)); + rqpair->recvs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->recvs)); + rqpair->cmds = spdk_dma_zmalloc(rqpair->max_queue_depth * sizeof(*rqpair->cmds), + 0x1000, NULL); + rqpair->cpls = spdk_dma_zmalloc(rqpair->max_queue_depth * sizeof(*rqpair->cpls), + 0x1000, NULL); + + + if (transport->opts.in_capsule_data_size > 0) { + rqpair->bufs = spdk_dma_zmalloc(rqpair->max_queue_depth * + transport->opts.in_capsule_data_size, + 0x1000, NULL); + } + + if (!rqpair->reqs || !rqpair->recvs || !rqpair->cmds || + !rqpair->cpls || (transport->opts.in_capsule_data_size && !rqpair->bufs)) { + SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n"); + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + + rqpair->cmds_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cmds, + rqpair->max_queue_depth * sizeof(*rqpair->cmds), + IBV_ACCESS_LOCAL_WRITE); + rqpair->cpls_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cpls, + rqpair->max_queue_depth * sizeof(*rqpair->cpls), + 0); + + if 
(transport->opts.in_capsule_data_size) { + rqpair->bufs_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->bufs, + rqpair->max_queue_depth * + transport->opts.in_capsule_data_size, + IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + } + + if (!rqpair->cmds_mr || !rqpair->cpls_mr || (transport->opts.in_capsule_data_size && + !rqpair->bufs_mr)) { + SPDK_ERRLOG("Unable to register required memory for RDMA queue.\n"); + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %lx LKey: %x\n", + rqpair->cmds, rqpair->max_queue_depth * sizeof(*rqpair->cmds), rqpair->cmds_mr->lkey); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %lx LKey: %x\n", + rqpair->cpls, rqpair->max_queue_depth * sizeof(*rqpair->cpls), rqpair->cpls_mr->lkey); + if (rqpair->bufs && rqpair->bufs_mr) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %x LKey: %x\n", + rqpair->bufs, rqpair->max_queue_depth * + transport->opts.in_capsule_data_size, rqpair->bufs_mr->lkey); + } + + /* Initialise request state queues and counters of the queue pair */ + for (i = RDMA_REQUEST_STATE_FREE; i < RDMA_REQUEST_NUM_STATES; i++) { + TAILQ_INIT(&rqpair->state_queue[i]); + rqpair->state_cntr[i] = 0; + } + + for (i = 0; i < rqpair->max_queue_depth; i++) { + struct ibv_recv_wr *bad_wr = NULL; + + rdma_recv = &rqpair->recvs[i]; + rdma_recv->qpair = rqpair; + + /* Set up memory to receive commands */ + if (rqpair->bufs) { + rdma_recv->buf = (void *)((uintptr_t)rqpair->bufs + (i * + transport->opts.in_capsule_data_size)); + } + + rdma_recv->sgl[0].addr = (uintptr_t)&rqpair->cmds[i]; + rdma_recv->sgl[0].length = sizeof(rqpair->cmds[i]); + rdma_recv->sgl[0].lkey = rqpair->cmds_mr->lkey; + rdma_recv->wr.num_sge = 1; + + if (rdma_recv->buf && rqpair->bufs_mr) { + rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf; + rdma_recv->sgl[1].length = transport->opts.in_capsule_data_size; + rdma_recv->sgl[1].lkey = rqpair->bufs_mr->lkey; + rdma_recv->wr.num_sge++; + } + + rdma_recv->wr.wr_id = (uintptr_t)rdma_recv; + rdma_recv->wr.sg_list = rdma_recv->sgl; + + rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_recv->wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Unable to post capsule for RDMA RECV\n"); + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + } + + for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_req = &rqpair->reqs[i]; + + rdma_req->req.qpair = &rqpair->qpair; + rdma_req->req.cmd = NULL; + + /* Set up memory to send responses */ + rdma_req->req.rsp = &rqpair->cpls[i]; + + rdma_req->rsp.sgl[0].addr = (uintptr_t)&rqpair->cpls[i]; + rdma_req->rsp.sgl[0].length = sizeof(rqpair->cpls[i]); + rdma_req->rsp.sgl[0].lkey = rqpair->cpls_mr->lkey; + + rdma_req->rsp.wr.wr_id = (uintptr_t)rdma_req; + rdma_req->rsp.wr.next = NULL; + rdma_req->rsp.wr.opcode = IBV_WR_SEND; + rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED; + rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl; + rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl); + + /* Set up memory for data buffers */ + rdma_req->data.wr.wr_id = (uint64_t)rdma_req; + rdma_req->data.wr.next = NULL; + rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED; + rdma_req->data.wr.sg_list = rdma_req->data.sgl; + rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl); + + /* Initialize request state to FREE */ + rdma_req->state = RDMA_REQUEST_STATE_FREE; + TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link); + rqpair->state_cntr[rdma_req->state]++; + } + + return 0; +} + +static int +request_transfer_in(struct 
spdk_nvmf_request *req) +{ + int rc; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + struct ibv_send_wr *bad_wr = NULL; + + qpair = req->qpair; + rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER); + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA READ POSTED. Request: %p Connection: %p\n", req, qpair); + + rdma_req->data.wr.opcode = IBV_WR_RDMA_READ; + rdma_req->data.wr.next = NULL; + rc = ibv_post_send(rqpair->cm_id->qp, &rdma_req->data.wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Unable to transfer data from host to target\n"); + return -1; + } + return 0; +} + +static int +request_transfer_out(struct spdk_nvmf_request *req, int *data_posted) +{ + int rc; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvme_cpl *rsp; + struct ibv_recv_wr *bad_recv_wr = NULL; + struct ibv_send_wr *send_wr, *bad_send_wr = NULL; + + *data_posted = 0; + qpair = req->qpair; + rsp = &req->rsp->nvme_cpl; + rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + /* Advance our sq_head pointer */ + if (qpair->sq_head == qpair->sq_head_max) { + qpair->sq_head = 0; + } else { + qpair->sq_head++; + } + rsp->sqhd = qpair->sq_head; + + /* Post the capsule to the recv buffer */ + assert(rdma_req->recv != NULL); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv, + rqpair); + rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr); + if (rc) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + return rc; + } + rdma_req->recv = NULL; + + /* Build the response which consists of an optional + * RDMA WRITE to transfer data, plus an RDMA SEND + * containing the response. + */ + send_wr = &rdma_req->rsp.wr; + + if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && + req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA WRITE POSTED. Request: %p Connection: %p\n", req, qpair); + + rdma_req->data.wr.opcode = IBV_WR_RDMA_WRITE; + + rdma_req->data.wr.next = send_wr; + *data_posted = 1; + send_wr = &rdma_req->data.wr; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA SEND POSTED. 
Request: %p Connection: %p\n", req, qpair); + + /* Send the completion */ + rc = ibv_post_send(rqpair->cm_id->qp, send_wr, &bad_send_wr); + if (rc) { + SPDK_ERRLOG("Unable to send response capsule\n"); + } + + return rc; +} + +static int +spdk_nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_accept_private_data accept_data; + struct rdma_conn_param ctrlr_event_data = {}; + int rc; + + accept_data.recfmt = 0; + accept_data.crqsize = rqpair->max_queue_depth; + + ctrlr_event_data.private_data = &accept_data; + ctrlr_event_data.private_data_len = sizeof(accept_data); + if (id->ps == RDMA_PS_TCP) { + ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */ + ctrlr_event_data.initiator_depth = rqpair->max_rw_depth; + } + + rc = rdma_accept(id, &ctrlr_event_data); + if (rc) { + SPDK_ERRLOG("Error %d on rdma_accept\n", errno); + } else { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Sent back the accept\n"); + } + + return rc; +} + +static void +spdk_nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error) +{ + struct spdk_nvmf_rdma_reject_private_data rej_data; + + rej_data.recfmt = 0; + rej_data.sts = error; + + rdma_reject(id, &rej_data, sizeof(rej_data)); +} + +static int +nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event, + new_qpair_fn cb_fn) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_qpair *rqpair = NULL; + struct spdk_nvmf_rdma_port *port; + struct rdma_conn_param *rdma_param = NULL; + const struct spdk_nvmf_rdma_request_private_data *private_data = NULL; + uint16_t max_queue_depth; + uint16_t max_rw_depth; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + assert(event->id != NULL); /* Impossible. Can't even reject the connection. */ + assert(event->id->verbs != NULL); /* Impossible. No way to handle this. */ + + rdma_param = &event->param.conn; + if (rdma_param->private_data == NULL || + rdma_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) { + SPDK_ERRLOG("connect request: no private data provided\n"); + spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH); + return -1; + } + + private_data = rdma_param->private_data; + if (private_data->recfmt != 0) { + SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n"); + spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Connect Recv on fabric intf name %s, dev_name %s\n", + event->id->verbs->device->name, event->id->verbs->device->dev_name); + + port = event->listen_id->context; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Listen Id was %p with verbs %p. ListenAddr: %p\n", + event->listen_id, event->listen_id->verbs, port); + + /* Figure out the supported queue depth. 
This is a multi-step process + * that takes into account hardware maximums, host provided values, + * and our target's internal memory limits */ + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Calculating Queue Depth\n"); + + /* Start with the maximum queue depth allowed by the target */ + max_queue_depth = rtransport->transport.opts.max_queue_depth; + max_rw_depth = rtransport->transport.opts.max_queue_depth; + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Target Max Queue Depth: %d\n", + rtransport->transport.opts.max_queue_depth); + + /* Next check the local NIC's hardware limitations */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, + "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n", + port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom); + max_queue_depth = spdk_min(max_queue_depth, port->device->attr.max_qp_wr); + max_rw_depth = spdk_min(max_rw_depth, port->device->attr.max_qp_rd_atom); + + /* Next check the remote NIC's hardware limitations */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, + "Host (Initiator) NIC Max Incoming RDMA R/W operations: %d Max Outgoing RDMA R/W operations: %d\n", + rdma_param->initiator_depth, rdma_param->responder_resources); + if (rdma_param->initiator_depth > 0) { + max_rw_depth = spdk_min(max_rw_depth, rdma_param->initiator_depth); + } + + /* Finally check for the host software requested values, which are + * optional. */ + if (rdma_param->private_data != NULL && + rdma_param->private_data_len >= sizeof(struct spdk_nvmf_rdma_request_private_data)) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Receive Queue Size: %d\n", private_data->hrqsize); + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Send Queue Size: %d\n", private_data->hsqsize); + max_queue_depth = spdk_min(max_queue_depth, private_data->hrqsize); + max_queue_depth = spdk_min(max_queue_depth, private_data->hsqsize + 1); + } + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n", + max_queue_depth, max_rw_depth); + + rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair)); + if (rqpair == NULL) { + SPDK_ERRLOG("Could not allocate new connection.\n"); + spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); + return -1; + } + + rqpair->port = port; + rqpair->max_queue_depth = max_queue_depth; + rqpair->max_rw_depth = max_rw_depth; + rqpair->cm_id = event->id; + rqpair->listen_id = event->listen_id; + rqpair->qpair.transport = transport; + rqpair->max_sge = spdk_min(port->device->attr.max_sge, SPDK_NVMF_MAX_SGL_ENTRIES); + TAILQ_INIT(&rqpair->incoming_queue); + event->id->context = &rqpair->qpair; + + cb_fn(&rqpair->qpair); + + return 0; +} + +static void +_nvmf_rdma_disconnect(void *ctx) +{ + struct spdk_nvmf_qpair *qpair = ctx; + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + spdk_nvmf_rdma_qpair_dec_refcnt(rqpair); + + spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); +} + +static void +_nvmf_rdma_disconnect_retry(void *ctx) +{ + struct spdk_nvmf_qpair *qpair = ctx; + struct spdk_nvmf_poll_group *group; + + /* Read the group out of the qpair. This is normally set and accessed only from + * the thread that created the group. Here, we're not on that thread necessarily. + * The data member qpair->group begins its life as NULL and then is assigned to + * a pointer and never changes. So fortunately reading this and checking for + * non-NULL is thread safe in the x86_64 memory model. */ + group = qpair->group; + + if (group == NULL) { + /* The qpair hasn't been assigned to a group yet, so we can't + * process a disconnect. 
Send a message to ourself and try again. */ + spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_disconnect_retry, qpair); + return; + } + + spdk_thread_send_msg(group->thread, _nvmf_rdma_disconnect, qpair); +} + +static int +nvmf_rdma_disconnect(struct rdma_cm_event *evt) +{ + struct spdk_nvmf_qpair *qpair; + struct spdk_nvmf_rdma_qpair *rqpair; + + if (evt->id == NULL) { + SPDK_ERRLOG("disconnect request: missing cm_id\n"); + return -1; + } + + qpair = evt->id->context; + if (qpair == NULL) { + SPDK_ERRLOG("disconnect request: no active connection\n"); + return -1; + } + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rqpair->cm_id, 0); + + spdk_nvmf_rdma_update_ibv_state(rqpair); + spdk_nvmf_rdma_qpair_inc_refcnt(rqpair); + + _nvmf_rdma_disconnect_retry(qpair); + + return 0; +} + +#ifdef DEBUG +static const char *CM_EVENT_STR[] = { + "RDMA_CM_EVENT_ADDR_RESOLVED", + "RDMA_CM_EVENT_ADDR_ERROR", + "RDMA_CM_EVENT_ROUTE_RESOLVED", + "RDMA_CM_EVENT_ROUTE_ERROR", + "RDMA_CM_EVENT_CONNECT_REQUEST", + "RDMA_CM_EVENT_CONNECT_RESPONSE", + "RDMA_CM_EVENT_CONNECT_ERROR", + "RDMA_CM_EVENT_UNREACHABLE", + "RDMA_CM_EVENT_REJECTED", + "RDMA_CM_EVENT_ESTABLISHED", + "RDMA_CM_EVENT_DISCONNECTED", + "RDMA_CM_EVENT_DEVICE_REMOVAL", + "RDMA_CM_EVENT_MULTICAST_JOIN", + "RDMA_CM_EVENT_MULTICAST_ERROR", + "RDMA_CM_EVENT_ADDR_CHANGE", + "RDMA_CM_EVENT_TIMEWAIT_EXIT" +}; +#endif /* DEBUG */ + +static void +spdk_nvmf_process_cm_event(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct rdma_cm_event *event; + int rc; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + if (rtransport->event_channel == NULL) { + return; + } + + while (1) { + rc = rdma_get_cm_event(rtransport->event_channel, &event); + if (rc == 0) { + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Acceptor Event: %s\n", CM_EVENT_STR[event->event]); + + spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event); + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + case RDMA_CM_EVENT_ROUTE_ERROR: + /* No action required. The target never attempts to resolve routes. */ + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + rc = nvmf_rdma_connect(transport, event, cb_fn); + if (rc < 0) { + SPDK_ERRLOG("Unable to process connect event. rc: %d\n", rc); + break; + } + break; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + /* The target never initiates a new connection. So this will not occur. */ + break; + case RDMA_CM_EVENT_CONNECT_ERROR: + /* Can this happen? The docs say it can, but not sure what causes it. */ + break; + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + /* These only occur on the client side. */ + break; + case RDMA_CM_EVENT_ESTABLISHED: + /* TODO: Should we be waiting for this event anywhere? */ + break; + case RDMA_CM_EVENT_DISCONNECTED: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + rc = nvmf_rdma_disconnect(event); + if (rc < 0) { + SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); + break; + } + break; + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + /* Multicast is not used */ + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + /* Not utilizing this event */ + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + /* For now, do nothing. The target never re-uses queue pairs. 
*/ + break; + default: + SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event); + break; + } + + rdma_ack_cm_event(event); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno)); + } + break; + } + } +} + +static int +spdk_nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct spdk_nvmf_rdma_device *device = cb_ctx; + struct ibv_pd *pd = device->pd; + struct ibv_mr *mr; + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + mr = ibv_reg_mr(pd, vaddr, size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (mr == NULL) { + SPDK_ERRLOG("ibv_reg_mr() failed\n"); + return -1; + } else { + spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); + } + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: + mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); + spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); + if (mr) { + ibv_dereg_mr(mr); + } + break; + } + + return 0; +} + +typedef enum spdk_nvme_data_transfer spdk_nvme_data_transfer_t; + +static spdk_nvme_data_transfer_t +spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req) +{ + enum spdk_nvme_data_transfer xfer; + struct spdk_nvme_cmd *cmd = &rdma_req->req.cmd->nvme_cmd; + struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; + +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + rdma_req->rsp.wr.opcode = IBV_WR_SEND; + rdma_req->rsp.wr.imm_data = 0; +#endif + + /* Figure out data transfer direction */ + if (cmd->opc == SPDK_NVME_OPC_FABRIC) { + xfer = spdk_nvme_opc_get_data_transfer(rdma_req->req.cmd->nvmf_cmd.fctype); + } else { + xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); + + /* Some admin commands are special cases */ + if ((rdma_req->req.qpair->qid == 0) && + ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) || + (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) { + switch (cmd->cdw10 & 0xff) { + case SPDK_NVME_FEAT_LBA_RANGE_TYPE: + case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: + case SPDK_NVME_FEAT_HOST_IDENTIFIER: + break; + default: + xfer = SPDK_NVME_DATA_NONE; + } + } + } + + if (xfer == SPDK_NVME_DATA_NONE) { + return xfer; + } + + /* Even for commands that may transfer data, they could have specified 0 length. + * We want those to show up with xfer SPDK_NVME_DATA_NONE. 
+ */ + switch (sgl->generic.type) { + case SPDK_NVME_SGL_TYPE_DATA_BLOCK: + case SPDK_NVME_SGL_TYPE_BIT_BUCKET: + case SPDK_NVME_SGL_TYPE_SEGMENT: + case SPDK_NVME_SGL_TYPE_LAST_SEGMENT: + case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK: + if (sgl->unkeyed.length == 0) { + xfer = SPDK_NVME_DATA_NONE; + } + break; + case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK: + if (sgl->keyed.length == 0) { + xfer = SPDK_NVME_DATA_NONE; + } + break; + } + + return xfer; +} + +static int +spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, + struct spdk_nvmf_rdma_request *rdma_req) +{ + void *buf = NULL; + uint32_t length = rdma_req->req.length; + uint32_t i = 0; + + rdma_req->req.iovcnt = 0; + while (length) { + buf = spdk_mempool_get(rtransport->data_buf_pool); + if (!buf) { + goto nomem; + } + + rdma_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) & + ~NVMF_DATA_BUFFER_MASK); + rdma_req->req.iov[i].iov_len = spdk_min(length, rtransport->transport.opts.io_unit_size); + rdma_req->req.iovcnt++; + rdma_req->data.buffers[i] = buf; + rdma_req->data.wr.sg_list[i].addr = (uintptr_t)(rdma_req->req.iov[i].iov_base); + rdma_req->data.wr.sg_list[i].length = rdma_req->req.iov[i].iov_len; + rdma_req->data.wr.sg_list[i].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map, + (uint64_t)buf, NULL))->lkey; + + length -= rdma_req->req.iov[i].iov_len; + i++; + } + + rdma_req->data_from_pool = true; + + return 0; + +nomem: + while (i) { + i--; + spdk_mempool_put(rtransport->data_buf_pool, rdma_req->req.iov[i].iov_base); + rdma_req->req.iov[i].iov_base = NULL; + rdma_req->req.iov[i].iov_len = 0; + + rdma_req->data.wr.sg_list[i].addr = 0; + rdma_req->data.wr.sg_list[i].length = 0; + rdma_req->data.wr.sg_list[i].lkey = 0; + } + rdma_req->req.iovcnt = 0; + return -ENOMEM; +} + +static int +spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_device *device, + struct spdk_nvmf_rdma_request *rdma_req) +{ + struct spdk_nvme_cmd *cmd; + struct spdk_nvme_cpl *rsp; + struct spdk_nvme_sgl_descriptor *sgl; + + cmd = &rdma_req->req.cmd->nvme_cmd; + rsp = &rdma_req->req.rsp->nvme_cpl; + sgl = &cmd->dptr.sgl1; + + if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK && + (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS || + sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) { + if (sgl->keyed.length > rtransport->transport.opts.max_io_size) { + SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", + sgl->keyed.length, rtransport->transport.opts.max_io_size); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) { + if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) { + rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV; + rdma_req->rsp.wr.imm_data = sgl->keyed.key; + } + } +#endif + + /* fill request length and populate iovs */ + rdma_req->req.length = sgl->keyed.length; + + if (spdk_nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req) < 0) { + /* No available buffers. Queue this request up. */ + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. 
Queueing request %p\n", rdma_req); + return 0; + } + + /* backward compatible */ + rdma_req->req.data = rdma_req->req.iov[0].iov_base; + + /* rdma wr specifics */ + rdma_req->data.wr.num_sge = rdma_req->req.iovcnt; + rdma_req->data.wr.wr.rdma.rkey = sgl->keyed.key; + rdma_req->data.wr.wr.rdma.remote_addr = sgl->address; + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req, + rdma_req->req.iovcnt); + + return 0; + } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + uint64_t offset = sgl->address; + uint32_t max_len = rtransport->transport.opts.in_capsule_data_size; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", + offset, sgl->unkeyed.length); + + if (offset > max_len) { + SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", + offset, max_len); + rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; + return -1; + } + max_len -= (uint32_t)offset; + + if (sgl->unkeyed.length > max_len) { + SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", + sgl->unkeyed.length, max_len); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + + rdma_req->req.data = rdma_req->recv->buf + offset; + rdma_req->data_from_pool = false; + rdma_req->req.length = sgl->unkeyed.length; + + rdma_req->req.iov[0].iov_base = rdma_req->req.data; + rdma_req->req.iov[0].iov_len = rdma_req->req.length; + rdma_req->req.iovcnt = 1; + + return 0; + } + + SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", + sgl->generic.type, sgl->generic.subtype); + rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; + return -1; +} + +static bool +spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_request *rdma_req) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl; + int rc; + struct spdk_nvmf_rdma_recv *rdma_recv; + enum spdk_nvmf_rdma_request_state prev_state; + bool progress = false; + int data_posted; + int cur_rdma_rw_depth; + + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + device = rqpair->port->device; + + assert(rdma_req->state != RDMA_REQUEST_STATE_FREE); + + /* If the queue pair is in an error state, force the request to the completed state + * to release resources. */ + if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { + if (rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER) { + TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link); + } + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + } + + /* The loop here is to allow for several back-to-back state changes. */ + do { + prev_state = rdma_req->state; + + SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p entering state %d\n", rdma_req, prev_state); + + switch (rdma_req->state) { + case RDMA_REQUEST_STATE_FREE: + /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW + * to escape this state. 
*/ + break; + case RDMA_REQUEST_STATE_NEW: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + rdma_recv = rdma_req->recv; + + /* The first element of the SGL is the NVMe command */ + rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr; + memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp)); + + TAILQ_REMOVE(&rqpair->incoming_queue, rdma_recv, link); + + if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + break; + } + + /* The next state transition depends on the data transfer needs of this request. */ + rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req); + + /* If no data to transfer, ready to execute. */ + if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE); + break; + } + + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEED_BUFFER); + TAILQ_INSERT_TAIL(&rqpair->ch->pending_data_buf_queue, rdma_req, link); + break; + case RDMA_REQUEST_STATE_NEED_BUFFER: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE); + + if (rdma_req != TAILQ_FIRST(&rqpair->ch->pending_data_buf_queue)) { + /* This request needs to wait in line to obtain a buffer */ + break; + } + + /* Try to get a data buffer */ + rc = spdk_nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req); + if (rc < 0) { + TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link); + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE); + break; + } + + if (!rdma_req->req.data) { + /* No buffers available. */ + break; + } + + TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link); + + /* If data is transferring from host to controller and the data didn't + * arrive using in capsule data, we need to do a transfer from the host. + */ + if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING); + break; + } + + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE); + break; + case RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + if (rdma_req != TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING])) { + /* This request needs to wait in line to perform RDMA */ + break; + } + cur_rdma_rw_depth = spdk_nvmf_rdma_cur_rw_depth(rqpair); + + if (cur_rdma_rw_depth >= rqpair->max_rw_depth) { + /* R/W queue is full, need to wait */ + break; + } + + if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + rc = request_transfer_in(&rdma_req->req); + if (!rc) { + spdk_nvmf_rdma_request_set_state(rdma_req, + RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + } else { + rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_nvmf_rdma_request_set_state(rdma_req, + RDMA_REQUEST_STATE_READY_TO_COMPLETE); + } + } else if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + /* The data transfer will be kicked off from + * RDMA_REQUEST_STATE_READY_TO_COMPLETE state. 
+ */ + spdk_nvmf_rdma_request_set_state(rdma_req, + RDMA_REQUEST_STATE_READY_TO_COMPLETE); + } else { + SPDK_ERRLOG("Cannot perform data transfer, unknown state: %u\n", + rdma_req->req.xfer); + assert(0); + } + break; + case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE + * to escape this state. */ + break; + case RDMA_REQUEST_STATE_READY_TO_EXECUTE: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTING); + spdk_nvmf_request_exec(&rdma_req->req); + break; + case RDMA_REQUEST_STATE_EXECUTING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED + * to escape this state. */ + break; + case RDMA_REQUEST_STATE_EXECUTED: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING); + } else { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE); + } + break; + case RDMA_REQUEST_STATE_READY_TO_COMPLETE: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + rc = request_transfer_out(&rdma_req->req, &data_posted); + assert(rc == 0); /* No good way to handle this currently */ + if (rc) { + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + } else { + spdk_nvmf_rdma_request_set_state(rdma_req, + data_posted ? + RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST : + RDMA_REQUEST_STATE_COMPLETING); + } + break; + case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED + * to escape this state. */ + break; + case RDMA_REQUEST_STATE_COMPLETING: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED + * to escape this state. 
*/ + break; + case RDMA_REQUEST_STATE_COMPLETED: + spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0, + (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); + + if (rdma_req->data_from_pool) { + /* Put the buffer/s back in the pool */ + for (uint32_t i = 0; i < rdma_req->req.iovcnt; i++) { + spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]); + rdma_req->req.iov[i].iov_base = NULL; + rdma_req->data.buffers[i] = NULL; + } + rdma_req->data_from_pool = false; + } + rdma_req->req.length = 0; + rdma_req->req.iovcnt = 0; + rdma_req->req.data = NULL; + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE); + break; + case RDMA_REQUEST_NUM_STATES: + default: + assert(0); + break; + } + + if (rdma_req->state != prev_state) { + progress = true; + } + } while (rdma_req->state != prev_state); + + return progress; +} + +/* Public API callbacks begin here */ + +#define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128 +#define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128 +#define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 +#define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 +#define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072 +#define SPDK_NVMF_RDMA_DEFAULT_IO_BUFFER_SIZE 131072 + +static void +spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts) +{ + opts->max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH; + opts->max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR; + opts->in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE; + opts->max_io_size = SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE; + opts->io_unit_size = SPDK_NVMF_RDMA_DEFAULT_IO_BUFFER_SIZE; + opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH; +} + +static int spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport); + +static struct spdk_nvmf_transport * +spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) +{ + int rc; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device, *tmp; + struct ibv_context **contexts; + uint32_t i; + int flag; + uint32_t sge_count; + + const struct spdk_mem_map_ops nvmf_rdma_map_ops = { + .notify_cb = spdk_nvmf_rdma_mem_notify, + .are_contiguous = NULL + }; + + rtransport = calloc(1, sizeof(*rtransport)); + if (!rtransport) { + return NULL; + } + + if (pthread_mutex_init(&rtransport->lock, NULL)) { + SPDK_ERRLOG("pthread_mutex_init() failed\n"); + free(rtransport); + return NULL; + } + + spdk_io_device_register(rtransport, spdk_nvmf_rdma_mgmt_channel_create, + spdk_nvmf_rdma_mgmt_channel_destroy, + sizeof(struct spdk_nvmf_rdma_mgmt_channel), + "rdma_transport"); + + TAILQ_INIT(&rtransport->devices); + TAILQ_INIT(&rtransport->ports); + + rtransport->transport.ops = &spdk_nvmf_transport_rdma; + + SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n" + " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" + " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" + " in_capsule_data_size=%d, max_aq_depth=%d\n", + opts->max_queue_depth, + opts->max_io_size, + opts->max_qpairs_per_ctrlr, + opts->io_unit_size, + opts->in_capsule_data_size, + opts->max_aq_depth); + + /* I/O unit size cannot be larger than max I/O size */ + if (opts->io_unit_size > opts->max_io_size) { + opts->io_unit_size = opts->max_io_size; + } + + sge_count = opts->max_io_size / opts->io_unit_size; + if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) { + SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->event_channel = 
rdma_create_event_channel(); + if (rtransport->event_channel == NULL) { + SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno)); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + flag = fcntl(rtransport->event_channel->fd, F_GETFL); + if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", + rtransport->event_channel->fd, spdk_strerror(errno)); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->data_buf_pool = spdk_mempool_create("spdk_nvmf_rdma", + opts->max_queue_depth * 4, /* The 4 is arbitrarily chosen. Needs to be configurable. */ + opts->max_io_size + NVMF_DATA_BUFFER_ALIGNMENT, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!rtransport->data_buf_pool) { + SPDK_ERRLOG("Unable to allocate buffer pool for poll group\n"); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + contexts = rdma_get_devices(NULL); + if (contexts == NULL) { + SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + i = 0; + rc = 0; + while (contexts[i] != NULL) { + device = calloc(1, sizeof(*device)); + if (!device) { + SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n"); + rc = -ENOMEM; + break; + } + device->context = contexts[i]; + rc = ibv_query_device(device->context, &device->attr); + if (rc < 0) { + SPDK_ERRLOG("Failed to query RDMA device attributes.\n"); + free(device); + break; + + } + +#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL + if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) { + SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE,"); + SPDK_WARNLOG("but the device with vendor ID %u does not.\n", device->attr.vendor_id); + } + + /** + * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE. + * The Soft-RoCE RXE driver does not currently support send with invalidate, + * but incorrectly reports that it does. There are changes making their way + * through the kernel now that will enable this feature. When they are merged, + * we can conditionally enable this feature. + * + * TODO: enable this for versions of the kernel rxe driver that support it. 
+ */ + if (device->attr.vendor_id == 0) { + device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS); + } +#endif + + /* set up device context async ev fd as NON_BLOCKING */ + flag = fcntl(device->context->async_fd, F_GETFL); + rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK); + if (rc < 0) { + SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n"); + free(device); + break; + } + + device->pd = ibv_alloc_pd(device->context); + if (!device->pd) { + SPDK_ERRLOG("Unable to allocate protection domain.\n"); + free(device); + rc = -1; + break; + } + + device->map = spdk_mem_map_alloc(0, &nvmf_rdma_map_ops, device); + if (!device->map) { + SPDK_ERRLOG("Unable to allocate memory map for new poll group\n"); + ibv_dealloc_pd(device->pd); + free(device); + rc = -1; + break; + } + + TAILQ_INSERT_TAIL(&rtransport->devices, device, link); + i++; + } + rdma_free_devices(contexts); + + if (rc < 0) { + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + /* Set up poll descriptor array to monitor events from RDMA and IB + * in a single poll syscall + */ + rtransport->npoll_fds = i + 1; + i = 0; + rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd)); + if (rtransport->poll_fds == NULL) { + SPDK_ERRLOG("poll_fds allocation failed\n"); + spdk_nvmf_rdma_destroy(&rtransport->transport); + return NULL; + } + + rtransport->poll_fds[i].fd = rtransport->event_channel->fd; + rtransport->poll_fds[i++].events = POLLIN; + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { + rtransport->poll_fds[i].fd = device->context->async_fd; + rtransport->poll_fds[i++].events = POLLIN; + } + + return &rtransport->transport; +} + +static int +spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_port *port, *port_tmp; + struct spdk_nvmf_rdma_device *device, *device_tmp; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) { + TAILQ_REMOVE(&rtransport->ports, port, link); + rdma_destroy_id(port->id); + free(port); + } + + if (rtransport->poll_fds != NULL) { + free(rtransport->poll_fds); + } + + if (rtransport->event_channel != NULL) { + rdma_destroy_event_channel(rtransport->event_channel); + } + + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) { + TAILQ_REMOVE(&rtransport->devices, device, link); + if (device->map) { + spdk_mem_map_free(&device->map); + } + if (device->pd) { + ibv_dealloc_pd(device->pd); + } + free(device); + } + + if (rtransport->data_buf_pool != NULL) { + if (spdk_mempool_count(rtransport->data_buf_pool) != + (transport->opts.max_queue_depth * 4)) { + SPDK_ERRLOG("transport buffer pool count is %zu but should be %u\n", + spdk_mempool_count(rtransport->data_buf_pool), + transport->opts.max_queue_depth * 4); + } + } + + spdk_mempool_free(rtransport->data_buf_pool); + spdk_io_device_unregister(rtransport, NULL); + pthread_mutex_destroy(&rtransport->lock); + free(rtransport); + + return 0; +} + +static int +spdk_nvmf_rdma_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_port *port_tmp, *port; + struct addrinfo *res; + struct addrinfo hints; + int family; + int rc; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + port = calloc(1, 
sizeof(*port)); + if (!port) { + return -ENOMEM; + } + + /* Selectively copy the trid. Things like NQN don't matter here - that + * mapping is enforced elsewhere. + */ + port->trid.trtype = SPDK_NVME_TRANSPORT_RDMA; + port->trid.adrfam = trid->adrfam; + snprintf(port->trid.traddr, sizeof(port->trid.traddr), "%s", trid->traddr); + snprintf(port->trid.trsvcid, sizeof(port->trid.trsvcid), "%s", trid->trsvcid); + + pthread_mutex_lock(&rtransport->lock); + assert(rtransport->event_channel != NULL); + TAILQ_FOREACH(port_tmp, &rtransport->ports, link) { + if (spdk_nvme_transport_id_compare(&port_tmp->trid, &port->trid) == 0) { + port_tmp->ref++; + free(port); + /* Already listening at this address */ + pthread_mutex_unlock(&rtransport->lock); + return 0; + } + } + + rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP); + if (rc < 0) { + SPDK_ERRLOG("rdma_create_id() failed\n"); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + switch (port->trid.adrfam) { + case SPDK_NVMF_ADRFAM_IPV4: + family = AF_INET; + break; + case SPDK_NVMF_ADRFAM_IPV6: + family = AF_INET6; + break; + default: + SPDK_ERRLOG("Unhandled ADRFAM %d\n", port->trid.adrfam); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = family; + hints.ai_flags = AI_NUMERICSERV; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + rc = getaddrinfo(port->trid.traddr, port->trid.trsvcid, &hints, &res); + if (rc) { + SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + rc = rdma_bind_addr(port->id, res->ai_addr); + freeaddrinfo(res); + + if (rc < 0) { + SPDK_ERRLOG("rdma_bind_addr() failed\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + if (!port->id->verbs) { + SPDK_ERRLOG("ibv_context is null\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -1; + } + + rc = rdma_listen(port->id, 10); /* 10 = backlog */ + if (rc < 0) { + SPDK_ERRLOG("rdma_listen() failed\n"); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return rc; + } + + TAILQ_FOREACH(device, &rtransport->devices, link) { + if (device->context == port->id->verbs) { + port->device = device; + break; + } + } + if (!port->device) { + SPDK_ERRLOG("Accepted a connection with verbs %p, but unable to find a corresponding device.\n", + port->id->verbs); + rdma_destroy_id(port->id); + free(port); + pthread_mutex_unlock(&rtransport->lock); + return -EINVAL; + } + + SPDK_INFOLOG(SPDK_LOG_RDMA, "*** NVMf Target Listening on %s port %d ***\n", + port->trid.traddr, ntohs(rdma_get_src_port(port->id))); + + port->ref = 1; + + TAILQ_INSERT_TAIL(&rtransport->ports, port, link); + pthread_mutex_unlock(&rtransport->lock); + + return 0; +} + +static int +spdk_nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *_trid) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_port *port, *tmp; + struct spdk_nvme_transport_id trid = {}; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + /* Selectively copy the trid. Things like NQN don't matter here - that + * mapping is enforced elsewhere. 
+ */ + trid.trtype = SPDK_NVME_TRANSPORT_RDMA; + trid.adrfam = _trid->adrfam; + snprintf(trid.traddr, sizeof(port->trid.traddr), "%s", _trid->traddr); + snprintf(trid.trsvcid, sizeof(port->trid.trsvcid), "%s", _trid->trsvcid); + + pthread_mutex_lock(&rtransport->lock); + TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) { + if (spdk_nvme_transport_id_compare(&port->trid, &trid) == 0) { + assert(port->ref > 0); + port->ref--; + if (port->ref == 0) { + TAILQ_REMOVE(&rtransport->ports, port, link); + rdma_destroy_id(port->id); + free(port); + } + break; + } + } + + pthread_mutex_unlock(&rtransport->lock); + return 0; +} + +static bool +spdk_nvmf_rdma_qpair_is_idle(struct spdk_nvmf_qpair *qpair) +{ + int cur_queue_depth, cur_rdma_rw_depth; + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + cur_queue_depth = spdk_nvmf_rdma_cur_queue_depth(rqpair); + cur_rdma_rw_depth = spdk_nvmf_rdma_cur_rw_depth(rqpair); + + if (cur_queue_depth == 0 && cur_rdma_rw_depth == 0) { + return true; + } + return false; +} + +static void +spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp; + struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; + + /* We process I/O in the data transfer pending queue at the highest priority. */ + TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING], + state_link, req_tmp) { + if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) { + break; + } + } + + /* The second highest priority is I/O waiting on memory buffers. */ + TAILQ_FOREACH_SAFE(rdma_req, &rqpair->ch->pending_data_buf_queue, link, + req_tmp) { + if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) { + break; + } + } + + if (rqpair->qpair_disconnected) { + spdk_nvmf_rdma_qpair_destroy(rqpair); + return; + } + + /* Do not process newly received commands if qp is in ERROR state, + * wait till the recovery is complete. + */ + if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR) { + return; + } + + /* The lowest priority is processing newly received commands */ + TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) { + if (TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE])) { + break; + } + + rdma_req = TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE]); + rdma_req->recv = rdma_recv; + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW); + if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) { + break; + } + } +} + +static void +spdk_nvmf_rdma_drain_state_queue(struct spdk_nvmf_rdma_qpair *rqpair, + enum spdk_nvmf_rdma_request_state state) +{ + struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; + struct spdk_nvmf_rdma_transport *rtransport; + + TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[state], state_link, req_tmp) { + rtransport = SPDK_CONTAINEROF(rdma_req->req.qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + spdk_nvmf_rdma_request_process(rtransport, rdma_req); + } +} + +static void +spdk_nvmf_rdma_qpair_recover(struct spdk_nvmf_rdma_qpair *rqpair) +{ + enum ibv_qp_state state, next_state; + int recovered; + struct spdk_nvmf_rdma_transport *rtransport; + + if (!spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) { + /* There must be outstanding requests down to media. + * If so, wait till they're complete. 
+ */ + assert(!TAILQ_EMPTY(&rqpair->qpair.outstanding)); + return; + } + + state = rqpair->ibv_attr.qp_state; + next_state = state; + + SPDK_NOTICELOG("RDMA qpair %u is in state: %s\n", + rqpair->qpair.qid, + str_ibv_qp_state[state]); + + if (!(state == IBV_QPS_ERR || state == IBV_QPS_RESET)) { + SPDK_ERRLOG("Can't recover RDMA qpair %u from the state: %s\n", + rqpair->qpair.qid, + str_ibv_qp_state[state]); + spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL); + return; + } + + recovered = 0; + while (!recovered) { + switch (state) { + case IBV_QPS_ERR: + next_state = IBV_QPS_RESET; + break; + case IBV_QPS_RESET: + next_state = IBV_QPS_INIT; + break; + case IBV_QPS_INIT: + next_state = IBV_QPS_RTR; + break; + case IBV_QPS_RTR: + next_state = IBV_QPS_RTS; + break; + case IBV_QPS_RTS: + recovered = 1; + break; + default: + SPDK_ERRLOG("RDMA qpair %u unexpected state for recovery: %u\n", + rqpair->qpair.qid, state); + goto error; + } + /* Do not transition into same state */ + if (next_state == state) { + break; + } + + if (spdk_nvmf_rdma_set_ibv_state(rqpair, next_state)) { + goto error; + } + + state = next_state; + } + + rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, + struct spdk_nvmf_rdma_transport, + transport); + + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair); + + return; +error: + SPDK_NOTICELOG("RDMA qpair %u: recovery failed, disconnecting...\n", + rqpair->qpair.qid); + spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL); +} + +/* Clean up only the states that can be aborted at any time */ +static void +_spdk_nvmf_rdma_qp_cleanup_safe_states(struct spdk_nvmf_rdma_qpair *rqpair) +{ + struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; + + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEW); + TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_NEED_BUFFER], link, req_tmp) { + TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link); + } + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEED_BUFFER); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_EXECUTE); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_EXECUTED); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_READY_TO_COMPLETE); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETED); +} + +/* This cleans up all memory. It is only safe to use if the rest of the software stack + * has been shut down */ +static void +_spdk_nvmf_rdma_qp_cleanup_all_states(struct spdk_nvmf_rdma_qpair *rqpair) +{ + _spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair); + + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_EXECUTING); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETING); +} + +static void +_spdk_nvmf_rdma_qp_error(void *arg) +{ + struct spdk_nvmf_rdma_qpair *rqpair = arg; + enum ibv_qp_state state; + + spdk_nvmf_rdma_qpair_dec_refcnt(rqpair); + + state = rqpair->ibv_attr.qp_state; + if (state != IBV_QPS_ERR) { + /* Error was already recovered */ + return; + } + + if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) { + spdk_nvmf_ctrlr_abort_aer(rqpair->qpair.ctrlr); + } + + _spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair); + + /* Attempt recovery. 
This will exit without recovering if I/O requests + * are still outstanding */ + spdk_nvmf_rdma_qpair_recover(rqpair); +} + +static void +_spdk_nvmf_rdma_qp_last_wqe(void *arg) +{ + struct spdk_nvmf_rdma_qpair *rqpair = arg; + enum ibv_qp_state state; + + spdk_nvmf_rdma_qpair_dec_refcnt(rqpair); + + state = rqpair->ibv_attr.qp_state; + if (state != IBV_QPS_ERR) { + /* Error was already recovered */ + return; + } + + /* Clear out the states that are safe to clear any time, plus the + * RDMA data transfer states. */ + _spdk_nvmf_rdma_qp_cleanup_safe_states(rqpair); + + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST); + spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETING); + + spdk_nvmf_rdma_qpair_recover(rqpair); +} + +static void +spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device) +{ + int rc; + struct spdk_nvmf_rdma_qpair *rqpair; + struct ibv_async_event event; + enum ibv_qp_state state; + + rc = ibv_get_async_event(device->context, &event); + + if (rc) { + SPDK_ERRLOG("Failed to get async_event (%d): %s\n", + errno, spdk_strerror(errno)); + return; + } + + SPDK_NOTICELOG("Async event: %s\n", + ibv_event_type_str(event.event_type)); + + switch (event.event_type) { + case IBV_EVENT_QP_FATAL: + rqpair = event.element.qp->qp_context; + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + spdk_nvmf_rdma_update_ibv_state(rqpair); + spdk_nvmf_rdma_qpair_inc_refcnt(rqpair); + spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair); + break; + case IBV_EVENT_QP_LAST_WQE_REACHED: + rqpair = event.element.qp->qp_context; + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + spdk_nvmf_rdma_update_ibv_state(rqpair); + spdk_nvmf_rdma_qpair_inc_refcnt(rqpair); + spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_last_wqe, rqpair); + break; + case IBV_EVENT_SQ_DRAINED: + /* This event occurs frequently in both error and non-error states. + * Check if the qpair is in an error state before sending a message. + * Note that we're not on the correct thread to access the qpair, but + * the operations that the below calls make all happen to be thread + * safe. 
*/ + rqpair = event.element.qp->qp_context; + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + state = spdk_nvmf_rdma_update_ibv_state(rqpair); + if (state == IBV_QPS_ERR) { + spdk_nvmf_rdma_qpair_inc_refcnt(rqpair); + spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair); + } + break; + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + rqpair = event.element.qp->qp_context; + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, + (uintptr_t)rqpair->cm_id, event.event_type); + spdk_nvmf_rdma_update_ibv_state(rqpair); + break; + case IBV_EVENT_CQ_ERR: + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_CLIENT_REREGISTER: + case IBV_EVENT_GID_CHANGE: + default: + spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type); + break; + } + ibv_ack_async_event(&event); +} + +static void +spdk_nvmf_rdma_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) +{ + int nfds, i = 0; + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_device *device, *tmp; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0); + + if (nfds <= 0) { + return; + } + + /* The first poll descriptor is RDMA CM event */ + if (rtransport->poll_fds[i++].revents & POLLIN) { + spdk_nvmf_process_cm_event(transport, cb_fn); + nfds--; + } + + if (nfds == 0) { + return; + } + + /* Second and subsequent poll descriptors are IB async events */ + TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { + if (rtransport->poll_fds[i++].revents & POLLIN) { + spdk_nvmf_process_ib_event(device); + nfds--; + } + } + /* check all flagged fd's have been served */ + assert(nfds == 0); +} + +static void +spdk_nvmf_rdma_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) +{ + entry->trtype = SPDK_NVMF_TRTYPE_RDMA; + entry->adrfam = trid->adrfam; + entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED; + + spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); + spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); + + entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED; + entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE; + entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM; +} + +static struct spdk_nvmf_transport_poll_group * +spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *poller; + struct spdk_nvmf_rdma_device *device; + + rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); + + rgroup = calloc(1, sizeof(*rgroup)); + if (!rgroup) { + return NULL; + } + + TAILQ_INIT(&rgroup->pollers); + + pthread_mutex_lock(&rtransport->lock); + TAILQ_FOREACH(device, &rtransport->devices, link) { + poller = calloc(1, sizeof(*poller)); + if (!poller) { + SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n"); + free(rgroup); + pthread_mutex_unlock(&rtransport->lock); + return NULL; 
+ } + + poller->device = device; + poller->group = rgroup; + + TAILQ_INIT(&poller->qpairs); + + poller->cq = ibv_create_cq(device->context, NVMF_RDMA_CQ_SIZE, poller, NULL, 0); + if (!poller->cq) { + SPDK_ERRLOG("Unable to create completion queue\n"); + free(poller); + free(rgroup); + pthread_mutex_unlock(&rtransport->lock); + return NULL; + } + + TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link); + } + + pthread_mutex_unlock(&rtransport->lock); + return &rgroup->group; +} + +static void +spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller *poller, *tmp; + struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair; + + rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + + if (!rgroup) { + return; + } + + TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) { + TAILQ_REMOVE(&rgroup->pollers, poller, link); + + if (poller->cq) { + ibv_destroy_cq(poller->cq); + } + TAILQ_FOREACH_SAFE(qpair, &poller->qpairs, link, tmp_qpair) { + _spdk_nvmf_rdma_qp_cleanup_all_states(qpair); + spdk_nvmf_rdma_qpair_destroy(qpair); + } + + free(poller); + } + + free(rgroup); +} + +static int +spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_qpair *rqpair; + struct spdk_nvmf_rdma_device *device; + struct spdk_nvmf_rdma_poller *poller; + int rc; + + rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport); + rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + device = rqpair->port->device; + + TAILQ_FOREACH(poller, &rgroup->pollers, link) { + if (poller->device == device) { + break; + } + } + + if (!poller) { + SPDK_ERRLOG("No poller found for device.\n"); + return -1; + } + + TAILQ_INSERT_TAIL(&poller->qpairs, rqpair, link); + rqpair->poller = poller; + + rc = spdk_nvmf_rdma_qpair_initialize(qpair); + if (rc < 0) { + SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair); + return -1; + } + + rqpair->mgmt_channel = spdk_get_io_channel(rtransport); + if (!rqpair->mgmt_channel) { + spdk_nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + + rqpair->ch = spdk_io_channel_get_ctx(rqpair->mgmt_channel); + assert(rqpair->ch != NULL); + + rc = spdk_nvmf_rdma_event_accept(rqpair->cm_id, rqpair); + if (rc) { + /* Try to reject, but we probably can't */ + spdk_nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); + spdk_nvmf_rdma_qpair_destroy(rqpair); + return -1; + } + + spdk_nvmf_rdma_update_ibv_state(rqpair); + + return 0; +} + +static int +spdk_nvmf_rdma_request_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + + if (rdma_req->data_from_pool) { + /* Put the buffer/s back in the pool */ + for (uint32_t i = 0; i < rdma_req->req.iovcnt; i++) { + spdk_mempool_put(rtransport->data_buf_pool, rdma_req->data.buffers[i]); + rdma_req->req.iov[i].iov_base = NULL; + rdma_req->data.buffers[i] = NULL; + } + rdma_req->data_from_pool = false; + } + 
rdma_req->req.length = 0; + rdma_req->req.iovcnt = 0; + rdma_req->req.data = NULL; + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE); + return 0; +} + +static int +spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, + struct spdk_nvmf_rdma_transport, transport); + struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, + struct spdk_nvmf_rdma_request, req); + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, + struct spdk_nvmf_rdma_qpair, qpair); + + if (rqpair->ibv_attr.qp_state != IBV_QPS_ERR) { + /* The connection is alive, so process the request as normal */ + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED); + } else { + /* The connection is dead. Move the request directly to the completed state. */ + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + } + + spdk_nvmf_rdma_request_process(rtransport, rdma_req); + + if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE && rqpair->ibv_attr.qp_state == IBV_QPS_ERR) { + /* If the NVMe-oF layer thinks the connection is active, but the RDMA layer thinks + * the connection is dead, perform error recovery. */ + spdk_nvmf_rdma_qpair_recover(rqpair); + } + + return 0; +} + +static void +spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair) +{ + struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + spdk_nvmf_rdma_qpair_destroy(rqpair); +} + +static struct spdk_nvmf_rdma_request * +get_rdma_req_from_wc(struct ibv_wc *wc) +{ + struct spdk_nvmf_rdma_request *rdma_req; + + rdma_req = (struct spdk_nvmf_rdma_request *)wc->wr_id; + assert(rdma_req != NULL); + +#ifdef DEBUG + struct spdk_nvmf_rdma_qpair *rqpair; + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(rdma_req - rqpair->reqs >= 0); + assert(rdma_req - rqpair->reqs < (ptrdiff_t)rqpair->max_queue_depth); +#endif + + return rdma_req; +} + +static struct spdk_nvmf_rdma_recv * +get_rdma_recv_from_wc(struct ibv_wc *wc) +{ + struct spdk_nvmf_rdma_recv *rdma_recv; + + assert(wc->byte_len >= sizeof(struct spdk_nvmf_capsule_cmd)); + + rdma_recv = (struct spdk_nvmf_rdma_recv *)wc->wr_id; + assert(rdma_recv != NULL); + +#ifdef DEBUG + struct spdk_nvmf_rdma_qpair *rqpair = rdma_recv->qpair; + + assert(rdma_recv - rqpair->recvs >= 0); + assert(rdma_recv - rqpair->recvs < (ptrdiff_t)rqpair->max_queue_depth); +#endif + + return rdma_recv; +} + +#ifdef DEBUG +static int +spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req) +{ + return rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST || + rdma_req->state == RDMA_REQUEST_STATE_COMPLETING; +} +#endif + +static int +spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller) +{ + struct ibv_wc wc[32]; + struct spdk_nvmf_rdma_request *rdma_req; + struct spdk_nvmf_rdma_recv *rdma_recv; + struct spdk_nvmf_rdma_qpair *rqpair; + int reaped, i; + int count = 0; + bool error = false; + + /* Poll for completing operations. */ + reaped = ibv_poll_cq(rpoller->cq, 32, wc); + if (reaped < 0) { + SPDK_ERRLOG("Error polling CQ! 
(%d): %s\n", + errno, spdk_strerror(errno)); + return -1; + } + + for (i = 0; i < reaped; i++) { + /* Handle error conditions */ + if (wc[i].status) { + SPDK_WARNLOG("CQ error on CQ %p, Request 0x%lu (%d): %s\n", + rpoller->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status)); + error = true; + + switch (wc[i].opcode) { + case IBV_WC_SEND: + rdma_req = get_rdma_req_from_wc(&wc[i]); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + /* We're going to attempt an error recovery, so force the request into + * the completed state. */ + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + spdk_nvmf_rdma_request_process(rtransport, rdma_req); + break; + case IBV_WC_RECV: + rdma_recv = get_rdma_recv_from_wc(&wc[i]); + rqpair = rdma_recv->qpair; + + /* Dump this into the incoming queue. This gets cleaned up when + * the queue pair disconnects or recovers. */ + TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link); + break; + case IBV_WC_RDMA_WRITE: + case IBV_WC_RDMA_READ: + /* If the data transfer fails still force the queue into the error state, + * but the rdma_req objects should only be manipulated in response to + * SEND and RECV operations. */ + rdma_req = get_rdma_req_from_wc(&wc[i]); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + break; + default: + SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); + continue; + } + + /* Set the qpair to the error state. This will initiate a recovery. */ + spdk_nvmf_rdma_set_ibv_state(rqpair, IBV_QPS_ERR); + continue; + } + + switch (wc[i].opcode) { + case IBV_WC_SEND: + rdma_req = get_rdma_req_from_wc(&wc[i]); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(spdk_nvmf_rdma_req_is_completing(rdma_req)); + + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED); + spdk_nvmf_rdma_request_process(rtransport, rdma_req); + + count++; + + /* Try to process other queued requests */ + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair); + break; + + case IBV_WC_RDMA_WRITE: + rdma_req = get_rdma_req_from_wc(&wc[i]); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + /* Try to process other queued requests */ + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair); + break; + + case IBV_WC_RDMA_READ: + rdma_req = get_rdma_req_from_wc(&wc[i]); + rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); + + assert(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); + spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_EXECUTE); + spdk_nvmf_rdma_request_process(rtransport, rdma_req); + + /* Try to process other queued requests */ + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair); + break; + + case IBV_WC_RECV: + rdma_recv = get_rdma_recv_from_wc(&wc[i]); + rqpair = rdma_recv->qpair; + + TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link); + /* Try to process other queued requests */ + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair); + break; + + default: + SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); + continue; + } + } + + if (error == true) { + return -1; + } + + return count; +} + +static int +spdk_nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + struct spdk_nvmf_rdma_transport *rtransport; + struct spdk_nvmf_rdma_poll_group *rgroup; + struct spdk_nvmf_rdma_poller 
*rpoller; + int count, rc; + + rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport); + rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); + + count = 0; + TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { + rc = spdk_nvmf_rdma_poller_poll(rtransport, rpoller); + if (rc < 0) { + return rc; + } + count += rc; + } + + return count; +} + +static int +spdk_nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, + struct spdk_nvme_transport_id *trid, + bool peer) +{ + struct sockaddr *saddr; + uint16_t port; + + trid->trtype = SPDK_NVME_TRANSPORT_RDMA; + + if (peer) { + saddr = rdma_get_peer_addr(id); + } else { + saddr = rdma_get_local_addr(id); + } + switch (saddr->sa_family) { + case AF_INET: { + struct sockaddr_in *saddr_in = (struct sockaddr_in *)saddr; + + trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + inet_ntop(AF_INET, &saddr_in->sin_addr, + trid->traddr, sizeof(trid->traddr)); + if (peer) { + port = ntohs(rdma_get_dst_port(id)); + } else { + port = ntohs(rdma_get_src_port(id)); + } + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); + break; + } + case AF_INET6: { + struct sockaddr_in6 *saddr_in = (struct sockaddr_in6 *)saddr; + trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; + inet_ntop(AF_INET6, &saddr_in->sin6_addr, + trid->traddr, sizeof(trid->traddr)); + if (peer) { + port = ntohs(rdma_get_dst_port(id)); + } else { + port = ntohs(rdma_get_src_port(id)); + } + snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); + break; + } + default: + return -1; + + } + + return 0; +} + +static int +spdk_nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true); +} + +static int +spdk_nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false); +} + +static int +spdk_nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + + rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); + + return spdk_nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false); +} + +const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = { + .type = SPDK_NVME_TRANSPORT_RDMA, + .opts_init = spdk_nvmf_rdma_opts_init, + .create = spdk_nvmf_rdma_create, + .destroy = spdk_nvmf_rdma_destroy, + + .listen = spdk_nvmf_rdma_listen, + .stop_listen = spdk_nvmf_rdma_stop_listen, + .accept = spdk_nvmf_rdma_accept, + + .listener_discover = spdk_nvmf_rdma_discover, + + .poll_group_create = spdk_nvmf_rdma_poll_group_create, + .poll_group_destroy = spdk_nvmf_rdma_poll_group_destroy, + .poll_group_add = spdk_nvmf_rdma_poll_group_add, + .poll_group_poll = spdk_nvmf_rdma_poll_group_poll, + + .req_free = spdk_nvmf_rdma_request_free, + .req_complete = spdk_nvmf_rdma_request_complete, + + .qpair_fini = spdk_nvmf_rdma_close_qpair, + .qpair_is_idle = spdk_nvmf_rdma_qpair_is_idle, + .qpair_get_peer_trid = spdk_nvmf_rdma_qpair_get_peer_trid, + .qpair_get_local_trid = spdk_nvmf_rdma_qpair_get_local_trid, + .qpair_get_listen_trid = spdk_nvmf_rdma_qpair_get_listen_trid, + +}; + +SPDK_LOG_REGISTER_COMPONENT("rdma", SPDK_LOG_RDMA) diff --git 
a/src/spdk/lib/nvmf/request.c b/src/spdk/lib/nvmf/request.c new file mode 100644 index 00000000..88b6b9a9 --- /dev/null +++ b/src/spdk/lib/nvmf/request.c @@ -0,0 +1,190 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/thread.h" +#include "spdk/likely.h" +#include "spdk/nvme.h" +#include "spdk/nvmf_spec.h" +#include "spdk/trace.h" + +#include "spdk_internal/assert.h" +#include "spdk_internal/log.h" + +static void +spdk_nvmf_qpair_request_cleanup(struct spdk_nvmf_qpair *qpair) +{ + if (qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING) { + assert(qpair->state_cb != NULL); + + if (TAILQ_EMPTY(&qpair->outstanding)) { + qpair->state_cb(qpair->state_cb_arg, 0); + } + } else { + assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE); + } +} + +int +spdk_nvmf_request_free(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + + TAILQ_REMOVE(&qpair->outstanding, req, link); + if (spdk_nvmf_transport_req_free(req)) { + SPDK_ERRLOG("Unable to free transport level request resources.\n"); + } + + spdk_nvmf_qpair_request_cleanup(qpair); + + return 0; +} + +int +spdk_nvmf_request_complete(struct spdk_nvmf_request *req) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + struct spdk_nvmf_qpair *qpair; + + rsp->sqid = 0; + rsp->status.p = 0; + rsp->cid = req->cmd->nvme_cmd.cid; + + qpair = req->qpair; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, + "cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n", + rsp->cid, rsp->cdw0, rsp->rsvd1, + *(uint16_t *)&rsp->status); + + TAILQ_REMOVE(&qpair->outstanding, req, link); + if (spdk_nvmf_transport_req_complete(req)) { + SPDK_ERRLOG("Transport request completion error!\n"); + } + + spdk_nvmf_qpair_request_cleanup(qpair); + + return 0; +} + +static void +nvmf_trace_command(union nvmf_h2c_msg *h2c_msg, bool is_admin_queue) +{ + struct spdk_nvmf_capsule_cmd *cap_hdr = &h2c_msg->nvmf_cmd; + struct spdk_nvme_cmd *cmd = &h2c_msg->nvme_cmd; + struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; + uint8_t opc; + + if (cmd->opc == SPDK_NVME_OPC_FABRIC) { + opc = cap_hdr->fctype; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "%s Fabrics cmd: fctype 0x%02x cid %u\n", + is_admin_queue ? "Admin" : "I/O", + cap_hdr->fctype, cap_hdr->cid); + } else { + opc = cmd->opc; + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "%s cmd: opc 0x%02x fuse %u cid %u nsid %u cdw10 0x%08x\n", + is_admin_queue ? "Admin" : "I/O", + cmd->opc, cmd->fuse, cmd->cid, cmd->nsid, cmd->cdw10); + if (cmd->mptr) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "mptr 0x%" PRIx64 "\n", cmd->mptr); + } + if (cmd->psdt != SPDK_NVME_PSDT_SGL_MPTR_CONTIG && + cmd->psdt != SPDK_NVME_PSDT_SGL_MPTR_SGL) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "psdt %u\n", cmd->psdt); + } + } + + if (spdk_nvme_opc_get_data_transfer(opc) != SPDK_NVME_DATA_NONE) { + if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, + "SGL: Keyed%s: addr 0x%" PRIx64 " key 0x%x len 0x%x\n", + sgl->generic.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY ? " (Inv)" : "", + sgl->address, sgl->keyed.key, sgl->keyed.length); + } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "SGL: Data block: %s 0x%" PRIx64 " len 0x%x\n", + sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET ? 
"offs" : "addr", + sgl->address, sgl->unkeyed.length); + } else { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "SGL type 0x%x subtype 0x%x\n", + sgl->generic.type, sgl->generic.subtype); + } + } +} + +void +spdk_nvmf_request_exec(struct spdk_nvmf_request *req) +{ + struct spdk_nvmf_qpair *qpair = req->qpair; + spdk_nvmf_request_exec_status status; + + nvmf_trace_command(req->cmd, spdk_nvmf_qpair_is_admin_queue(qpair)); + + if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) { + req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; + req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR; + /* Place the request on the outstanding list so we can keep track of it */ + TAILQ_INSERT_TAIL(&qpair->outstanding, req, link); + spdk_nvmf_request_complete(req); + return; + } + + /* Check if the subsystem is paused (if there is a subsystem) */ + if (qpair->ctrlr) { + struct spdk_nvmf_subsystem_poll_group *sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id]; + if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) { + /* The subsystem is not currently active. Queue this request. */ + TAILQ_INSERT_TAIL(&sgroup->queued, req, link); + return; + } + + } + + /* Place the request on the outstanding list so we can keep track of it */ + TAILQ_INSERT_TAIL(&qpair->outstanding, req, link); + + if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC)) { + status = spdk_nvmf_ctrlr_process_fabrics_cmd(req); + } else if (spdk_unlikely(spdk_nvmf_qpair_is_admin_queue(qpair))) { + status = spdk_nvmf_ctrlr_process_admin_cmd(req); + } else { + status = spdk_nvmf_ctrlr_process_io_cmd(req); + } + + if (status == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) { + spdk_nvmf_request_complete(req); + } +} diff --git a/src/spdk/lib/nvmf/subsystem.c b/src/spdk/lib/nvmf/subsystem.c new file mode 100644 index 00000000..9e28f3c6 --- /dev/null +++ b/src/spdk/lib/nvmf/subsystem.c @@ -0,0 +1,1269 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/event.h" +#include "spdk/likely.h" +#include "spdk/string.h" +#include "spdk/trace.h" +#include "spdk/nvmf_spec.h" +#include "spdk/uuid.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" +#include "spdk_internal/utf.h" + +/* + * States for parsing valid domains in NQNs according to RFC 1034 + */ +enum spdk_nvmf_nqn_domain_states { + /* First character of a domain must be a letter */ + SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0, + + /* Subsequent characters can be any of letter, digit, or hyphen */ + SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1, + + /* A domain label must end with either a letter or digit */ + SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2 +}; + +/* Returns true if is a valid ASCII string as defined by the NVMe spec */ +static bool +spdk_nvmf_valid_ascii_string(const void *buf, size_t size) +{ + const uint8_t *str = buf; + size_t i; + + for (i = 0; i < size; i++) { + if (str[i] < 0x20 || str[i] > 0x7E) { + return false; + } + } + + return true; +} + +static bool +spdk_nvmf_valid_nqn(const char *nqn) +{ + size_t len; + struct spdk_uuid uuid_value; + uint32_t i; + int bytes_consumed; + uint32_t domain_label_length; + char *reverse_domain_end; + uint32_t reverse_domain_end_index; + enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER; + + /* Check for length requirements */ + len = strlen(nqn); + if (len > SPDK_NVMF_NQN_MAX_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN); + return false; + } + + /* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */ + if (len < SPDK_NVMF_NQN_MIN_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN); + return false; + } + + /* Check for discovery controller nqn */ + if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) { + return true; + } + + /* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */ + if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) { + if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) { + SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn); + return false; + } + + if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) { + SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn); + return false; + } + return true; + } + + /* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */ + + if (strncmp(nqn, "nqn.", 4) != 0) { + SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn); + return false; + } + + /* Check for yyyy-mm. */ + if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) && + nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) { + SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn); + return false; + } + + reverse_domain_end = strchr(nqn, ':'); + if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) { + } else { + SPDK_ERRLOG("Invalid NQN \"%s\". 
NQN must contain user specified name with a ':' as a prefix.\n", + nqn); + return false; + } + + /* Check for valid reverse domain */ + domain_label_length = 0; + for (i = 12; i < reverse_domain_end_index; i++) { + if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn); + return false; + } + + switch (domain_state) { + + case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: { + if (isalpha(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn); + return false; + } + } + + case SPDK_NVMF_DOMAIN_ACCEPT_LDH: { + if (isalpha(nqn[i]) || isdigit(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else if (nqn[i] == '-') { + if (i == reverse_domain_end_index - 1) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH; + domain_label_length++; + break; + } else if (nqn[i] == '.') { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n", + nqn); + return false; + } + } + + case SPDK_NVMF_DOMAIN_ACCEPT_ANY: { + if (isalpha(nqn[i]) || isdigit(nqn[i])) { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY; + domain_label_length++; + break; + } else if (nqn[i] == '-') { + if (i == reverse_domain_end_index - 1) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n", + nqn); + return false; + } + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH; + domain_label_length++; + break; + } else if (nqn[i] == '.') { + domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER; + domain_label_length = 0; + break; + } else { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n", + nqn); + return false; + } + } + } + } + + i = reverse_domain_end_index + 1; + while (i < len) { + bytes_consumed = utf8_valid(&nqn[i], &nqn[len]); + if (bytes_consumed <= 0) { + SPDK_ERRLOG("Invalid domain name in NQN \"%s\". 
Label names must contain only valid utf-8.\n", nqn); + return false; + } + + i += bytes_consumed; + } + return true; +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt, + const char *nqn, + enum spdk_nvmf_subtype type, + uint32_t num_ns) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) { + SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn); + return NULL; + } + + if (!spdk_nvmf_valid_nqn(nqn)) { + return NULL; + } + + if (type == SPDK_NVMF_SUBTYPE_DISCOVERY && num_ns != 0) { + SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n"); + return NULL; + } + + /* Find a free subsystem id (sid) */ + for (sid = 0; sid < tgt->opts.max_subsystems; sid++) { + if (tgt->subsystems[sid] == NULL) { + break; + } + } + if (sid >= tgt->opts.max_subsystems) { + return NULL; + } + + subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem)); + if (subsystem == NULL) { + return NULL; + } + + subsystem->thread = spdk_get_thread(); + subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + subsystem->tgt = tgt; + subsystem->id = sid; + subsystem->subtype = type; + subsystem->max_nsid = num_ns; + subsystem->max_allowed_nsid = num_ns; + subsystem->next_cntlid = 0; + snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn); + TAILQ_INIT(&subsystem->listeners); + TAILQ_INIT(&subsystem->hosts); + TAILQ_INIT(&subsystem->ctrlrs); + + if (num_ns != 0) { + subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *)); + if (subsystem->ns == NULL) { + SPDK_ERRLOG("Namespace memory allocation failed\n"); + free(subsystem); + return NULL; + } + } + + memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1); + subsystem->sn[sizeof(subsystem->sn) - 1] = '\0'; + + tgt->subsystems[sid] = subsystem; + tgt->discovery_genctr++; + + return subsystem; +} + +static void +_spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host) +{ + TAILQ_REMOVE(&subsystem->hosts, host, link); + free(host->nqn); + free(host); +} + +static int _spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid); + +void +spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem) +{ + struct spdk_nvmf_listener *listener, *listener_tmp; + struct spdk_nvmf_host *host, *host_tmp; + struct spdk_nvmf_ctrlr *ctrlr, *ctrlr_tmp; + struct spdk_nvmf_ns *ns; + + if (!subsystem) { + return; + } + + assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE); + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "subsystem is %p\n", subsystem); + + TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) { + TAILQ_REMOVE(&subsystem->listeners, listener, link); + free(listener); + } + + TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) { + _spdk_nvmf_subsystem_remove_host(subsystem, host); + } + + TAILQ_FOREACH_SAFE(ctrlr, &subsystem->ctrlrs, link, ctrlr_tmp) { + spdk_nvmf_ctrlr_destruct(ctrlr); + } + + ns = spdk_nvmf_subsystem_get_first_ns(subsystem); + while (ns != NULL) { + struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns); + + _spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid); + ns = next_ns; + } + + free(subsystem->ns); + + subsystem->tgt->subsystems[subsystem->id] = NULL; + subsystem->tgt->discovery_genctr++; + + free(subsystem); +} + +static int +spdk_nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem, + enum spdk_nvmf_subsystem_state state) +{ + enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state; + + switch (state) { + case 
SPDK_NVMF_SUBSYSTEM_INACTIVE: + expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVATING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_PAUSING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING; + break; + case SPDK_NVMF_SUBSYSTEM_RESUMING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED; + break; + case SPDK_NVMF_SUBSYSTEM_DEACTIVATING: + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE; + break; + default: + assert(false); + return -1; + } + + actual_old_state = __sync_val_compare_and_swap(&subsystem->state, expected_old_state, state); + if (actual_old_state != expected_old_state) { + if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING && + state == SPDK_NVMF_SUBSYSTEM_ACTIVE) { + expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING; + } + /* This is for the case when activating the subsystem fails. */ + if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING && + state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) { + expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + } + actual_old_state = __sync_val_compare_and_swap(&subsystem->state, expected_old_state, state); + } + assert(actual_old_state == expected_old_state); + return actual_old_state - expected_old_state; +} + +struct subsystem_state_change_ctx { + struct spdk_nvmf_subsystem *subsystem; + + enum spdk_nvmf_subsystem_state requested_state; + + spdk_nvmf_subsystem_state_change_done cb_fn; + void *cb_arg; +}; + +static void +subsystem_state_change_done(struct spdk_io_channel_iter *i, int status) +{ + struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + if (status == 0) { + status = spdk_nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state); + if (status) { + status = -1; + } + } + + if (ctx->cb_fn) { + ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status); + } + free(ctx); +} + +static void +subsystem_state_change_continue(void *ctx, int status) +{ + struct spdk_io_channel_iter *i = ctx; + spdk_for_each_channel_continue(i, status); +} + +static void +subsystem_state_change_on_pg(struct spdk_io_channel_iter *i) +{ + struct subsystem_state_change_ctx *ctx; + struct spdk_io_channel *ch; + struct spdk_nvmf_poll_group *group; + + ctx = spdk_io_channel_iter_get_ctx(i); + ch = spdk_io_channel_iter_get_channel(i); + group = spdk_io_channel_get_ctx(ch); + + switch (ctx->requested_state) { + case SPDK_NVMF_SUBSYSTEM_INACTIVE: + spdk_nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) { + spdk_nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + } else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) { + spdk_nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + } + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + spdk_nvmf_poll_group_pause_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i); + break; + default: + assert(false); + break; + } +} + +static int +spdk_nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem, + enum spdk_nvmf_subsystem_state requested_state, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + struct subsystem_state_change_ctx *ctx; + enum 
spdk_nvmf_subsystem_state intermediate_state; + int rc; + + switch (requested_state) { + case SPDK_NVMF_SUBSYSTEM_INACTIVE: + intermediate_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING; + break; + case SPDK_NVMF_SUBSYSTEM_ACTIVE: + if (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) { + intermediate_state = SPDK_NVMF_SUBSYSTEM_RESUMING; + } else { + intermediate_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING; + } + break; + case SPDK_NVMF_SUBSYSTEM_PAUSED: + intermediate_state = SPDK_NVMF_SUBSYSTEM_PAUSING; + break; + default: + assert(false); + return -EINVAL; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + return -ENOMEM; + } + + rc = spdk_nvmf_subsystem_set_state(subsystem, intermediate_state); + if (rc) { + free(ctx); + return rc; + } + + ctx->subsystem = subsystem; + ctx->requested_state = requested_state; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_for_each_channel(subsystem->tgt, + subsystem_state_change_on_pg, + ctx, + subsystem_state_change_done); + + return 0; +} + +int +spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg); +} + +int +spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem, + spdk_nvmf_subsystem_state_change_done cb_fn, + void *cb_arg) +{ + return spdk_nvmf_subsystem_state_change(subsystem, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg); +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + uint32_t sid; + + for (sid = 0; sid < tgt->opts.max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem) { + return subsystem; + } + } + + return NULL; +} + +struct spdk_nvmf_subsystem * +spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem) +{ + uint32_t sid; + struct spdk_nvmf_tgt *tgt; + + if (!subsystem) { + return NULL; + } + + tgt = subsystem->tgt; + + for (sid = subsystem->id + 1; sid < tgt->opts.max_subsystems; sid++) { + subsystem = tgt->subsystems[sid]; + if (subsystem) { + return subsystem; + } + } + + return NULL; +} + +static struct spdk_nvmf_host * +_spdk_nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host = NULL; + + TAILQ_FOREACH(host, &subsystem->hosts, link) { + if (strcmp(hostnqn, host->nqn) == 0) { + return host; + } + } + + return NULL; +} + +int +spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host; + + if (!spdk_nvmf_valid_nqn(hostnqn)) { + return -EINVAL; + } + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + if (_spdk_nvmf_subsystem_find_host(subsystem, hostnqn)) { + /* This subsystem already allows the specified host. 
*/ + return 0; + } + + host = calloc(1, sizeof(*host)); + if (!host) { + return -ENOMEM; + } + host->nqn = strdup(hostnqn); + if (!host->nqn) { + free(host); + return -ENOMEM; + } + + TAILQ_INSERT_HEAD(&subsystem->hosts, host, link); + subsystem->tgt->discovery_genctr++; + + return 0; +} + +int +spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + struct spdk_nvmf_host *host; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + host = _spdk_nvmf_subsystem_find_host(subsystem, hostnqn); + if (host == NULL) { + return -ENOENT; + } + + _spdk_nvmf_subsystem_remove_host(subsystem, host); + return 0; +} + +int +spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host) +{ + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + subsystem->allow_any_host = allow_any_host; + + return 0; +} + +bool +spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->allow_any_host; +} + +bool +spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn) +{ + if (!hostnqn) { + return false; + } + + if (subsystem->allow_any_host) { + return true; + } + + return _spdk_nvmf_subsystem_find_host(subsystem, hostnqn) != NULL; +} + +struct spdk_nvmf_host * +spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem) +{ + return TAILQ_FIRST(&subsystem->hosts); +} + + +struct spdk_nvmf_host * +spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_host *prev_host) +{ + return TAILQ_NEXT(prev_host, link); +} + +const char * +spdk_nvmf_host_get_nqn(struct spdk_nvmf_host *host) +{ + return host->nqn; +} + +static struct spdk_nvmf_listener * +_spdk_nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + + TAILQ_FOREACH(listener, &subsystem->listeners, link) { + if (spdk_nvme_transport_id_compare(&listener->trid, trid) == 0) { + return listener; + } + } + + return NULL; +} + +int +spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_transport *transport; + struct spdk_nvmf_listener *listener; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + if (_spdk_nvmf_subsystem_find_listener(subsystem, trid)) { + /* Listener already exists in this subsystem */ + return 0; + } + + transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trtype); + if (transport == NULL) { + SPDK_ERRLOG("Unknown transport type %d\n", trid->trtype); + return -EINVAL; + } + + listener = calloc(1, sizeof(*listener)); + if (!listener) { + return -ENOMEM; + } + + listener->trid = *trid; + listener->transport = transport; + + TAILQ_INSERT_HEAD(&subsystem->listeners, listener, link); + + return 0; +} + +int +spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem, + const struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -EAGAIN; + } + + listener = _spdk_nvmf_subsystem_find_listener(subsystem, trid); + if (listener == NULL) { + return -ENOENT; 
+ } + + TAILQ_REMOVE(&subsystem->listeners, listener, link); + free(listener); + + return 0; +} + +bool +spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_transport_id *trid) +{ + struct spdk_nvmf_listener *listener; + + if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) { + return true; + } + + TAILQ_FOREACH(listener, &subsystem->listeners, link) { + if (spdk_nvme_transport_id_compare(&listener->trid, trid) == 0) { + return true; + } + } + + return false; +} + +struct spdk_nvmf_listener * +spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem) +{ + return TAILQ_FIRST(&subsystem->listeners); +} + +struct spdk_nvmf_listener * +spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_listener *prev_listener) +{ + return TAILQ_NEXT(prev_listener, link); +} + +const struct spdk_nvme_transport_id * +spdk_nvmf_listener_get_trid(struct spdk_nvmf_listener *listener) +{ + return &listener->trid; +} + +struct subsystem_update_ns_ctx { + struct spdk_nvmf_subsystem *subsystem; + + spdk_nvmf_subsystem_state_change_done cb_fn; + void *cb_arg; +}; + +static void +subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status) +{ + struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i); + + if (ctx->cb_fn) { + ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status); + } + free(ctx); +} + +static void +subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i) +{ + int rc; + struct subsystem_update_ns_ctx *ctx; + struct spdk_nvmf_poll_group *group; + struct spdk_nvmf_subsystem *subsystem; + + ctx = spdk_io_channel_iter_get_ctx(i); + group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i)); + subsystem = ctx->subsystem; + + rc = spdk_nvmf_poll_group_update_subsystem(group, subsystem); + spdk_for_each_channel_continue(i, rc); +} + +static int +spdk_nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl, + void *ctx) +{ + spdk_for_each_channel(subsystem->tgt, + subsystem_update_ns_on_pg, + ctx, + cpl); + + return 0; +} + +static void +spdk_nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + struct spdk_nvmf_ctrlr *ctrlr; + + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + spdk_nvmf_ctrlr_ns_changed(ctrlr, nsid); + } +} + +static int +_spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + struct spdk_nvmf_ns *ns; + + assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED || + subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE); + + if (nsid == 0 || nsid > subsystem->max_nsid) { + return -1; + } + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return -1; + } + + ns = subsystem->ns[nsid - 1]; + if (!ns) { + return -1; + } + + subsystem->ns[nsid - 1] = NULL; + + spdk_bdev_module_release_bdev(ns->bdev); + spdk_bdev_close(ns->desc); + free(ns); + + spdk_nvmf_subsystem_ns_changed(subsystem, nsid); + + return 0; +} + +int +spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid, + spdk_nvmf_subsystem_state_change_done cb_fn, void *cb_arg) +{ + int rc; + struct subsystem_update_ns_ctx *ctx; + + rc = _spdk_nvmf_subsystem_remove_ns(subsystem, nsid); + if (rc < 0) { + return rc; + } + + ctx = calloc(1, sizeof(*ctx)); + + if (ctx == NULL) { + return -ENOMEM; + } + + ctx->subsystem = subsystem; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + spdk_nvmf_subsystem_update_ns(subsystem, 
subsystem_update_ns_done, ctx); + + return 0; +} + +static void +_spdk_nvmf_ns_hot_remove_done(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) +{ + if (status != 0) { + SPDK_ERRLOG("Failed to make changes to NVMe-oF subsystem with id %u\n", subsystem->id); + } + spdk_nvmf_subsystem_resume(subsystem, NULL, NULL); +} + +static void +_spdk_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + struct spdk_nvmf_ns *ns = cb_arg; + + spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid, _spdk_nvmf_ns_hot_remove_done, + subsystem); +} + +static void +spdk_nvmf_ns_hot_remove(void *remove_ctx) +{ + struct spdk_nvmf_ns *ns = remove_ctx; + int rc; + + rc = spdk_nvmf_subsystem_pause(ns->subsystem, _spdk_nvmf_ns_hot_remove, ns); + if (rc) { + SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n"); + } +} + +void +spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size) +{ + /* All current fields are set to 0 by default. */ + memset(opts, 0, opts_size); +} + +/* Dummy bdev module used to to claim bdevs. */ +static struct spdk_bdev_module ns_bdev_module = { + .name = "NVMe-oF Target", +}; + +uint32_t +spdk_nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, struct spdk_bdev *bdev, + const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size) +{ + struct spdk_nvmf_ns_opts opts; + struct spdk_nvmf_ns *ns; + int rc; + + if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE || + subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) { + return 0; + } + + spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts)); + if (user_opts) { + memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size)); + } + + if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) { + opts.uuid = *spdk_bdev_get_uuid(bdev); + } + + if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) { + SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid); + return 0; + } + + if (opts.nsid == 0) { + /* + * NSID not specified - find a free index. + * + * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will + * expand max_nsid if possible. + */ + for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) { + if (_spdk_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) { + break; + } + } + } + + if (_spdk_nvmf_subsystem_get_ns(subsystem, opts.nsid)) { + SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid); + return 0; + } + + if (opts.nsid > subsystem->max_nsid) { + struct spdk_nvmf_ns **new_ns_array; + + /* If MaxNamespaces was specified, we can't extend max_nsid beyond it. */ + if (subsystem->max_allowed_nsid > 0 && opts.nsid > subsystem->max_allowed_nsid) { + SPDK_ERRLOG("Can't extend NSID range above MaxNamespaces\n"); + return 0; + } + + /* If a controller is connected, we can't change NN. 
*/ + if (!TAILQ_EMPTY(&subsystem->ctrlrs)) { + SPDK_ERRLOG("Can't extend NSID range while controllers are connected\n"); + return 0; + } + + new_ns_array = realloc(subsystem->ns, sizeof(struct spdk_nvmf_ns *) * opts.nsid); + if (new_ns_array == NULL) { + SPDK_ERRLOG("Memory allocation error while resizing namespace array.\n"); + return 0; + } + + memset(new_ns_array + subsystem->max_nsid, 0, + sizeof(struct spdk_nvmf_ns *) * (opts.nsid - subsystem->max_nsid)); + subsystem->ns = new_ns_array; + subsystem->max_nsid = opts.nsid; + } + + ns = calloc(1, sizeof(*ns)); + if (ns == NULL) { + SPDK_ERRLOG("Namespace allocation failed\n"); + return 0; + } + + ns->bdev = bdev; + ns->opts = opts; + ns->subsystem = subsystem; + rc = spdk_bdev_open(bdev, true, spdk_nvmf_ns_hot_remove, ns, &ns->desc); + if (rc != 0) { + SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n", + subsystem->subnqn, spdk_bdev_get_name(bdev), rc); + free(ns); + return 0; + } + rc = spdk_bdev_module_claim_bdev(bdev, ns->desc, &ns_bdev_module); + if (rc != 0) { + spdk_bdev_close(ns->desc); + free(ns); + return 0; + } + subsystem->ns[opts.nsid - 1] = ns; + + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n", + spdk_nvmf_subsystem_get_nqn(subsystem), + spdk_bdev_get_name(bdev), + opts.nsid); + + spdk_nvmf_subsystem_ns_changed(subsystem, opts.nsid); + + return opts.nsid; +} + +static uint32_t +spdk_nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem, + uint32_t prev_nsid) +{ + uint32_t nsid; + + if (prev_nsid >= subsystem->max_nsid) { + return 0; + } + + for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) { + if (subsystem->ns[nsid - 1]) { + return nsid; + } + } + + return 0; +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem) +{ + uint32_t first_nsid; + + first_nsid = spdk_nvmf_subsystem_get_next_allocated_nsid(subsystem, 0); + return _spdk_nvmf_subsystem_get_ns(subsystem, first_nsid); +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ns *prev_ns) +{ + uint32_t next_nsid; + + next_nsid = spdk_nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid); + return _spdk_nvmf_subsystem_get_ns(subsystem, next_nsid); +} + +struct spdk_nvmf_ns * +spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) +{ + return _spdk_nvmf_subsystem_get_ns(subsystem, nsid); +} + +uint32_t +spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns) +{ + return ns->opts.nsid; +} + +struct spdk_bdev * +spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns) +{ + return ns->bdev; +} + +void +spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts, + size_t opts_size) +{ + memset(opts, 0, opts_size); + memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size)); +} + +const char * +spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->sn; +} + +int +spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn) +{ + size_t len, max_len; + + max_len = sizeof(subsystem->sn) - 1; + len = strlen(sn); + if (len > max_len) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Invalid sn \"%s\": length %zu > max %zu\n", + sn, len, max_len); + return -1; + } + + if (!spdk_nvmf_valid_ascii_string(sn, len)) { + SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Non-ASCII sn\n"); + SPDK_TRACEDUMP(SPDK_LOG_NVMF, "sn", sn, len); + return -1; + } + + snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn); + + 
return 0; +} + +const char * +spdk_nvmf_subsystem_get_nqn(struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->subnqn; +} + +/* Workaround for astyle formatting bug */ +typedef enum spdk_nvmf_subtype nvmf_subtype_t; + +nvmf_subtype_t +spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->subtype; +} + +static uint16_t +spdk_nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem) +{ + int count; + + /* + * In the worst case, we might have to try all CNTLID values between 1 and 0xFFF0 - 1 + * before we find one that is unused (or find that all values are in use). + */ + for (count = 0; count < 0xFFF0 - 1; count++) { + subsystem->next_cntlid++; + if (subsystem->next_cntlid >= 0xFFF0) { + /* The spec reserves cntlid values in the range FFF0h to FFFFh. */ + subsystem->next_cntlid = 1; + } + + /* Check if a controller with this cntlid currently exists. */ + if (spdk_nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) { + /* Found unused cntlid */ + return subsystem->next_cntlid; + } + } + + /* All valid cntlid values are in use. */ + return 0xFFFF; +} + +int +spdk_nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr) +{ + ctrlr->cntlid = spdk_nvmf_subsystem_gen_cntlid(subsystem); + if (ctrlr->cntlid == 0xFFFF) { + /* Unable to get a cntlid */ + SPDK_ERRLOG("Reached max simultaneous ctrlrs\n"); + return -EBUSY; + } + + TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link); + + return 0; +} + +void +spdk_nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvmf_ctrlr *ctrlr) +{ + assert(subsystem == ctrlr->subsys); + TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link); +} + +struct spdk_nvmf_ctrlr * +spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid) +{ + struct spdk_nvmf_ctrlr *ctrlr; + + TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) { + if (ctrlr->cntlid == cntlid) { + return ctrlr; + } + } + + return NULL; +} + +uint32_t +spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem) +{ + return subsystem->max_allowed_nsid; +} diff --git a/src/spdk/lib/nvmf/transport.c b/src/spdk/lib/nvmf/transport.c new file mode 100644 index 00000000..af4660c9 --- /dev/null +++ b/src/spdk/lib/nvmf/transport.c @@ -0,0 +1,236 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "nvmf_internal.h" +#include "transport.h" + +#include "spdk/config.h" +#include "spdk/log.h" +#include "spdk/nvmf.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +static const struct spdk_nvmf_transport_ops *const g_transport_ops[] = { +#ifdef SPDK_CONFIG_RDMA + &spdk_nvmf_transport_rdma, +#endif +}; + +#define NUM_TRANSPORTS (SPDK_COUNTOF(g_transport_ops)) + +static inline const struct spdk_nvmf_transport_ops * +spdk_nvmf_get_transport_ops(enum spdk_nvme_transport_type type) +{ + size_t i; + for (i = 0; i != NUM_TRANSPORTS; i++) { + if (g_transport_ops[i]->type == type) { + return g_transport_ops[i]; + } + } + return NULL; +} + +struct spdk_nvmf_transport * +spdk_nvmf_transport_create(enum spdk_nvme_transport_type type, + struct spdk_nvmf_transport_opts *opts) +{ + const struct spdk_nvmf_transport_ops *ops = NULL; + struct spdk_nvmf_transport *transport; + + if ((opts->max_io_size % opts->io_unit_size != 0) || + (opts->max_io_size / opts->io_unit_size > + SPDK_NVMF_MAX_SGL_ENTRIES)) { + SPDK_ERRLOG("%s: invalid IO size, MaxIO:%d, UnitIO:%d, MaxSGL:%d\n", + spdk_nvme_transport_id_trtype_str(type), + opts->max_io_size, + opts->io_unit_size, + SPDK_NVMF_MAX_SGL_ENTRIES); + return NULL; + } + + ops = spdk_nvmf_get_transport_ops(type); + if (!ops) { + SPDK_ERRLOG("Transport type %s unavailable.\n", + spdk_nvme_transport_id_trtype_str(type)); + return NULL; + } + + transport = ops->create(opts); + if (!transport) { + SPDK_ERRLOG("Unable to create new transport of type %s\n", + spdk_nvme_transport_id_trtype_str(type)); + return NULL; + } + + transport->ops = ops; + transport->opts = *opts; + + return transport; +} + +int +spdk_nvmf_transport_destroy(struct spdk_nvmf_transport *transport) +{ + return transport->ops->destroy(transport); +} + +int +spdk_nvmf_transport_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + return transport->ops->listen(transport, trid); +} + +int +spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid) +{ + return transport->ops->stop_listen(transport, trid); +} + +void +spdk_nvmf_transport_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) +{ + transport->ops->accept(transport, cb_fn); +} + +void +spdk_nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry) +{ + transport->ops->listener_discover(transport, trid, entry); +} + +struct spdk_nvmf_transport_poll_group * +spdk_nvmf_transport_poll_group_create(struct spdk_nvmf_transport *transport) +{ + struct spdk_nvmf_transport_poll_group *group; + + group = transport->ops->poll_group_create(transport); + group->transport = transport; + + return group; +} + +void +spdk_nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) +{ + group->transport->ops->poll_group_destroy(group); +} + +int 
+spdk_nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair) +{ + if (qpair->transport) { + assert(qpair->transport == group->transport); + if (qpair->transport != group->transport) { + return -1; + } + } else { + qpair->transport = group->transport; + } + + return group->transport->ops->poll_group_add(group, qpair); +} + +int +spdk_nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) +{ + return group->transport->ops->poll_group_poll(group); +} + +int +spdk_nvmf_transport_req_free(struct spdk_nvmf_request *req) +{ + return req->qpair->transport->ops->req_free(req); +} + +int +spdk_nvmf_transport_req_complete(struct spdk_nvmf_request *req) +{ + return req->qpair->transport->ops->req_complete(req); +} + +void +spdk_nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair) +{ + qpair->transport->ops->qpair_fini(qpair); +} + +bool +spdk_nvmf_transport_qpair_is_idle(struct spdk_nvmf_qpair *qpair) +{ + return qpair->transport->ops->qpair_is_idle(qpair); +} + +int +spdk_nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_peer_trid(qpair, trid); +} + +int +spdk_nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_local_trid(qpair, trid); +} + +int +spdk_nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid) +{ + return qpair->transport->ops->qpair_get_listen_trid(qpair, trid); +} + +bool +spdk_nvmf_transport_opts_init(enum spdk_nvme_transport_type type, + struct spdk_nvmf_transport_opts *opts) +{ + const struct spdk_nvmf_transport_ops *ops; + + ops = spdk_nvmf_get_transport_ops(type); + if (!ops) { + SPDK_ERRLOG("Transport type %s unavailable.\n", + spdk_nvme_transport_id_trtype_str(type)); + return false; + } + + ops->opts_init(opts); + return true; +} diff --git a/src/spdk/lib/nvmf/transport.h b/src/spdk/lib/nvmf/transport.h new file mode 100644 index 00000000..1329a80c --- /dev/null +++ b/src/spdk/lib/nvmf/transport.h @@ -0,0 +1,200 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_NVMF_TRANSPORT_H +#define SPDK_NVMF_TRANSPORT_H + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/nvmf.h" + +struct spdk_nvmf_transport { + struct spdk_nvmf_tgt *tgt; + const struct spdk_nvmf_transport_ops *ops; + struct spdk_nvmf_transport_opts opts; + + TAILQ_ENTRY(spdk_nvmf_transport) link; +}; + +struct spdk_nvmf_transport_ops { + /** + * Transport type + */ + enum spdk_nvme_transport_type type; + + /** + * Initialize transport options to default value + */ + void (*opts_init)(struct spdk_nvmf_transport_opts *opts); + + /** + * Create a transport for the given transport opts + */ + struct spdk_nvmf_transport *(*create)(struct spdk_nvmf_transport_opts *opts); + + /** + * Destroy the transport + */ + int (*destroy)(struct spdk_nvmf_transport *transport); + + /** + * Instruct the transport to accept new connections at the address + * provided. This may be called multiple times. + */ + int (*listen)(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + + /** + * Stop accepting new connections at the given address. + */ + int (*stop_listen)(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + + /** + * Check for new connections on the transport. + */ + void (*accept)(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn); + + /** + * Fill out a discovery log entry for a specific listen address. + */ + void (*listener_discover)(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry); + + /** + * Create a new poll group + */ + struct spdk_nvmf_transport_poll_group *(*poll_group_create)(struct spdk_nvmf_transport *transport); + + /** + * Destroy a poll group + */ + void (*poll_group_destroy)(struct spdk_nvmf_transport_poll_group *group); + + /** + * Add a qpair to a poll group + */ + int (*poll_group_add)(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + + /** + * Poll the group to process I/O + */ + int (*poll_group_poll)(struct spdk_nvmf_transport_poll_group *group); + + /* + * Free the request without sending a response + * to the originator. Release memory tied to this request. + */ + int (*req_free)(struct spdk_nvmf_request *req); + + /* + * Signal request completion, which sends a response + * to the originator. + */ + int (*req_complete)(struct spdk_nvmf_request *req); + + /* + * Deinitialize a connection. + */ + void (*qpair_fini)(struct spdk_nvmf_qpair *qpair); + + /* + * True if the qpair has no pending IO. + */ + bool (*qpair_is_idle)(struct spdk_nvmf_qpair *qpair); + + /* + * Get the peer transport ID for the queue pair. + */ + int (*qpair_get_peer_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + + /* + * Get the local transport ID for the queue pair. 
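/*
 * Example (illustrative sketch): the shape of an ops table for a
 * hypothetical transport. Every my_* symbol below is made up; a real
 * transport supplies one function per member (compare the RDMA transport
 * declared at the end of this header) and is then listed in the
 * g_transport_ops[] table in transport.c.
 */
static const struct spdk_nvmf_transport_ops my_transport_ops = {
	.type			= SPDK_NVME_TRANSPORT_RDMA,
	.opts_init		= my_opts_init,
	.create			= my_create,
	.destroy		= my_destroy,
	.listen			= my_listen,
	.stop_listen		= my_stop_listen,
	.accept			= my_accept,
	.listener_discover	= my_listener_discover,
	.poll_group_create	= my_poll_group_create,
	.poll_group_destroy	= my_poll_group_destroy,
	.poll_group_add		= my_poll_group_add,
	.poll_group_poll	= my_poll_group_poll,
	.req_free		= my_req_free,
	.req_complete		= my_req_complete,
	.qpair_fini		= my_qpair_fini,
	.qpair_is_idle		= my_qpair_is_idle,
	.qpair_get_peer_trid	= my_qpair_get_peer_trid,
	.qpair_get_local_trid	= my_qpair_get_local_trid,
	.qpair_get_listen_trid	= my_qpair_get_listen_trid,
};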
+ */ + int (*qpair_get_local_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + + /* + * Get the listener transport ID that accepted this qpair originally. + */ + int (*qpair_get_listen_trid)(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); +}; + + +int spdk_nvmf_transport_stop_listen(struct spdk_nvmf_transport *transport, + const struct spdk_nvme_transport_id *trid); + +void spdk_nvmf_transport_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn); + +void spdk_nvmf_transport_listener_discover(struct spdk_nvmf_transport *transport, + struct spdk_nvme_transport_id *trid, + struct spdk_nvmf_discovery_log_page_entry *entry); + +struct spdk_nvmf_transport_poll_group *spdk_nvmf_transport_poll_group_create( + struct spdk_nvmf_transport *transport); + +void spdk_nvmf_transport_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); + +int spdk_nvmf_transport_poll_group_add(struct spdk_nvmf_transport_poll_group *group, + struct spdk_nvmf_qpair *qpair); + +int spdk_nvmf_transport_poll_group_poll(struct spdk_nvmf_transport_poll_group *group); + +int spdk_nvmf_transport_req_free(struct spdk_nvmf_request *req); + +int spdk_nvmf_transport_req_complete(struct spdk_nvmf_request *req); + +void spdk_nvmf_transport_qpair_fini(struct spdk_nvmf_qpair *qpair); + +bool spdk_nvmf_transport_qpair_is_idle(struct spdk_nvmf_qpair *qpair); + +int spdk_nvmf_transport_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +int spdk_nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +int spdk_nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, + struct spdk_nvme_transport_id *trid); + +bool spdk_nvmf_transport_opts_init(enum spdk_nvme_transport_type type, + struct spdk_nvmf_transport_opts *opts); + +extern const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma; + +#endif /* SPDK_NVMF_TRANSPORT_H */ diff --git a/src/spdk/lib/rocksdb/env_spdk.cc b/src/spdk/lib/rocksdb/env_spdk.cc new file mode 100644 index 00000000..63c979eb --- /dev/null +++ b/src/spdk/lib/rocksdb/env_spdk.cc @@ -0,0 +1,764 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rocksdb/env.h" +#include +#include +#include + +extern "C" { +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/blob.h" +#include "spdk/blobfs.h" +#include "spdk/blob_bdev.h" +#include "spdk/log.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" +} + +namespace rocksdb +{ + +struct spdk_filesystem *g_fs = NULL; +struct spdk_bs_dev *g_bs_dev; +uint32_t g_lcore = 0; +std::string g_bdev_name; +volatile bool g_spdk_ready = false; +volatile bool g_spdk_start_failure = false; +struct sync_args { + struct spdk_io_channel *channel; +}; + +__thread struct sync_args g_sync_args; + +static void +__call_fn(void *arg1, void *arg2) +{ + fs_request_fn fn; + + fn = (fs_request_fn)arg1; + fn(arg2); +} + +static void +__send_request(fs_request_fn fn, void *arg) +{ + struct spdk_event *event; + + event = spdk_event_allocate(g_lcore, __call_fn, (void *)fn, arg); + spdk_event_call(event); +} + +static std::string +sanitize_path(const std::string &input, const std::string &mount_directory) +{ + int index = 0; + std::string name; + std::string input_tmp; + + input_tmp = input.substr(mount_directory.length(), input.length()); + for (const char &c : input_tmp) { + if (index == 0) { + if (c != '/') { + name = name.insert(index, 1, '/'); + index++; + } + name = name.insert(index, 1, c); + index++; + } else { + if (name[index - 1] == '/' && c == '/') { + continue; + } else { + name = name.insert(index, 1, c); + index++; + } + } + } + + if (name[name.size() - 1] == '/') { + name = name.erase(name.size() - 1, 1); + } + return name; +} + +class SpdkSequentialFile : public SequentialFile +{ + struct spdk_file *mFile; + uint64_t mOffset; +public: + SpdkSequentialFile(struct spdk_file *file) : mFile(file), mOffset(0) {} + virtual ~SpdkSequentialFile(); + + virtual Status Read(size_t n, Slice *result, char *scratch) override; + virtual Status Skip(uint64_t n) override; + virtual Status InvalidateCache(size_t offset, size_t length) override; +}; + +SpdkSequentialFile::~SpdkSequentialFile(void) +{ + spdk_file_close(mFile, g_sync_args.channel); +} + +Status +SpdkSequentialFile::Read(size_t n, Slice *result, char *scratch) +{ + int64_t ret; + + ret = spdk_file_read(mFile, g_sync_args.channel, scratch, mOffset, n); + if (ret >= 0) { + mOffset += ret; + *result = Slice(scratch, ret); + return Status::OK(); + } else { + errno = -ret; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } +} + +Status +SpdkSequentialFile::Skip(uint64_t n) +{ + mOffset += n; + return Status::OK(); +} + +Status +SpdkSequentialFile::InvalidateCache(__attribute__((unused)) size_t offset, + __attribute__((unused)) size_t length) +{ + return Status::OK(); +} + +class SpdkRandomAccessFile : public RandomAccessFile +{ + struct spdk_file *mFile; +public: + SpdkRandomAccessFile(struct spdk_file *file) : mFile(file) {} + virtual ~SpdkRandomAccessFile(); + + virtual Status Read(uint64_t offset, size_t n, Slice *result, char *scratch) const override; + virtual Status 
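/*
 * Worked examples (illustrative) for sanitize_path() above, assuming the
 * BlobFS mount directory is "/spdk_mount" (the paths themselves are made up):
 *
 *   sanitize_path("/spdk_mount/db0//000001.sst", "/spdk_mount") -> "/db0/000001.sst"
 *   sanitize_path("/spdk_mount/db0/",            "/spdk_mount") -> "/db0"
 *
 * i.e. the mount prefix is stripped, a leading '/' is guaranteed, repeated
 * '/' characters are collapsed, and a trailing '/' is removed before the
 * name is handed to the SPDK BlobFS calls.
 */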
InvalidateCache(size_t offset, size_t length) override; +}; + +SpdkRandomAccessFile::~SpdkRandomAccessFile(void) +{ + spdk_file_close(mFile, g_sync_args.channel); +} + +Status +SpdkRandomAccessFile::Read(uint64_t offset, size_t n, Slice *result, char *scratch) const +{ + int64_t rc; + + rc = spdk_file_read(mFile, g_sync_args.channel, scratch, offset, n); + if (rc >= 0) { + *result = Slice(scratch, n); + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } +} + +Status +SpdkRandomAccessFile::InvalidateCache(__attribute__((unused)) size_t offset, + __attribute__((unused)) size_t length) +{ + return Status::OK(); +} + +class SpdkWritableFile : public WritableFile +{ + struct spdk_file *mFile; + uint64_t mSize; + +public: + SpdkWritableFile(struct spdk_file *file) : mFile(file), mSize(0) {} + ~SpdkWritableFile() + { + if (mFile != NULL) { + Close(); + } + } + + virtual void SetIOPriority(Env::IOPriority pri) + { + if (pri == Env::IO_HIGH) { + spdk_file_set_priority(mFile, SPDK_FILE_PRIORITY_HIGH); + } + } + + virtual Status Truncate(uint64_t size) override + { + int rc; + rc = spdk_file_truncate(mFile, g_sync_args.channel, size); + if (!rc) { + mSize = size; + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } + } + virtual Status Close() override + { + spdk_file_close(mFile, g_sync_args.channel); + mFile = NULL; + return Status::OK(); + } + virtual Status Append(const Slice &data) override; + virtual Status Flush() override + { + return Status::OK(); + } + virtual Status Sync() override + { + int rc; + + rc = spdk_file_sync(mFile, g_sync_args.channel); + if (!rc) { + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } + } + virtual Status Fsync() override + { + int rc; + + rc = spdk_file_sync(mFile, g_sync_args.channel); + if (!rc) { + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } + } + virtual bool IsSyncThreadSafe() const override + { + return true; + } + virtual uint64_t GetFileSize() override + { + return mSize; + } + virtual Status InvalidateCache(__attribute__((unused)) size_t offset, + __attribute__((unused)) size_t length) override + { + return Status::OK(); + } + virtual Status Allocate(uint64_t offset, uint64_t len) override + { + int rc; + + rc = spdk_file_truncate(mFile, g_sync_args.channel, offset + len); + if (!rc) { + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } + } + virtual Status RangeSync(__attribute__((unused)) uint64_t offset, + __attribute__((unused)) uint64_t nbytes) override + { + int rc; + + /* + * SPDK BlobFS does not have a range sync operation yet, so just sync + * the whole file. 
+ */ + rc = spdk_file_sync(mFile, g_sync_args.channel); + if (!rc) { + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } + } + virtual size_t GetUniqueId(char *id, size_t max_size) const override + { + int rc; + + rc = spdk_file_get_id(mFile, id, max_size); + if (rc < 0) { + return 0; + } else { + return rc; + } + } +}; + +Status +SpdkWritableFile::Append(const Slice &data) +{ + int64_t rc; + + rc = spdk_file_write(mFile, g_sync_args.channel, (void *)data.data(), mSize, data.size()); + if (rc >= 0) { + mSize += data.size(); + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(spdk_file_get_name(mFile), strerror(errno)); + } +} + +class SpdkDirectory : public Directory +{ +public: + SpdkDirectory() {} + ~SpdkDirectory() {} + Status Fsync() override + { + return Status::OK(); + } +}; + +class SpdkAppStartException : public std::runtime_error +{ +public: + SpdkAppStartException(std::string mess): std::runtime_error(mess) {} +}; + +class SpdkEnv : public EnvWrapper +{ +private: + pthread_t mSpdkTid; + std::string mDirectory; + std::string mConfig; + std::string mBdev; + +public: + SpdkEnv(Env *base_env, const std::string &dir, const std::string &conf, + const std::string &bdev, uint64_t cache_size_in_mb); + + virtual ~SpdkEnv(); + + virtual Status NewSequentialFile(const std::string &fname, + unique_ptr *result, + const EnvOptions &options) override + { + if (fname.compare(0, mDirectory.length(), mDirectory) == 0) { + struct spdk_file *file; + int rc; + + std::string name = sanitize_path(fname, mDirectory); + rc = spdk_fs_open_file(g_fs, g_sync_args.channel, + name.c_str(), 0, &file); + if (rc == 0) { + result->reset(new SpdkSequentialFile(file)); + return Status::OK(); + } else { + /* Myrocks engine uses errno(ENOENT) as one + * special condition, for the purpose to + * support MySQL, set the errno to right value. 
+ */ + errno = -rc; + return Status::IOError(name, strerror(errno)); + } + } else { + return EnvWrapper::NewSequentialFile(fname, result, options); + } + } + + virtual Status NewRandomAccessFile(const std::string &fname, + unique_ptr *result, + const EnvOptions &options) override + { + if (fname.compare(0, mDirectory.length(), mDirectory) == 0) { + std::string name = sanitize_path(fname, mDirectory); + struct spdk_file *file; + int rc; + + rc = spdk_fs_open_file(g_fs, g_sync_args.channel, + name.c_str(), 0, &file); + if (rc == 0) { + result->reset(new SpdkRandomAccessFile(file)); + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(name, strerror(errno)); + } + } else { + return EnvWrapper::NewRandomAccessFile(fname, result, options); + } + } + + virtual Status NewWritableFile(const std::string &fname, + unique_ptr *result, + const EnvOptions &options) override + { + if (fname.compare(0, mDirectory.length(), mDirectory) == 0) { + std::string name = sanitize_path(fname, mDirectory); + struct spdk_file *file; + int rc; + + rc = spdk_fs_open_file(g_fs, g_sync_args.channel, name.c_str(), + SPDK_BLOBFS_OPEN_CREATE, &file); + if (rc == 0) { + result->reset(new SpdkWritableFile(file)); + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(name, strerror(errno)); + } + } else { + return EnvWrapper::NewWritableFile(fname, result, options); + } + } + + virtual Status ReuseWritableFile(const std::string &fname, + const std::string &old_fname, + unique_ptr *result, + const EnvOptions &options) override + { + return EnvWrapper::ReuseWritableFile(fname, old_fname, result, options); + } + + virtual Status NewDirectory(__attribute__((unused)) const std::string &name, + unique_ptr *result) override + { + result->reset(new SpdkDirectory()); + return Status::OK(); + } + virtual Status FileExists(const std::string &fname) override + { + struct spdk_file_stat stat; + int rc; + std::string name = sanitize_path(fname, mDirectory); + + rc = spdk_fs_file_stat(g_fs, g_sync_args.channel, name.c_str(), &stat); + if (rc == 0) { + return Status::OK(); + } + return EnvWrapper::FileExists(fname); + } + virtual Status RenameFile(const std::string &src, const std::string &t) override + { + int rc; + std::string src_name = sanitize_path(src, mDirectory); + std::string target_name = sanitize_path(t, mDirectory); + + rc = spdk_fs_rename_file(g_fs, g_sync_args.channel, + src_name.c_str(), target_name.c_str()); + if (rc == -ENOENT) { + return EnvWrapper::RenameFile(src, t); + } + return Status::OK(); + } + virtual Status LinkFile(__attribute__((unused)) const std::string &src, + __attribute__((unused)) const std::string &t) override + { + return Status::NotSupported("SpdkEnv does not support LinkFile"); + } + virtual Status GetFileSize(const std::string &fname, uint64_t *size) override + { + struct spdk_file_stat stat; + int rc; + std::string name = sanitize_path(fname, mDirectory); + + rc = spdk_fs_file_stat(g_fs, g_sync_args.channel, name.c_str(), &stat); + if (rc == -ENOENT) { + return EnvWrapper::GetFileSize(fname, size); + } + *size = stat.size; + return Status::OK(); + } + virtual Status DeleteFile(const std::string &fname) override + { + int rc; + std::string name = sanitize_path(fname, mDirectory); + + rc = spdk_fs_delete_file(g_fs, g_sync_args.channel, name.c_str()); + if (rc == -ENOENT) { + return EnvWrapper::DeleteFile(fname); + } + return Status::OK(); + } + virtual void StartThread(void (*function)(void *arg), void *arg) override; + virtual Status LockFile(const std::string 
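/*
 * Example (illustrative sketch): the synchronous BlobFS call pattern the
 * Env methods above are built on. The helper name and file name are
 * hypothetical; g_fs and g_sync_args.channel are the globals initialized
 * elsewhere in this file, and errors come back as negated errno values,
 * mirroring the Status::IOError conversions above.
 */
static int
example_blobfs_write(const char *name, void *buf, uint64_t len)
{
	struct spdk_file *file;
	int rc;

	rc = spdk_fs_open_file(g_fs, g_sync_args.channel, name,
			       SPDK_BLOBFS_OPEN_CREATE, &file);
	if (rc != 0) {
		return rc;
	}

	rc = spdk_file_write(file, g_sync_args.channel, buf, 0, len);
	if (rc >= 0) {
		rc = spdk_file_sync(file, g_sync_args.channel);
	}

	spdk_file_close(file, g_sync_args.channel);
	return rc;
}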
&fname, FileLock **lock) override + { + std::string name = sanitize_path(fname, mDirectory); + int64_t rc; + + rc = spdk_fs_open_file(g_fs, g_sync_args.channel, name.c_str(), + SPDK_BLOBFS_OPEN_CREATE, (struct spdk_file **)lock); + if (!rc) { + return Status::OK(); + } else { + errno = -rc; + return Status::IOError(name, strerror(errno)); + } + } + virtual Status UnlockFile(FileLock *lock) override + { + spdk_file_close((struct spdk_file *)lock, g_sync_args.channel); + return Status::OK(); + } + virtual Status GetChildren(const std::string &dir, + std::vector *result) override + { + std::string::size_type pos; + std::set dir_and_file_set; + std::string full_path; + std::string filename; + std::string dir_name; + + if (dir.find("archive") != std::string::npos) { + return Status::OK(); + } + if (dir.compare(0, mDirectory.length(), mDirectory) == 0) { + spdk_fs_iter iter; + struct spdk_file *file; + dir_name = sanitize_path(dir, mDirectory); + + iter = spdk_fs_iter_first(g_fs); + while (iter != NULL) { + file = spdk_fs_iter_get_file(iter); + full_path = spdk_file_get_name(file); + if (strncmp(dir_name.c_str(), full_path.c_str(), dir_name.length())) { + iter = spdk_fs_iter_next(iter); + continue; + } + pos = full_path.find("/", dir_name.length() + 1); + + if (pos != std::string::npos) { + filename = full_path.substr(dir_name.length() + 1, pos - dir_name.length() - 1); + } else { + filename = full_path.substr(dir_name.length() + 1); + } + dir_and_file_set.insert(filename); + iter = spdk_fs_iter_next(iter); + } + + for (auto &s : dir_and_file_set) { + result->push_back(s); + } + + result->push_back("."); + result->push_back(".."); + + return Status::OK(); + } + return EnvWrapper::GetChildren(dir, result); + } +}; + +static void +_spdk_send_msg(__attribute__((unused)) spdk_thread_fn fn, + __attribute__((unused)) void *ctx, + __attribute__((unused)) void *thread_ctx) +{ + /* Not supported */ + assert(false); +} + +void SpdkInitializeThread(void) +{ + if (g_fs != NULL) { + /* TODO: Add an event lib call to dynamically register a thread */ + spdk_allocate_thread(_spdk_send_msg, NULL, NULL, NULL, "spdk_rocksdb"); + g_sync_args.channel = spdk_fs_alloc_io_channel_sync(g_fs); + } +} + +struct SpdkThreadState { + void (*user_function)(void *); + void *arg; +}; + +static void SpdkStartThreadWrapper(void *arg) +{ + SpdkThreadState *state = reinterpret_cast(arg); + + SpdkInitializeThread(); + state->user_function(state->arg); + delete state; +} + +void SpdkEnv::StartThread(void (*function)(void *arg), void *arg) +{ + SpdkThreadState *state = new SpdkThreadState; + state->user_function = function; + state->arg = arg; + EnvWrapper::StartThread(SpdkStartThreadWrapper, state); +} + +static void +fs_load_cb(__attribute__((unused)) void *ctx, + struct spdk_filesystem *fs, int fserrno) +{ + if (fserrno == 0) { + g_fs = fs; + } + g_spdk_ready = true; +} + +static void +spdk_rocksdb_run(__attribute__((unused)) void *arg1, + __attribute__((unused)) void *arg2) +{ + struct spdk_bdev *bdev; + + bdev = spdk_bdev_get_by_name(g_bdev_name.c_str()); + + if (bdev == NULL) { + SPDK_ERRLOG("bdev %s not found\n", g_bdev_name.c_str()); + exit(1); + } + + g_lcore = spdk_env_get_first_core(); + + g_bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + printf("using bdev %s\n", g_bdev_name.c_str()); + spdk_fs_load(g_bs_dev, __send_request, fs_load_cb, NULL); +} + +static void +fs_unload_cb(__attribute__((unused)) void *ctx, + __attribute__((unused)) int fserrno) +{ + assert(fserrno == 0); + + spdk_app_stop(0); +} + +static void 
+spdk_rocksdb_shutdown(void) +{ + if (g_fs != NULL) { + spdk_fs_unload(g_fs, fs_unload_cb, NULL); + } else { + fs_unload_cb(NULL, 0); + } +} + +static void * +initialize_spdk(void *arg) +{ + struct spdk_app_opts *opts = (struct spdk_app_opts *)arg; + int rc; + + rc = spdk_app_start(opts, spdk_rocksdb_run, NULL, NULL); + /* + * TODO: Revisit for case of internal failure of + * spdk_app_start(), itself. At this time, it's known + * the only application's use of spdk_app_stop() passes + * a zero; i.e. no fail (non-zero) cases so here we + * assume there was an internal failure and flag it + * so we can throw an exception. + */ + if (rc) { + g_spdk_start_failure = true; + } else { + spdk_app_fini(); + delete opts; + } + pthread_exit(NULL); + +} + +SpdkEnv::SpdkEnv(Env *base_env, const std::string &dir, const std::string &conf, + const std::string &bdev, uint64_t cache_size_in_mb) + : EnvWrapper(base_env), mDirectory(dir), mConfig(conf), mBdev(bdev) +{ + struct spdk_app_opts *opts = new struct spdk_app_opts; + + spdk_app_opts_init(opts); + opts->name = "rocksdb"; + opts->config_file = mConfig.c_str(); + opts->mem_size = 1024 + cache_size_in_mb; + opts->shutdown_cb = spdk_rocksdb_shutdown; + + spdk_fs_set_cache_size(cache_size_in_mb); + g_bdev_name = mBdev; + + pthread_create(&mSpdkTid, NULL, &initialize_spdk, opts); + while (!g_spdk_ready && !g_spdk_start_failure) + ; + if (g_spdk_start_failure) { + delete opts; + throw SpdkAppStartException("spdk_app_start() unable to start spdk_rocksdb_run()"); + } + + SpdkInitializeThread(); +} + +SpdkEnv::~SpdkEnv() +{ + /* This is a workaround for rocksdb test, we close the files if the rocksdb not + * do the work before the test quit. + */ + if (g_fs != NULL) { + spdk_fs_iter iter; + struct spdk_file *file; + + if (!g_sync_args.channel) { + SpdkInitializeThread(); + } + iter = spdk_fs_iter_first(g_fs); + while (iter != NULL) { + file = spdk_fs_iter_get_file(iter); + spdk_file_close(file, g_sync_args.channel); + iter = spdk_fs_iter_next(iter); + } + } + + spdk_app_start_shutdown(); + pthread_join(mSpdkTid, NULL); +} + +Env *NewSpdkEnv(Env *base_env, const std::string &dir, const std::string &conf, + const std::string &bdev, uint64_t cache_size_in_mb) +{ + try { + SpdkEnv *spdk_env = new SpdkEnv(base_env, dir, conf, bdev, cache_size_in_mb); + if (g_fs != NULL) { + return spdk_env; + } else { + delete spdk_env; + return NULL; + } + } catch (SpdkAppStartException &e) { + SPDK_ERRLOG("NewSpdkEnv: exception caught: %s", e.what()); + return NULL; + } catch (...) { + SPDK_ERRLOG("NewSpdkEnv: default exception caught"); + return NULL; + } +} + +} // namespace rocksdb diff --git a/src/spdk/lib/rocksdb/spdk.rocksdb.mk b/src/spdk/lib/rocksdb/spdk.rocksdb.mk new file mode 100644 index 00000000..2f7a4a86 --- /dev/null +++ b/src/spdk/lib/rocksdb/spdk.rocksdb.mk @@ -0,0 +1,70 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# This snippet will be included into the RocksDB Makefile + +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +CXXFLAGS += -I$(SPDK_DIR)/include -Iinclude/ + +# The SPDK makefiles turn this on, but RocksDB won't compile with it. So +# turn it off after including the SPDK makefiles. +CXXFLAGS += -Wno-missing-declarations + +# The SPDK Makefiles may turn these options on but we do not want to enable +# them for the RocksDB source files. +CXXFLAGS += -fno-profile-arcs -fno-test-coverage +ifeq ($(CONFIG_UBSAN),y) +CXXFLAGS += -fno-sanitize=undefined +endif +ifeq ($(CONFIG_ASAN),y) +CXXFLAGS += -fno-sanitize=address +endif + +SPDK_LIB_LIST = event_bdev event_copy +SPDK_LIB_LIST += blobfs bdev copy event util conf trace \ + log jsonrpc json rpc thread + +AM_LINK += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_MODULES_LINKER_ARGS) +AM_LINK += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) +AM_LINK += $(SYS_LIBS) + +ifeq ($(CONFIG_UBSAN),y) +AM_LINK += -fsanitize=undefined +endif + +ifeq ($(CONFIG_COVERAGE),y) +AM_LINK += -fprofile-arcs -ftest-coverage +endif diff --git a/src/spdk/lib/rpc/Makefile b/src/spdk/lib/rpc/Makefile new file mode 100644 index 00000000..024d7a04 --- /dev/null +++ b/src/spdk/lib/rpc/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = rpc.c +LIBNAME = rpc + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/rpc/rpc.c b/src/spdk/lib/rpc/rpc.c new file mode 100644 index 00000000..985d40f4 --- /dev/null +++ b/src/spdk/lib/rpc/rpc.c @@ -0,0 +1,285 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" +#include "spdk/rpc.h" +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#define RPC_DEFAULT_PORT "5260" + +static struct sockaddr_un g_rpc_listen_addr_unix = {}; +static char g_rpc_lock_path[sizeof(g_rpc_listen_addr_unix.sun_path) + sizeof(".lock")]; +static int g_rpc_lock_fd = -1; + +static struct spdk_jsonrpc_server *g_jsonrpc_server = NULL; +static uint32_t g_rpc_state; + +struct spdk_rpc_method { + const char *name; + spdk_rpc_method_handler func; + SLIST_ENTRY(spdk_rpc_method) slist; + uint32_t state_mask; +}; + +static SLIST_HEAD(, spdk_rpc_method) g_rpc_methods = SLIST_HEAD_INITIALIZER(g_rpc_methods); + +void +spdk_rpc_set_state(uint32_t state) +{ + g_rpc_state = state; +} + +static void +spdk_jsonrpc_handler(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, + const struct spdk_json_val *params) +{ + struct spdk_rpc_method *m; + + assert(method != NULL); + + SLIST_FOREACH(m, &g_rpc_methods, slist) { + if (spdk_json_strequal(method, m->name)) { + if ((m->state_mask & g_rpc_state) == g_rpc_state) { + m->func(request, params); + } else { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_STATE, + "Method is allowed in any state in the mask (%"PRIx32")," + " but current state is (%"PRIx32")", + m->state_mask, g_rpc_state); + } + return; + } + } + + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND, "Method not found"); +} + +int +spdk_rpc_listen(const char *listen_addr) +{ + struct addrinfo hints; + struct addrinfo *res; + + memset(&g_rpc_listen_addr_unix, 0, sizeof(g_rpc_listen_addr_unix)); + + if (listen_addr[0] == '/') { + int rc; + + g_rpc_listen_addr_unix.sun_family = AF_UNIX; + rc = snprintf(g_rpc_listen_addr_unix.sun_path, + sizeof(g_rpc_listen_addr_unix.sun_path), + "%s", listen_addr); + if (rc < 0 || (size_t)rc >= sizeof(g_rpc_listen_addr_unix.sun_path)) { + SPDK_ERRLOG("RPC Listen address Unix socket path too long\n"); + g_rpc_listen_addr_unix.sun_path[0] = '\0'; + return -1; + } + + snprintf(g_rpc_lock_path, sizeof(g_rpc_lock_path), "%s.lock", + g_rpc_listen_addr_unix.sun_path); + + g_rpc_lock_fd = open(g_rpc_lock_path, O_RDONLY | O_CREAT, 0600); + if (g_rpc_lock_fd == -1) { + SPDK_ERRLOG("Cannot open lock file %s: %s\n", + g_rpc_lock_path, spdk_strerror(errno)); + return -1; + } + + rc = flock(g_rpc_lock_fd, LOCK_EX | LOCK_NB); + if (rc != 0) { + SPDK_ERRLOG("RPC Unix domain socket path %s in use. Specify another.\n", + g_rpc_listen_addr_unix.sun_path); + return -1; + } + + /* + * Since we acquired the lock, it is safe to delete the Unix socket file + * if it still exists from a previous process. 
+ */ + unlink(g_rpc_listen_addr_unix.sun_path); + + g_jsonrpc_server = spdk_jsonrpc_server_listen(AF_UNIX, 0, + (struct sockaddr *)&g_rpc_listen_addr_unix, + sizeof(g_rpc_listen_addr_unix), + spdk_jsonrpc_handler); + if (g_jsonrpc_server == NULL) { + close(g_rpc_lock_fd); + g_rpc_lock_fd = -1; + unlink(g_rpc_lock_path); + g_rpc_lock_path[0] = '\0'; + } + } else { + char *tmp; + char *host, *port; + + tmp = strdup(listen_addr); + if (!tmp) { + SPDK_ERRLOG("Out of memory\n"); + return -1; + } + + if (spdk_parse_ip_addr(tmp, &host, &port) < 0) { + free(tmp); + SPDK_ERRLOG("Invalid listen address '%s'\n", listen_addr); + return -1; + } + + if (port == NULL) { + port = RPC_DEFAULT_PORT; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + if (getaddrinfo(host, port, &hints, &res) != 0) { + free(tmp); + SPDK_ERRLOG("Unable to look up RPC listen address '%s'\n", listen_addr); + return -1; + } + + g_jsonrpc_server = spdk_jsonrpc_server_listen(res->ai_family, res->ai_protocol, + res->ai_addr, res->ai_addrlen, + spdk_jsonrpc_handler); + + freeaddrinfo(res); + free(tmp); + } + + if (g_jsonrpc_server == NULL) { + SPDK_ERRLOG("spdk_jsonrpc_server_listen() failed\n"); + return -1; + } + + return 0; +} + +void +spdk_rpc_accept(void) +{ + spdk_jsonrpc_server_poll(g_jsonrpc_server); +} + +void +spdk_rpc_register_method(const char *method, spdk_rpc_method_handler func, uint32_t state_mask) +{ + struct spdk_rpc_method *m; + + m = calloc(1, sizeof(struct spdk_rpc_method)); + assert(m != NULL); + + m->name = strdup(method); + assert(m->name != NULL); + + m->func = func; + m->state_mask = state_mask; + + /* TODO: use a hash table or sorted list */ + SLIST_INSERT_HEAD(&g_rpc_methods, m, slist); +} + +void +spdk_rpc_close(void) +{ + if (g_jsonrpc_server) { + if (g_rpc_listen_addr_unix.sun_path[0]) { + /* Delete the Unix socket file */ + unlink(g_rpc_listen_addr_unix.sun_path); + } + + spdk_jsonrpc_server_shutdown(g_jsonrpc_server); + g_jsonrpc_server = NULL; + + if (g_rpc_lock_fd != -1) { + close(g_rpc_lock_fd); + g_rpc_lock_fd = -1; + } + + if (g_rpc_lock_path[0]) { + unlink(g_rpc_lock_path); + g_rpc_lock_path[0] = '\0'; + } + } +} + +struct rpc_get_rpc_methods { + bool current; +}; + +static const struct spdk_json_object_decoder rpc_get_rpc_methods_decoders[] = { + {"current", offsetof(struct rpc_get_rpc_methods, current), spdk_json_decode_bool, true}, +}; + +static void +spdk_rpc_get_rpc_methods(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_rpc_methods req = {}; + struct spdk_json_write_ctx *w; + struct spdk_rpc_method *m; + + if (params != NULL) { + if (spdk_json_decode_object(params, rpc_get_rpc_methods_decoders, + SPDK_COUNTOF(rpc_get_rpc_methods_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + SLIST_FOREACH(m, &g_rpc_methods, slist) { + if (req.current && ((m->state_mask & g_rpc_state) != g_rpc_state)) { + continue; + } + spdk_json_write_string(w, m->name); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_rpc_methods", spdk_rpc_get_rpc_methods, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/scsi/Makefile b/src/spdk/lib/scsi/Makefile new file 
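/*
 * Example (illustrative sketch): registering an additional RPC method. The
 * method name and reply payload are hypothetical; the pattern (validate
 * params, begin/end a JSON result, register with a state mask) mirrors
 * get_rpc_methods above. Because dispatch requires
 * (state_mask & g_rpc_state) == g_rpc_state, a method registered with
 * SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME is callable in either state.
 */
static void
spdk_rpc_example_ping(struct spdk_jsonrpc_request *request,
		      const struct spdk_json_val *params)
{
	struct spdk_json_write_ctx *w;

	if (params != NULL) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
						 "example_ping requires no parameters");
		return;
	}

	w = spdk_jsonrpc_begin_result(request);
	if (w == NULL) {
		return;
	}

	spdk_json_write_string(w, "pong");
	spdk_jsonrpc_end_result(request, w);
}
SPDK_RPC_REGISTER("example_ping", spdk_rpc_example_ping, SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME)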
mode 100644 index 00000000..67cb445a --- /dev/null +++ b/src/spdk/lib/scsi/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = dev.c lun.c port.c scsi.c scsi_bdev.c scsi_rpc.c task.c +LIBNAME = scsi + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/scsi/dev.c b/src/spdk/lib/scsi/dev.c new file mode 100644 index 00000000..335ffacb --- /dev/null +++ b/src/spdk/lib/scsi/dev.c @@ -0,0 +1,415 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "scsi_internal.h" + +static struct spdk_scsi_dev g_devs[SPDK_SCSI_MAX_DEVS]; + +struct spdk_scsi_dev * +spdk_scsi_dev_get_list(void) +{ + return g_devs; +} + +static struct spdk_scsi_dev * +allocate_dev(void) +{ + struct spdk_scsi_dev *dev; + int i; + + for (i = 0; i < SPDK_SCSI_MAX_DEVS; i++) { + dev = &g_devs[i]; + if (!dev->is_allocated) { + memset(dev, 0, sizeof(*dev)); + dev->id = i; + dev->is_allocated = 1; + return dev; + } + } + + return NULL; +} + +static void +free_dev(struct spdk_scsi_dev *dev) +{ + assert(dev->is_allocated == 1); + assert(dev->removed == true); + + dev->is_allocated = 0; +} + +void +spdk_scsi_dev_destruct(struct spdk_scsi_dev *dev) +{ + int lun_cnt; + int i; + + if (dev == NULL || dev->removed) { + return; + } + + dev->removed = true; + lun_cnt = 0; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == NULL) { + continue; + } + + /* + * LUN will remove itself from this dev when all outstanding IO + * is done. When no more LUNs, dev will be deleted. + */ + spdk_scsi_lun_destruct(dev->lun[i]); + lun_cnt++; + } + + if (lun_cnt == 0) { + free_dev(dev); + return; + } +} + +static int +spdk_scsi_dev_find_lowest_free_lun_id(struct spdk_scsi_dev *dev) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == NULL) { + return i; + } + } + + return -1; +} + +int +spdk_scsi_dev_add_lun(struct spdk_scsi_dev *dev, const char *bdev_name, int lun_id, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx) +{ + struct spdk_bdev *bdev; + struct spdk_scsi_lun *lun; + + bdev = spdk_bdev_get_by_name(bdev_name); + if (bdev == NULL) { + SPDK_ERRLOG("device %s: cannot find bdev '%s' (target %d)\n", + dev->name, bdev_name, lun_id); + return -1; + } + + /* Search the lowest free LUN ID if LUN ID is default */ + if (lun_id == -1) { + lun_id = spdk_scsi_dev_find_lowest_free_lun_id(dev); + if (lun_id == -1) { + SPDK_ERRLOG("Free LUN ID is not found\n"); + return -1; + } + } + + lun = spdk_scsi_lun_construct(bdev, hotremove_cb, hotremove_ctx); + if (lun == NULL) { + return -1; + } + + lun->id = lun_id; + lun->dev = dev; + dev->lun[lun_id] = lun; + return 0; +} + +void +spdk_scsi_dev_delete_lun(struct spdk_scsi_dev *dev, + struct spdk_scsi_lun *lun) +{ + int lun_cnt = 0; + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == lun) { + dev->lun[i] = NULL; + } + + if (dev->lun[i]) { + lun_cnt++; + } + } + + if (dev->removed == true && lun_cnt == 0) { + free_dev(dev); + } +} + +/* This typedef exists to work around an astyle 2.05 bug. + * Remove it when astyle is fixed. 
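/*
 * Example (illustrative): attaching a bdev to a device as a LUN. The bdev
 * name is hypothetical; passing lun_id == -1 lets spdk_scsi_dev_add_lun()
 * above pick the lowest free LUN ID, and the NULL arguments opt out of a
 * hot-remove callback.
 */
static int
example_add_lun(struct spdk_scsi_dev *dev)
{
	return spdk_scsi_dev_add_lun(dev, "Malloc0", -1, NULL, NULL);
}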
+ */ +typedef struct spdk_scsi_dev _spdk_scsi_dev; + +_spdk_scsi_dev * +spdk_scsi_dev_construct(const char *name, const char *bdev_name_list[], + int *lun_id_list, int num_luns, uint8_t protocol_id, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx) +{ + struct spdk_scsi_dev *dev; + size_t name_len; + bool found_lun_0; + int i, rc; + + name_len = strlen(name); + if (name_len > sizeof(dev->name) - 1) { + SPDK_ERRLOG("device %s: name longer than maximum allowed length %zu\n", + name, sizeof(dev->name) - 1); + return NULL; + } + + if (num_luns == 0) { + SPDK_ERRLOG("device %s: no LUNs specified\n", name); + return NULL; + } + + found_lun_0 = false; + for (i = 0; i < num_luns; i++) { + if (lun_id_list[i] == 0) { + found_lun_0 = true; + break; + } + } + + if (!found_lun_0) { + SPDK_ERRLOG("device %s: no LUN 0 specified\n", name); + return NULL; + } + + for (i = 0; i < num_luns; i++) { + if (bdev_name_list[i] == NULL) { + SPDK_ERRLOG("NULL spdk_scsi_lun for LUN %d\n", + lun_id_list[i]); + return NULL; + } + } + + dev = allocate_dev(); + if (dev == NULL) { + return NULL; + } + + memcpy(dev->name, name, name_len + 1); + + dev->num_ports = 0; + dev->protocol_id = protocol_id; + + for (i = 0; i < num_luns; i++) { + rc = spdk_scsi_dev_add_lun(dev, bdev_name_list[i], lun_id_list[i], + hotremove_cb, hotremove_ctx); + if (rc < 0) { + spdk_scsi_dev_destruct(dev); + return NULL; + } + } + + return dev; +} + +void +spdk_scsi_dev_queue_mgmt_task(struct spdk_scsi_dev *dev, + struct spdk_scsi_task *task, + enum spdk_scsi_task_func func) +{ + assert(task != NULL); + + task->function = func; + spdk_scsi_lun_task_mgmt_execute(task, func); +} + +void +spdk_scsi_dev_queue_task(struct spdk_scsi_dev *dev, + struct spdk_scsi_task *task) +{ + assert(task != NULL); + + spdk_scsi_lun_execute_task(task->lun, task); +} + +static struct spdk_scsi_port * +spdk_scsi_dev_find_free_port(struct spdk_scsi_dev *dev) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_PORTS; i++) { + if (!dev->port[i].is_used) { + return &dev->port[i]; + } + } + + return NULL; +} + +int +spdk_scsi_dev_add_port(struct spdk_scsi_dev *dev, uint64_t id, const char *name) +{ + struct spdk_scsi_port *port; + int rc; + + if (dev->num_ports == SPDK_SCSI_DEV_MAX_PORTS) { + SPDK_ERRLOG("device already has %d ports\n", SPDK_SCSI_DEV_MAX_PORTS); + return -1; + } + + port = spdk_scsi_dev_find_port_by_id(dev, id); + if (port != NULL) { + SPDK_ERRLOG("device already has port(%" PRIu64 ")\n", id); + return -1; + } + + port = spdk_scsi_dev_find_free_port(dev); + if (port == NULL) { + assert(false); + return -1; + } + + rc = spdk_scsi_port_construct(port, id, dev->num_ports, name); + if (rc != 0) { + return rc; + } + + dev->num_ports++; + return 0; +} + +int +spdk_scsi_dev_delete_port(struct spdk_scsi_dev *dev, uint64_t id) +{ + struct spdk_scsi_port *port; + + port = spdk_scsi_dev_find_port_by_id(dev, id); + if (port == NULL) { + SPDK_ERRLOG("device does not have specified port(%" PRIu64 ")\n", id); + return -1; + } + + spdk_scsi_port_destruct(port); + + dev->num_ports--; + + return 0; +} + +struct spdk_scsi_port * +spdk_scsi_dev_find_port_by_id(struct spdk_scsi_dev *dev, uint64_t id) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_PORTS; i++) { + if (!dev->port[i].is_used) { + continue; + } + if (dev->port[i].id == id) { + return &dev->port[i]; + } + } + + /* No matching port found. 
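/*
 * Example (illustrative sketch): constructing a device with two LUNs and
 * one port. The bdev names, device name, port id/name and the protocol
 * identifier constant (assumed to come from spdk/scsi_spec.h) are
 * illustrative; LUN 0 must always be present.
 */
static struct spdk_scsi_dev *
example_construct_dev(void)
{
	const char *bdev_names[] = { "Malloc0", "Malloc1" };
	int lun_ids[] = { 0, 1 };
	struct spdk_scsi_dev *dev;

	dev = spdk_scsi_dev_construct("Device1", bdev_names, lun_ids, 2,
				      SPDK_SPC_PROTOCOL_IDENTIFIER_ISCSI,
				      NULL, NULL);
	if (dev == NULL) {
		return NULL;
	}

	if (spdk_scsi_dev_add_port(dev, 1, "Port0") != 0) {
		spdk_scsi_dev_destruct(dev);
		return NULL;
	}

	return dev;
}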
*/ + return NULL; +} + +void +spdk_scsi_dev_free_io_channels(struct spdk_scsi_dev *dev) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == NULL) { + continue; + } + _spdk_scsi_lun_free_io_channel(dev->lun[i]); + } +} + +int +spdk_scsi_dev_allocate_io_channels(struct spdk_scsi_dev *dev) +{ + int i, rc; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == NULL) { + continue; + } + rc = _spdk_scsi_lun_allocate_io_channel(dev->lun[i]); + if (rc < 0) { + spdk_scsi_dev_free_io_channels(dev); + return -1; + } + } + + return 0; +} + +const char * +spdk_scsi_dev_get_name(const struct spdk_scsi_dev *dev) +{ + return dev->name; +} + +int +spdk_scsi_dev_get_id(const struct spdk_scsi_dev *dev) +{ + return dev->id; +} + +struct spdk_scsi_lun * +spdk_scsi_dev_get_lun(struct spdk_scsi_dev *dev, int lun_id) +{ + if (lun_id < 0 || lun_id >= SPDK_SCSI_DEV_MAX_LUN) { + return NULL; + } + + return dev->lun[lun_id]; +} + +bool +spdk_scsi_dev_has_pending_tasks(const struct spdk_scsi_dev *dev) +{ + int i; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; ++i) { + if (dev->lun[i] && spdk_scsi_lun_has_pending_tasks(dev->lun[i])) { + return true; + } + } + + return false; +} diff --git a/src/spdk/lib/scsi/lun.c b/src/spdk/lib/scsi/lun.c new file mode 100644 index 00000000..ea44d86e --- /dev/null +++ b/src/spdk/lib/scsi/lun.c @@ -0,0 +1,452 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "scsi_internal.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/event.h" +#include "spdk/util.h" + +void +spdk_scsi_lun_complete_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task) +{ + if (lun) { + TAILQ_REMOVE(&lun->tasks, task, scsi_link); + spdk_trace_record(TRACE_SCSI_TASK_DONE, lun->dev->id, 0, (uintptr_t)task, 0); + } + task->cpl_fn(task); +} + +void +spdk_scsi_lun_complete_mgmt_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task) +{ + if (task->function == SPDK_SCSI_TASK_FUNC_LUN_RESET && + task->status == SPDK_SCSI_STATUS_GOOD) { + /* + * The backend LUN device was just reset. If there are active tasks + * in the backend, it means that LUN reset fails, and we set failure + * status to LUN reset task. + */ + if (spdk_scsi_lun_has_pending_tasks(lun)) { + SPDK_ERRLOG("lun->tasks should be empty after reset\n"); + task->response = SPDK_SCSI_TASK_MGMT_RESP_TARGET_FAILURE; + } + } + task->cpl_fn(task); +} + +int +spdk_scsi_lun_task_mgmt_execute(struct spdk_scsi_task *task, + enum spdk_scsi_task_func func) +{ + if (!task) { + return -1; + } + + if (!task->lun) { + /* LUN does not exist */ + task->response = SPDK_SCSI_TASK_MGMT_RESP_INVALID_LUN; + task->cpl_fn(task); + return -1; + } + + switch (func) { + case SPDK_SCSI_TASK_FUNC_ABORT_TASK: + task->response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + SPDK_ERRLOG("ABORT_TASK failed\n"); + break; + + case SPDK_SCSI_TASK_FUNC_ABORT_TASK_SET: + task->response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + SPDK_ERRLOG("ABORT_TASK_SET failed\n"); + break; + + case SPDK_SCSI_TASK_FUNC_LUN_RESET: + spdk_bdev_scsi_reset(task); + return 0; + + default: + SPDK_ERRLOG("Unknown Task Management Function!\n"); + /* + * Task management functions other than those above should never + * reach this point having been filtered by the frontend. Reject + * the task as being unsupported. + */ + task->response = SPDK_SCSI_TASK_MGMT_RESP_REJECT_FUNC_NOT_SUPPORTED; + break; + } + + spdk_scsi_lun_complete_mgmt_task(task->lun, task); + + return -1; +} + +void +spdk_scsi_task_process_null_lun(struct spdk_scsi_task *task) +{ + uint8_t buffer[36]; + uint32_t allocation_len; + uint32_t data_len; + + task->length = task->transfer_len; + if (task->cdb[0] == SPDK_SPC_INQUIRY) { + /* + * SPC-4 states that INQUIRY commands to an unsupported LUN + * must be served with PERIPHERAL QUALIFIER = 0x3 and + * PERIPHERAL DEVICE TYPE = 0x1F. 
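/*
 * Worked example (illustrative): with PERIPHERAL QUALIFIER = 0x3 and
 * PERIPHERAL DEVICE TYPE = 0x1F, the first byte of the INQUIRY data built
 * below is (0x03 << 5) | 0x1F = 0x7F.
 */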
+ */ + data_len = sizeof(buffer); + + memset(buffer, 0, data_len); + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + buffer[0] = 0x03 << 5 | 0x1f; + /* ADDITIONAL LENGTH */ + buffer[4] = data_len - 5; + + allocation_len = from_be16(&task->cdb[3]); + if (spdk_scsi_task_scatter_data(task, buffer, spdk_min(allocation_len, data_len)) >= 0) { + task->data_transferred = data_len; + task->status = SPDK_SCSI_STATUS_GOOD; + } + } else { + /* LOGICAL UNIT NOT SUPPORTED */ + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + task->data_transferred = 0; + } +} + +void +spdk_scsi_lun_execute_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task) +{ + int rc; + + task->status = SPDK_SCSI_STATUS_GOOD; + spdk_trace_record(TRACE_SCSI_TASK_START, lun->dev->id, task->length, (uintptr_t)task, 0); + TAILQ_INSERT_TAIL(&lun->tasks, task, scsi_link); + if (!lun->removed) { + rc = spdk_bdev_scsi_execute(task); + } else { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ABORTED_COMMAND, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + rc = SPDK_SCSI_TASK_COMPLETE; + } + + switch (rc) { + case SPDK_SCSI_TASK_PENDING: + break; + + case SPDK_SCSI_TASK_COMPLETE: + spdk_scsi_lun_complete_task(lun, task); + break; + + default: + abort(); + } +} + +static void +spdk_scsi_lun_remove(struct spdk_scsi_lun *lun) +{ + spdk_bdev_close(lun->bdev_desc); + + spdk_scsi_dev_delete_lun(lun->dev, lun); + free(lun); +} + +static int +spdk_scsi_lun_check_io_channel(void *arg) +{ + struct spdk_scsi_lun *lun = (struct spdk_scsi_lun *)arg; + + if (lun->io_channel) { + return -1; + } + spdk_poller_unregister(&lun->hotremove_poller); + + spdk_scsi_lun_remove(lun); + return -1; +} + +static void +spdk_scsi_lun_notify_hot_remove(struct spdk_scsi_lun *lun) +{ + struct spdk_scsi_desc *desc, *tmp; + + if (lun->hotremove_cb) { + lun->hotremove_cb(lun, lun->hotremove_ctx); + } + + TAILQ_FOREACH_SAFE(desc, &lun->open_descs, link, tmp) { + if (desc->hotremove_cb) { + desc->hotremove_cb(lun, desc->hotremove_ctx); + } else { + spdk_scsi_lun_close(desc); + } + } + + if (lun->io_channel) { + lun->hotremove_poller = spdk_poller_register(spdk_scsi_lun_check_io_channel, + lun, 10); + } else { + spdk_scsi_lun_remove(lun); + } +} + +static int +spdk_scsi_lun_check_pending_tasks(void *arg) +{ + struct spdk_scsi_lun *lun = (struct spdk_scsi_lun *)arg; + + if (spdk_scsi_lun_has_pending_tasks(lun)) { + return -1; + } + spdk_poller_unregister(&lun->hotremove_poller); + + spdk_scsi_lun_notify_hot_remove(lun); + return -1; +} + +static void +_spdk_scsi_lun_hot_remove(void *arg1) +{ + struct spdk_scsi_lun *lun = arg1; + + if (spdk_scsi_lun_has_pending_tasks(lun)) { + lun->hotremove_poller = spdk_poller_register(spdk_scsi_lun_check_pending_tasks, + lun, 10); + } else { + spdk_scsi_lun_notify_hot_remove(lun); + } +} + +static void +spdk_scsi_lun_hot_remove(void *remove_ctx) +{ + struct spdk_scsi_lun *lun = (struct spdk_scsi_lun *)remove_ctx; + struct spdk_thread *thread; + + if (lun->removed) { + return; + } + + lun->removed = true; + if (lun->io_channel == NULL) { + _spdk_scsi_lun_hot_remove(lun); + return; + } + + thread = spdk_io_channel_get_thread(lun->io_channel); + if (thread != spdk_get_thread()) { + spdk_thread_send_msg(thread, _spdk_scsi_lun_hot_remove, lun); + } else { + _spdk_scsi_lun_hot_remove(lun); + } +} + +/** + * \brief Constructs a 
new spdk_scsi_lun object based on the provided parameters. + * + * \param bdev bdev associated with this LUN + * + * \return NULL if bdev == NULL + * \return pointer to the new spdk_scsi_lun object otherwise + */ +_spdk_scsi_lun * +spdk_scsi_lun_construct(struct spdk_bdev *bdev, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx) +{ + struct spdk_scsi_lun *lun; + int rc; + + if (bdev == NULL) { + SPDK_ERRLOG("bdev must be non-NULL\n"); + return NULL; + } + + lun = calloc(1, sizeof(*lun)); + if (lun == NULL) { + SPDK_ERRLOG("could not allocate lun\n"); + return NULL; + } + + rc = spdk_bdev_open(bdev, true, spdk_scsi_lun_hot_remove, lun, &lun->bdev_desc); + + if (rc != 0) { + SPDK_ERRLOG("bdev %s cannot be opened, error=%d\n", spdk_bdev_get_name(bdev), rc); + free(lun); + return NULL; + } + + TAILQ_INIT(&lun->tasks); + + lun->bdev = bdev; + lun->io_channel = NULL; + lun->hotremove_cb = hotremove_cb; + lun->hotremove_ctx = hotremove_ctx; + TAILQ_INIT(&lun->open_descs); + + return lun; +} + +void +spdk_scsi_lun_destruct(struct spdk_scsi_lun *lun) +{ + spdk_scsi_lun_hot_remove(lun); +} + +int +spdk_scsi_lun_open(struct spdk_scsi_lun *lun, spdk_scsi_remove_cb_t hotremove_cb, + void *hotremove_ctx, struct spdk_scsi_desc **_desc) +{ + struct spdk_scsi_desc *desc; + + desc = calloc(1, sizeof(*desc)); + if (desc == NULL) { + SPDK_ERRLOG("calloc() failed for LUN descriptor.\n"); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&lun->open_descs, desc, link); + + desc->lun = lun; + desc->hotremove_cb = hotremove_cb; + desc->hotremove_ctx = hotremove_ctx; + *_desc = desc; + + return 0; +} + +void +spdk_scsi_lun_close(struct spdk_scsi_desc *desc) +{ + struct spdk_scsi_lun *lun = desc->lun; + + TAILQ_REMOVE(&lun->open_descs, desc, link); + free(desc); + + assert(!TAILQ_EMPTY(&lun->open_descs) || lun->io_channel == NULL); +} + +int +_spdk_scsi_lun_allocate_io_channel(struct spdk_scsi_lun *lun) +{ + if (lun->io_channel != NULL) { + if (spdk_get_thread() == spdk_io_channel_get_thread(lun->io_channel)) { + lun->ref++; + return 0; + } + SPDK_ERRLOG("io_channel already allocated for lun %s\n", + spdk_bdev_get_name(lun->bdev)); + return -1; + } + + lun->io_channel = spdk_bdev_get_io_channel(lun->bdev_desc); + if (lun->io_channel == NULL) { + return -1; + } + lun->ref = 1; + return 0; +} + +void +_spdk_scsi_lun_free_io_channel(struct spdk_scsi_lun *lun) +{ + if (lun->io_channel == NULL) { + return; + } + + if (spdk_get_thread() != spdk_io_channel_get_thread(lun->io_channel)) { + SPDK_ERRLOG("io_channel was freed by different thread\n"); + return; + } + + lun->ref--; + if (lun->ref == 0) { + spdk_put_io_channel(lun->io_channel); + lun->io_channel = NULL; + } +} + +int +spdk_scsi_lun_allocate_io_channel(struct spdk_scsi_desc *desc) +{ + struct spdk_scsi_lun *lun = desc->lun; + + return _spdk_scsi_lun_allocate_io_channel(lun); +} + +void +spdk_scsi_lun_free_io_channel(struct spdk_scsi_desc *desc) +{ + struct spdk_scsi_lun *lun = desc->lun; + + _spdk_scsi_lun_free_io_channel(lun); +} + +int +spdk_scsi_lun_get_id(const struct spdk_scsi_lun *lun) +{ + return lun->id; +} + +const char * +spdk_scsi_lun_get_bdev_name(const struct spdk_scsi_lun *lun) +{ + return spdk_bdev_get_name(lun->bdev); +} + +const struct spdk_scsi_dev * +spdk_scsi_lun_get_dev(const struct spdk_scsi_lun *lun) +{ + return lun->dev; +} + +bool +spdk_scsi_lun_has_pending_tasks(const struct spdk_scsi_lun *lun) +{ + return !TAILQ_EMPTY(&lun->tasks); +} + +bool +spdk_scsi_lun_is_removing(const struct spdk_scsi_lun *lun) +{ + 
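/*
 * Example (illustrative sketch): the open/close and I/O-channel pairing a
 * frontend is expected to follow. Passing a NULL hot-remove callback means
 * the descriptor is simply closed when the underlying bdev disappears (see
 * spdk_scsi_lun_notify_hot_remove() above); the helper name is hypothetical.
 */
static int
example_use_lun(struct spdk_scsi_lun *lun)
{
	struct spdk_scsi_desc *desc;
	int rc;

	rc = spdk_scsi_lun_open(lun, NULL, NULL, &desc);
	if (rc != 0) {
		return rc;
	}

	/* Must run on the thread that will submit tasks to this LUN. */
	rc = spdk_scsi_lun_allocate_io_channel(desc);
	if (rc != 0) {
		spdk_scsi_lun_close(desc);
		return rc;
	}

	/* ... queue tasks via spdk_scsi_dev_queue_task() ... */

	spdk_scsi_lun_free_io_channel(desc);
	spdk_scsi_lun_close(desc);
	return 0;
}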
return lun->removed; +} diff --git a/src/spdk/lib/scsi/port.c b/src/spdk/lib/scsi/port.c new file mode 100644 index 00000000..70d72004 --- /dev/null +++ b/src/spdk/lib/scsi/port.c @@ -0,0 +1,96 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "scsi_internal.h" + +struct spdk_scsi_port * +spdk_scsi_port_create(uint64_t id, uint16_t index, const char *name) +{ + struct spdk_scsi_port *port; + + port = calloc(1, sizeof(struct spdk_scsi_port)); + + if (!port) { + return NULL; + } + + if (spdk_scsi_port_construct(port, id, index, name) != 0) { + spdk_scsi_port_free(&port); + return NULL; + } + + return port; +} + +void +spdk_scsi_port_free(struct spdk_scsi_port **pport) +{ + struct spdk_scsi_port *port; + + if (!pport) { + return; + } + + port = *pport; + *pport = NULL; + free(port); +} + +int +spdk_scsi_port_construct(struct spdk_scsi_port *port, uint64_t id, uint16_t index, + const char *name) +{ + if (strlen(name) >= sizeof(port->name)) { + SPDK_ERRLOG("port name too long\n"); + return -1; + } + + port->is_used = 1; + port->id = id; + port->index = index; + snprintf(port->name, sizeof(port->name), "%s", name); + return 0; +} + +void +spdk_scsi_port_destruct(struct spdk_scsi_port *port) +{ + memset(port, 0, sizeof(struct spdk_scsi_port)); +} + +const char * +spdk_scsi_port_get_name(const struct spdk_scsi_port *port) +{ + return port->name; +} diff --git a/src/spdk/lib/scsi/scsi.c b/src/spdk/lib/scsi/scsi.c new file mode 100644 index 00000000..5dce0446 --- /dev/null +++ b/src/spdk/lib/scsi/scsi.c @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "scsi_internal.h" + +struct spdk_scsi_globals g_spdk_scsi; + +int +spdk_scsi_init(void) +{ + int rc; + + rc = pthread_mutex_init(&g_spdk_scsi.mutex, NULL); + if (rc != 0) { + SPDK_ERRLOG("mutex_init() failed\n"); + return -1; + } + + return 0; +} + +void +spdk_scsi_fini(void) +{ + pthread_mutex_destroy(&g_spdk_scsi.mutex); +} + +SPDK_TRACE_REGISTER_FN(scsi_trace) +{ + spdk_trace_register_owner(OWNER_SCSI_DEV, 'd'); + spdk_trace_register_object(OBJECT_SCSI_TASK, 't'); + spdk_trace_register_description("SCSI_TASK_DONE", "", TRACE_SCSI_TASK_DONE, + OWNER_SCSI_DEV, OBJECT_SCSI_TASK, 0, 0, ""); + spdk_trace_register_description("SCSI_TASK_START", "", TRACE_SCSI_TASK_START, + OWNER_SCSI_DEV, OBJECT_SCSI_TASK, 0, 0, ""); +} + +SPDK_LOG_REGISTER_COMPONENT("scsi", SPDK_LOG_SCSI) diff --git a/src/spdk/lib/scsi/scsi_bdev.c b/src/spdk/lib/scsi/scsi_bdev.c new file mode 100644 index 00000000..289d8626 --- /dev/null +++ b/src/spdk/lib/scsi/scsi_bdev.c @@ -0,0 +1,2116 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "scsi_internal.h" + +/* + * TODO: move bdev SCSI error code translation tests to bdev unit test + * and remove this include. + */ +#include "spdk/bdev_module.h" + +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/endian.h" +#include "spdk/likely.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#define SPDK_WORK_BLOCK_SIZE (4ULL * 1024ULL * 1024ULL) +#define SPDK_WORK_ATS_BLOCK_SIZE (1ULL * 1024ULL * 1024ULL) +#define MAX_SERIAL_STRING 32 + +#define DEFAULT_DISK_VENDOR "INTEL" +#define DEFAULT_DISK_REVISION "0001" +#define DEFAULT_DISK_ROTATION_RATE 1 /* Non-rotating medium */ +#define DEFAULT_DISK_FORM_FACTOR 0x02 /* 3.5 inch */ +#define DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT 256 + +#define INQUIRY_OFFSET(field) offsetof(struct spdk_scsi_cdb_inquiry_data, field) + \ + sizeof(((struct spdk_scsi_cdb_inquiry_data *)0x0)->field) + +static void spdk_bdev_scsi_process_block_resubmit(void *arg); + +static int +spdk_hex2bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) { + return ch - '0'; + } + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) { + return ch - 'a' + 10; + } + return (int)ch; +} + +static void +spdk_bdev_scsi_set_naa_ieee_extended(const char *name, uint8_t *buf) +{ + int i, value, count = 0; + uint64_t local_value; + + for (i = 0; (i < 16) && (name[i] != '\0'); i++) { + value = spdk_hex2bin(name[i]); + if (i % 2) { + buf[count++] |= value << 4; + } else { + buf[count] = value; + } + } + + local_value = *(uint64_t *)buf; + /* + * see spc3r23 7.6.3.6.2, + * NAA IEEE Extended identifer format + */ + local_value &= 0x0fff000000ffffffull; + /* NAA 02, and 00 03 47 for IEEE Intel */ + local_value |= 0x2000000347000000ull; + + to_be64((void *)buf, local_value); +} + +static int +spdk_bdev_scsi_report_luns(struct spdk_scsi_lun *lun, + int sel, uint8_t *data, int alloc_len) +{ + struct spdk_scsi_dev *dev; + uint64_t fmt_lun, lun_id, method; + int hlen, len = 0; + int i; + + if (alloc_len < 8) { + return -1; + } + + if (sel == 0x00) { + /* logical unit with addressing method */ + } else if (sel == 0x01) { + /* well known logical unit */ + } else if (sel == 0x02) { + /* logical unit */ + } else { + return -1; + } + + /* LUN LIST LENGTH */ + memset(data, 0, 4); + + /* Reserved */ + memset(&data[4], 0, 4); + hlen = 8; + + dev = lun->dev; + + for (i = 0; i < SPDK_SCSI_DEV_MAX_LUN; i++) { + if (dev->lun[i] == NULL) { + continue; + } + + if (alloc_len - (hlen + len) < 8) { + return -1; + } + + lun_id = (uint64_t)i; + + if (SPDK_SCSI_DEV_MAX_LUN <= 0x0100) { + /* below 256 */ + method = 0x00U; + fmt_lun = (method & 0x03U) << 62; + fmt_lun |= (lun_id & 0x00ffU) << 48; + } else if (SPDK_SCSI_DEV_MAX_LUN <= 0x4000) { + /* below 16384 */ + method = 0x01U; + fmt_lun = (method 
& 0x03U) << 62;
+ fmt_lun |= (lun_id & 0x3fffU) << 48;
+ } else {
+ /* XXX */
+ fmt_lun = 0;
+ }
+
+ /* LUN */
+ to_be64(&data[hlen + len], fmt_lun);
+ len += 8;
+ }
+
+ /* LUN LIST LENGTH */
+ to_be32(data, len);
+
+ return hlen + len;
+}
+
+static int
+spdk_bdev_scsi_pad_scsi_name(char *dst, const char *name)
+{
+ size_t len;
+
+ len = strlen(name);
+ memcpy(dst, name, len);
+ do {
+ dst[len++] = '\0';
+ } while (len & 3);
+
+ return len;
+}
+
+static int
+spdk_bdev_scsi_inquiry(struct spdk_bdev *bdev, struct spdk_scsi_task *task,
+ uint8_t *cdb, uint8_t *data, uint16_t alloc_len)
+{
+ struct spdk_scsi_lun *lun;
+ struct spdk_scsi_dev *dev;
+ struct spdk_scsi_port *port;
+ uint32_t blocks, optimal_blocks;
+ int hlen = 0, plen, plen2;
+ uint16_t len = 0;
+ int pc;
+ int pd;
+ int evpd;
+ int i;
+ struct spdk_scsi_cdb_inquiry *inq = (struct spdk_scsi_cdb_inquiry *)cdb;
+
+ /* standard inquiry command requires at least 36 bytes */
+ if (alloc_len < 0x24) {
+ goto inq_error;
+ }
+
+ lun = task->lun;
+ dev = lun->dev;
+ port = task->target_port;
+
+ pd = SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DISK;
+ pc = inq->page_code;
+ evpd = inq->evpd & 0x1;
+
+ if (!evpd && pc) {
+ spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
+ SPDK_SCSI_SENSE_ILLEGAL_REQUEST,
+ SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB,
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ return -1;
+ }
+
+ if (evpd) {
+ struct spdk_scsi_vpd_page *vpage = (struct spdk_scsi_vpd_page *)data;
+
+ /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */
+ vpage->peripheral_device_type = pd;
+ vpage->peripheral_qualifier = SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED;
+ /* PAGE CODE */
+ vpage->page_code = pc;
+
+ /* Vital product data */
+ switch (pc) {
+ case SPDK_SPC_VPD_SUPPORTED_VPD_PAGES:
+ hlen = 4;
+
+ vpage->params[0] = SPDK_SPC_VPD_SUPPORTED_VPD_PAGES;
+ vpage->params[1] = SPDK_SPC_VPD_UNIT_SERIAL_NUMBER;
+ vpage->params[2] = SPDK_SPC_VPD_DEVICE_IDENTIFICATION;
+ vpage->params[3] = SPDK_SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES;
+ vpage->params[4] = SPDK_SPC_VPD_EXTENDED_INQUIRY_DATA;
+ vpage->params[5] = SPDK_SPC_VPD_MODE_PAGE_POLICY;
+ vpage->params[6] = SPDK_SPC_VPD_SCSI_PORTS;
+ vpage->params[7] = SPDK_SPC_VPD_BLOCK_LIMITS;
+ vpage->params[8] = SPDK_SPC_VPD_BLOCK_DEV_CHARS;
+ len = 9;
+ if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
+ vpage->params[9] = SPDK_SPC_VPD_BLOCK_THIN_PROVISION;
+ len++;
+ }
+
+ /* PAGE LENGTH */
+ to_be16(vpage->alloc_len, len);
+ break;
+
+ case SPDK_SPC_VPD_UNIT_SERIAL_NUMBER: {
+ const char *name = spdk_bdev_get_name(bdev);
+
+ hlen = 4;
+
+ /* PRODUCT SERIAL NUMBER */
+ len = strlen(name) + 1;
+ if (len > MAX_SERIAL_STRING) {
+ len = MAX_SERIAL_STRING;
+ }
+
+ memcpy(vpage->params, name, len - 1);
+ vpage->params[len - 1] = 0;
+
+ /* PAGE LENGTH */
+ to_be16(vpage->alloc_len, len);
+ break;
+ }
+
+ case SPDK_SPC_VPD_DEVICE_IDENTIFICATION: {
+ const char *name = spdk_bdev_get_name(bdev);
+ const char *product_name = spdk_bdev_get_product_name(bdev);
+ uint8_t protocol_id = dev->protocol_id;
+ uint8_t *buf = vpage->params;
+ struct spdk_scsi_desig_desc *desig;
+
+ hlen = 4;
+
+ /* Check total length by calculating how much space all entries take */
+ len = sizeof(struct spdk_scsi_desig_desc) + 8;
+ len += sizeof(struct spdk_scsi_desig_desc) + 8 + 16 + MAX_SERIAL_STRING;
+ len += sizeof(struct spdk_scsi_desig_desc) + SPDK_SCSI_DEV_MAX_NAME + 1;
+ len += sizeof(struct spdk_scsi_desig_desc) + SPDK_SCSI_PORT_MAX_NAME_LENGTH;
+ len += sizeof(struct spdk_scsi_desig_desc) + 4;
+ len += sizeof(struct
spdk_scsi_desig_desc) + 4; + len += sizeof(struct spdk_scsi_desig_desc) + 4; + if (sizeof(struct spdk_scsi_vpd_page) + len > alloc_len) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return -1; + } + + /* Now fill out the designator array */ + + /* NAA designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_NAA; + desig->association = SPDK_SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8; + spdk_bdev_scsi_set_naa_ieee_extended(name, desig->desig); + len = sizeof(struct spdk_scsi_desig_desc) + 8; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* T10 Vendor ID designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_ASCII; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID; + desig->association = SPDK_SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8 + 16 + MAX_SERIAL_STRING; + spdk_strcpy_pad(desig->desig, DEFAULT_DISK_VENDOR, 8, ' '); + spdk_strcpy_pad(&desig->desig[8], product_name, 16, ' '); + spdk_strcpy_pad(&desig->desig[24], name, MAX_SERIAL_STRING, ' '); + len += sizeof(struct spdk_scsi_desig_desc) + 8 + 16 + MAX_SERIAL_STRING; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* SCSI Device Name designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_UTF8; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; + desig->association = SPDK_SPC_VPD_ASSOCIATION_TARGET_DEVICE; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = spdk_bdev_scsi_pad_scsi_name(desig->desig, dev->name); + len += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* SCSI Port Name designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_UTF8; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; + desig->association = SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = snprintf(desig->desig, SPDK_SCSI_PORT_MAX_NAME_LENGTH, "%s", port->name); + len += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* Relative Target Port designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT; + desig->association = SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 4; + memset(desig->desig, 0, 2); /* Reserved */ + to_be16(&desig->desig[2], port->index); + len += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* Target port group designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP; 
+ desig->association = SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 4; + memset(desig->desig, 0, 4); + len += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + buf += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + /* Logical unit group designator */ + desig = (struct spdk_scsi_desig_desc *)buf; + desig->code_set = SPDK_SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = protocol_id; + desig->type = SPDK_SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP; + desig->association = SPDK_SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 4; + memset(desig->desig, 0, 2); /* Reserved */ + to_be16(&desig->desig[2], dev->id); + len += sizeof(struct spdk_scsi_desig_desc) + desig->len; + + to_be16(vpage->alloc_len, len); + + break; + } + + case SPDK_SPC_VPD_EXTENDED_INQUIRY_DATA: { + struct spdk_scsi_vpd_ext_inquiry *vext = (struct spdk_scsi_vpd_ext_inquiry *)vpage; + + memset(vext, 0, sizeof(*vext)); + hlen = 4; + + /* RTO(3) GRD_CHK(2) APP_CHK(1) REF_CHK(0) */ + + /* GROUP_SUP(4) PRIOR_SUP(3) HEADSUP(2) ORDSUP(1) SIMPSUP(0) */ + vext->sup = SPDK_SCSI_VEXT_HEADSUP | SPDK_SCSI_VEXT_SIMPSUP; + + /* NV_SUP(1) V_SUP(0) */ + + /* Reserved[7-63] */ + + len = 64 - hlen; + + /* PAGE LENGTH */ + to_be16(vpage->alloc_len, len); + break; + } + + case SPDK_SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES: + /* PAGE LENGTH */ + hlen = 4; + + to_be16(vpage->alloc_len, len); + break; + + case SPDK_SPC_VPD_MODE_PAGE_POLICY: { + struct spdk_scsi_mpage_policy_desc *pdesc = + (struct spdk_scsi_mpage_policy_desc *)vpage->params; + + hlen = 4; + + /* Mode page policy descriptor 1 */ + + /* POLICY PAGE CODE(5-0) */ + /* all page code */ + pdesc->page_code = 0x3f; + + /* POLICY SUBPAGE CODE */ + /* all sub page */ + pdesc->sub_page_code = 0xff; + + /* MLUS(7) MODE PAGE POLICY(1-0) */ + /* MLUS own copy */ + /* Shared MODE PAGE policy */ + pdesc->policy = 0; + /* Reserved */ + pdesc->reserved = 0; + + len += 4; + + to_be16(vpage->alloc_len, len); + break; + } + + case SPDK_SPC_VPD_SCSI_PORTS: { + /* PAGE LENGTH */ + hlen = 4; + + /* Identification descriptor list */ + for (i = 0; i < SPDK_SCSI_DEV_MAX_PORTS; i++) { + struct spdk_scsi_port_desc *sdesc; + struct spdk_scsi_tgt_port_desc *pdesc; + + if (!dev->port[i].is_used) { + continue; + } + + /* Identification descriptor N */ + sdesc = (struct spdk_scsi_port_desc *)&vpage->params[len]; + + /* Reserved */ + sdesc->reserved = 0; + + /* RELATIVE PORT IDENTIFIER */ + to_be16(&sdesc->rel_port_id, dev->port[i].index); + + /* Reserved */ + sdesc->reserved2 = 0; + + /* INITIATOR PORT TRANSPORTID LENGTH */ + sdesc->init_port_len = 0; + + /* Reserved */ + sdesc->init_port_id = 0; + + /* TARGET PORT DESCRIPTORS LENGTH */ + sdesc->tgt_desc_len = 0; + + len += 12; + + plen2 = 0; + /* Target port descriptor 1 */ + pdesc = (struct spdk_scsi_tgt_port_desc *)sdesc->tgt_desc; + + /* PROTOCOL IDENTIFIER(7-4) CODE SET(3-0) */ + pdesc->code_set = + SPDK_SPC_PROTOCOL_IDENTIFIER_ISCSI << 4 | + SPDK_SPC_VPD_CODE_SET_UTF8; + + /* PIV(7) ASSOCIATION(5-4) IDENTIFIER TYPE(3-0) */ + pdesc->desig_type = SPDK_SPC_VPD_DESIG_PIV | + SPDK_SPC_VPD_ASSOCIATION_TARGET_PORT << 4 | + SPDK_SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; + + /* Reserved */ + pdesc->reserved = 0; + + /* IDENTIFIER */ + plen = snprintf((char *)pdesc->designator, + SPDK_SCSI_PORT_MAX_NAME_LENGTH, "%s", + dev->port[i].name); + pdesc->len = plen; + + plen2 += 4 + plen; + + /* TARGET PORT DESCRIPTORS LENGTH */ + 
to_be16(&sdesc->tgt_desc_len, plen2); + + len += plen2; + } + + to_be16(vpage->alloc_len, len); + break; + } + + case SPDK_SPC_VPD_BLOCK_LIMITS: { + uint32_t block_size = spdk_bdev_get_block_size(bdev); + + /* PAGE LENGTH */ + memset(&data[4], 0, 60); + + hlen = 4; + + /* WSNZ(0) */ + /* support zero length in WRITE SAME */ + + /* MAXIMUM COMPARE AND WRITE LENGTH */ + blocks = SPDK_WORK_ATS_BLOCK_SIZE / block_size; + + if (blocks > 0xff) { + blocks = 0xff; + } + + data[5] = (uint8_t)blocks; + + /* force align to 4KB */ + if (block_size < 4096) { + optimal_blocks = 4096 / block_size; + } else { + optimal_blocks = 1; + } + + /* OPTIMAL TRANSFER LENGTH GRANULARITY */ + to_be16(&data[6], optimal_blocks); + + blocks = SPDK_WORK_BLOCK_SIZE / block_size; + + /* MAXIMUM TRANSFER LENGTH */ + to_be32(&data[8], blocks); + /* OPTIMAL TRANSFER LENGTH */ + to_be32(&data[12], blocks); + + /* MAXIMUM PREFETCH XDREAD XDWRITE TRANSFER LENGTH */ + + len = 20 - hlen; + + if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + /* + * MAXIMUM UNMAP LBA COUNT: indicates the + * maximum number of LBAs that may be + * unmapped by an UNMAP command. + */ + /* For now, choose 4MB as the maximum. */ + to_be32(&data[20], 4194304); + + /* + * MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT: + * indicates the maximum number of UNMAP + * block descriptors that shall be contained + * in the parameter data transferred to the + * device server for an UNMAP command. + * The bdev layer automatically splits unmap + * requests, so pick an arbitrary high number here. + */ + to_be32(&data[24], DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT); + + /* + * The UGAVALID bit is left as 0 which means neither the + * OPTIMAL UNMAP GRANULARITY nor the UNMAP GRANULARITY + * ALIGNMENT fields are valid. + */ + + /* + * MAXIMUM WRITE SAME LENGTH: indicates the + * maximum number of contiguous logical blocks + * that the device server allows to be unmapped + * or written in a single WRITE SAME command. + */ + to_be64(&data[36], 512); + + /* Reserved */ + /* not specified */ + len = 64 - hlen; + } + + to_be16(vpage->alloc_len, len); + break; + } + + case SPDK_SPC_VPD_BLOCK_DEV_CHARS: { + /* PAGE LENGTH */ + hlen = 4; + len = 64 - hlen; + + to_be16(&data[4], DEFAULT_DISK_ROTATION_RATE); + + /* Reserved */ + data[6] = 0; + /* NOMINAL FORM FACTOR(3-0) */ + data[7] = DEFAULT_DISK_FORM_FACTOR << 4; + /* Reserved */ + memset(&data[8], 0, 64 - 8); + + to_be16(vpage->alloc_len, len); + break; + } + + case SPDK_SPC_VPD_BLOCK_THIN_PROVISION: { + if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + SPDK_ERRLOG("unsupported INQUIRY VPD page 0x%x\n", pc); + goto inq_error; + } + + hlen = 4; + len = 7; + + /* + * PAGE LENGTH : if the DP bit is set to one, then the + * page length shall be set 0004h. + */ + to_be16(&data[2], 0x0004); + + /* + * THRESHOLD EXPONENT : it indicates the threshold set + * size in LBAs as a power of 2( i.e., the threshold + * set size = 2 ^ (threshold exponent). + */ + data[4] = 0; + + /* + * Set the LBPU bit to indicate the support for UNMAP + * command. + */ + data[5] |= SPDK_SCSI_UNMAP_LBPU; + + /* + * Set the provisioning type to thin provision. 
+ */ + data[6] = SPDK_SCSI_UNMAP_THIN_PROVISIONING; + + to_be16(vpage->alloc_len, len); + break; + } + + default: + if (pc >= 0xc0 && pc <= 0xff) { + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "Vendor specific INQUIRY VPD page 0x%x\n", pc); + } else { + SPDK_ERRLOG("unsupported INQUIRY VPD page 0x%x\n", pc); + } + goto inq_error; + } + } else { + struct spdk_scsi_cdb_inquiry_data *inqdata = + (struct spdk_scsi_cdb_inquiry_data *)data; + + /* Standard INQUIRY data */ + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + inqdata->peripheral_device_type = pd; + inqdata->peripheral_qualifier = SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED; + + /* RMB(7) */ + inqdata->rmb = 0; + + /* VERSION */ + /* See SPC3/SBC2/MMC4/SAM2 for more details */ + inqdata->version = SPDK_SPC_VERSION_SPC3; + + /* NORMACA(5) HISUP(4) RESPONSE DATA FORMAT(3-0) */ + /* format 2 */ /* hierarchical support */ + inqdata->response = 2 | 1 << 4; + + hlen = 5; + + /* SCCS(7) ACC(6) TPGS(5-4) 3PC(3) PROTECT(0) */ + /* Not support TPGS */ + inqdata->flags = 0; + + /* MULTIP */ + inqdata->flags2 = 0x10; + + /* WBUS16(5) SYNC(4) LINKED(3) CMDQUE(1) VS(0) */ + /* CMDQUE */ + inqdata->flags3 = 0x2; + + /* T10 VENDOR IDENTIFICATION */ + spdk_strcpy_pad(inqdata->t10_vendor_id, DEFAULT_DISK_VENDOR, 8, ' '); + + /* PRODUCT IDENTIFICATION */ + spdk_strcpy_pad(inqdata->product_id, spdk_bdev_get_product_name(bdev), 16, ' '); + + /* PRODUCT REVISION LEVEL */ + spdk_strcpy_pad(inqdata->product_rev, DEFAULT_DISK_REVISION, 4, ' '); + + /* + * Standard inquiry data ends here. Only populate remaining fields if alloc_len + * indicates enough space to hold it. + */ + len = INQUIRY_OFFSET(product_rev) - 5; + + if (alloc_len >= INQUIRY_OFFSET(vendor)) { + /* Vendor specific */ + memset(inqdata->vendor, 0x20, 20); + len += sizeof(inqdata->vendor); + } + + if (alloc_len >= INQUIRY_OFFSET(ius)) { + /* CLOCKING(3-2) QAS(1) IUS(0) */ + inqdata->ius = 0; + len += sizeof(inqdata->ius); + } + + if (alloc_len >= INQUIRY_OFFSET(reserved)) { + /* Reserved */ + inqdata->reserved = 0; + len += sizeof(inqdata->reserved); + } + + /* VERSION DESCRIPTOR 1-8 */ + if (alloc_len >= INQUIRY_OFFSET(reserved) + 2) { + to_be16(&inqdata->desc[0], 0x0960); + len += 2; + } + + if (alloc_len >= INQUIRY_OFFSET(reserved) + 4) { + to_be16(&inqdata->desc[2], 0x0300); /* SPC-3 (no version claimed) */ + len += 2; + } + + if (alloc_len >= INQUIRY_OFFSET(reserved) + 6) { + to_be16(&inqdata->desc[4], 0x320); /* SBC-2 (no version claimed) */ + len += 2; + } + + if (alloc_len >= INQUIRY_OFFSET(reserved) + 8) { + to_be16(&inqdata->desc[6], 0x0040); /* SAM-2 (no version claimed) */ + len += 2; + } + + /* + * We only fill out 4 descriptors, but if the allocation length goes past + * that, zero the remaining bytes. This fixes some SCSI compliance tests + * which expect a full 96 bytes to be returned, including the unpopulated + * version descriptors 5-8 (4 * 2 = 8 bytes) plus the 22 bytes of reserved + * space (bytes 74-95) - for a total of 30 bytes. 
+ */ + if (alloc_len > INQUIRY_OFFSET(reserved) + 8) { + i = alloc_len - (INQUIRY_OFFSET(reserved) + 8); + if (i > 30) { + i = 30; + } + memset(&inqdata->desc[8], 0, i); + len += i; + } + + /* ADDITIONAL LENGTH */ + inqdata->add_len = len; + } + + return hlen + len; + +inq_error: + task->data_transferred = 0; + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return -1; +} + +static void +mode_sense_page_init(uint8_t *buf, int len, int page, int subpage) +{ + if (!buf) { + return; + } + + memset(buf, 0, len); + if (subpage != 0) { + buf[0] = page | 0x40; /* PAGE + SPF=1 */ + buf[1] = subpage; + to_be16(&buf[2], len - 4); + } else { + buf[0] = page; + buf[1] = len - 2; + } +} + +static int +spdk_bdev_scsi_mode_sense_page(struct spdk_bdev *bdev, + uint8_t *cdb, int pc, int page, int subpage, + uint8_t *data, struct spdk_scsi_task *task) +{ + uint8_t *cp = data; + int len = 0; + int plen; + int i; + + if (pc == 0x00) { + /* Current values */ + } else if (pc == 0x01) { + /* Changeable values */ + /* As we currently do not support changeable values, + all parameters are reported as zero. */ + } else if (pc == 0x02) { + /* Default values */ + } else { + /* Saved values not supported */ + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_SAVING_PARAMETERS_NOT_SUPPORTED, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return -1; + } + + switch (page) { + case 0x00: + /* Vendor specific */ + break; + case 0x01: + /* Read-Write Error Recovery */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Read-Write Error Recovery\n"); + if (subpage != 0x00) { + break; + } + plen = 0x0a + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x02: + /* Disconnect-Reconnect */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Disconnect-Reconnect\n"); + if (subpage != 0x00) { + break; + } + plen = 0x0e + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x03: + /* Obsolete (Format Device) */ + break; + case 0x04: + /* Obsolete (Rigid Disk Geometry) */ + break; + case 0x05: + /* Obsolete (Rigid Disk Geometry) */ + break; + case 0x06: + /* Reserved */ + break; + case 0x07: + /* Verify Error Recovery */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Verify Error Recovery\n"); + + if (subpage != 0x00) { + break; + } + + plen = 0x0a + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x08: { + /* Caching */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "MODE_SENSE Caching\n"); + if (subpage != 0x00) { + break; + } + + plen = 0x12 + 2; + mode_sense_page_init(cp, plen, page, subpage); + + if (cp && spdk_bdev_has_write_cache(bdev) && pc != 0x01) { + cp[2] |= 0x4; /* WCE */ + } + + /* Read Cache Disable (RCD) = 1 */ + if (cp && pc != 0x01) { + cp[2] |= 0x1; + } + + len += plen; + break; + } + case 0x09: + /* Obsolete */ + break; + case 0x0a: + switch (subpage) { + case 0x00: + /* Control */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Control\n"); + plen = 0x0a + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x01: + /* Control Extension */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Control Extension\n"); + plen = 0x1c + 4; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0xff: + /* All subpages */ + len += spdk_bdev_scsi_mode_sense_page(bdev, + cdb, pc, page, + 0x00, + cp ? 
&cp[len] : NULL, task); + len += spdk_bdev_scsi_mode_sense_page(bdev, + cdb, pc, page, + 0x01, + cp ? &cp[len] : NULL, task); + break; + default: + /* 0x02-0x3e: Reserved */ + break; + } + break; + case 0x0b: + /* Obsolete (Medium Types Supported) */ + break; + case 0x0c: + /* Obsolete (Notch And Partitio) */ + break; + case 0x0d: + /* Obsolete */ + break; + case 0x0e: + case 0x0f: + /* Reserved */ + break; + case 0x10: + /* XOR Control */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "MODE_SENSE XOR Control\n"); + if (subpage != 0x00) { + break; + } + plen = 0x16 + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x11: + case 0x12: + case 0x13: + /* Reserved */ + break; + case 0x14: + /* Enclosure Services Management */ + break; + case 0x15: + case 0x16: + case 0x17: + /* Reserved */ + break; + case 0x18: + /* Protocol-Specific LUN */ + break; + case 0x19: + /* Protocol-Specific Port */ + break; + case 0x1a: + /* Power Condition */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Power Condition\n"); + if (subpage != 0x00) { + break; + } + plen = 0x0a + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x1b: + /* Reserved */ + break; + case 0x1c: + /* Informational Exceptions Control */ + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "MODE_SENSE Informational Exceptions Control\n"); + if (subpage != 0x00) { + break; + } + + plen = 0x0a + 2; + mode_sense_page_init(cp, plen, page, subpage); + len += plen; + break; + case 0x1d: + case 0x1e: + case 0x1f: + /* Reserved */ + break; + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x3d: + case 0x3e: + /* Vendor-specific */ + break; + case 0x3f: + switch (subpage) { + case 0x00: + /* All mode pages */ + for (i = 0x00; i < 0x3e; i ++) { + len += spdk_bdev_scsi_mode_sense_page( + bdev, cdb, pc, i, 0x00, + cp ? &cp[len] : NULL, task); + } + break; + case 0xff: + /* All mode pages and subpages */ + for (i = 0x00; i < 0x3e; i ++) { + len += spdk_bdev_scsi_mode_sense_page( + bdev, cdb, pc, i, 0x00, + cp ? &cp[len] : NULL, task); + } + for (i = 0x00; i < 0x3e; i ++) { + len += spdk_bdev_scsi_mode_sense_page( + bdev, cdb, pc, i, 0xff, + cp ? &cp[len] : NULL, task); + } + break; + default: + /* 0x01-0x3e: Reserved */ + break; + } + } + + return len; +} + +static int +spdk_bdev_scsi_mode_sense(struct spdk_bdev *bdev, int md, + uint8_t *cdb, int dbd, int llbaa, int pc, + int page, int subpage, uint8_t *data, struct spdk_scsi_task *task) +{ + uint64_t num_blocks = spdk_bdev_get_num_blocks(bdev); + uint32_t block_size = spdk_bdev_get_block_size(bdev); + uint8_t *hdr, *bdesc, *pages; + int hlen; + int blen; + int plen, total; + + assert(md == 6 || md == 10); + + if (md == 6) { + hlen = 4; + blen = 8; /* For MODE SENSE 6 only short LBA */ + } else { + hlen = 8; + blen = llbaa ? 16 : 8; + } + + if (dbd) { + blen = 0; + } + + pages = data ? 
&data[hlen + blen] : NULL;
+ plen = spdk_bdev_scsi_mode_sense_page(bdev, cdb, pc, page,
+ subpage,
+ pages, task);
+ if (plen < 0) {
+ return -1;
+ }
+
+ total = hlen + blen + plen;
+ if (data == NULL) {
+ return total;
+ }
+
+ hdr = &data[0];
+ if (hlen == 4) {
+ hdr[0] = total - 1; /* Mode Data Length */
+ hdr[1] = 0; /* Medium Type */
+ hdr[2] = 0; /* Device-Specific Parameter */
+ hdr[3] = blen; /* Block Descriptor Length */
+ } else {
+ to_be16(&hdr[0], total - 2); /* Mode Data Length */
+ hdr[2] = 0; /* Medium Type */
+ hdr[3] = 0; /* Device-Specific Parameter */
+ hdr[4] = llbaa ? 0x1 : 0; /* Long/short LBA */
+ hdr[5] = 0; /* Reserved */
+ to_be16(&hdr[6], blen); /* Block Descriptor Length */
+ }
+
+ bdesc = &data[hlen];
+ if (blen == 16) {
+ /* Number of Blocks */
+ to_be64(&bdesc[0], num_blocks);
+ /* Reserved */
+ memset(&bdesc[8], 0, 4);
+ /* Block Length */
+ to_be32(&bdesc[12], block_size);
+ } else if (blen == 8) {
+ /* Number of Blocks */
+ if (num_blocks > 0xffffffffULL) {
+ memset(&bdesc[0], 0xff, 4);
+ } else {
+ to_be32(&bdesc[0], num_blocks);
+ }
+
+ /* Block Length */
+ to_be32(&bdesc[4], block_size);
+ }
+
+ return total;
+}
+
+static int
+spdk_bdev_scsi_mode_select_page(struct spdk_bdev *bdev,
+ uint8_t *cdb, int pf, int sp,
+ uint8_t *data, size_t len)
+{
+ size_t hlen, plen;
+ int spf, page, subpage;
+ int rc;
+
+ /* vendor specific */
+ if (pf == 0) {
+ return 0;
+ }
+
+ if (len < 1) {
+ return 0;
+ }
+
+ spf = !!(data[0] & 0x40);
+ page = data[0] & 0x3f;
+ if (spf) {
+ /* Sub_page mode page format */
+ hlen = 4;
+ if (len < hlen) {
+ return 0;
+ }
+ subpage = data[1];
+
+ plen = from_be16(&data[2]);
+ } else {
+ /* Page_0 mode page format */
+ hlen = 2;
+ if (len < hlen) {
+ return 0;
+ }
+ subpage = 0;
+ plen = data[1];
+ }
+
+ plen += hlen;
+ if (len < plen) {
+ return 0;
+ }
+
+ switch (page) {
+ case 0x08: { /* Caching */
+ //int wce;
+
+ SPDK_DEBUGLOG(SPDK_LOG_SCSI, "MODE_SELECT Caching\n");
+ if (subpage != 0x00) {
+ break;
+ }
+
+ if (plen != 0x12 + hlen) {
+ /* unknown format */
+ break;
+ }
+
+ // TODO:
+ //wce = data[2] & 0x4; /* WCE */
+
+ //fd = bdev->fd;
+ //
+ //rc = fcntl(fd, F_GETFL, 0);
+ //if (rc != -1) {
+ // if (wce) {
+ // SPDK_DEBUGLOG(SPDK_LOG_SCSI, "MODE_SELECT Writeback cache enable\n");
+ // rc = fcntl(fd, F_SETFL, (rc & ~O_FSYNC));
+ // bdev->write_cache = 1;
+ // } else {
+ // rc = fcntl(fd, F_SETFL, (rc | O_FSYNC));
+ // bdev->write_cache = 0;
+ // }
+ //}
+
+ break;
+ }
+ default:
+ /* not supported */
+ break;
+ }
+
+ len -= plen;
+ if (len != 0) {
+ rc = spdk_bdev_scsi_mode_select_page(bdev, cdb, pf, sp, &data[plen], len);
+ if (rc < 0) {
+ return rc;
+ }
+ }
+ return 0;
+}
+
+static void
+spdk_bdev_scsi_task_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ struct spdk_scsi_task *task = cb_arg;
+ int sc, sk, asc, ascq;
+
+ task->bdev_io = bdev_io;
+
+ spdk_bdev_io_get_scsi_status(bdev_io, &sc, &sk, &asc, &ascq);
+ spdk_scsi_task_set_status(task, sc, sk, asc, ascq);
+ spdk_scsi_lun_complete_task(task->lun, task);
+}
+
+static void
+spdk_bdev_scsi_task_complete_mgmt(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ struct spdk_scsi_task *task = cb_arg;
+
+ task->bdev_io = bdev_io;
+
+ if (success) {
+ task->response = SPDK_SCSI_TASK_MGMT_RESP_SUCCESS;
+ }
+
+ spdk_scsi_lun_complete_mgmt_task(task->lun, task);
+}
+
+static void
+spdk_bdev_scsi_queue_io(struct spdk_scsi_task *task, spdk_bdev_io_wait_cb cb_fn, void *cb_arg)
+{
+ struct spdk_scsi_lun *lun = task->lun;
+ struct spdk_bdev *bdev
= lun->bdev; + struct spdk_io_channel *ch = lun->io_channel; + int rc; + + task->bdev_io_wait.bdev = bdev; + task->bdev_io_wait.cb_fn = cb_fn; + task->bdev_io_wait.cb_arg = cb_arg; + + rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait); + if (rc != 0) { + assert(false); + } +} + +static int +spdk_bdev_scsi_read(struct spdk_bdev *bdev, struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *bdev_ch, struct spdk_scsi_task *task, + uint64_t lba, uint32_t len) +{ + uint64_t blen; + uint64_t offset; + uint64_t nbytes; + int rc; + + blen = spdk_bdev_get_block_size(bdev); + + lba += (task->offset / blen); + offset = lba * blen; + nbytes = task->length; + + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "Read: lba=%"PRIu64", len=%"PRIu64"\n", + lba, (uint64_t)task->length / blen); + + rc = spdk_bdev_readv(bdev_desc, bdev_ch, task->iovs, + task->iovcnt, offset, nbytes, + spdk_bdev_scsi_task_complete_cmd, task); + + if (rc) { + if (rc == -ENOMEM) { + spdk_bdev_scsi_queue_io(task, spdk_bdev_scsi_process_block_resubmit, task); + return SPDK_SCSI_TASK_PENDING; + } + SPDK_ERRLOG("spdk_bdev_readv() failed\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + task->data_transferred = nbytes; + return SPDK_SCSI_TASK_PENDING; +} + +static int +spdk_bdev_scsi_write(struct spdk_bdev *bdev, struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *bdev_ch, struct spdk_scsi_task *task, + uint64_t lba, uint32_t len) +{ + uint64_t blen; + uint64_t offset; + uint64_t nbytes; + int rc; + + blen = spdk_bdev_get_block_size(bdev); + offset = lba * blen; + nbytes = ((uint64_t)len) * blen; + + SPDK_DEBUGLOG(SPDK_LOG_SCSI, + "Write: lba=%"PRIu64", len=%u\n", + lba, len); + + if (nbytes > task->transfer_len) { + SPDK_ERRLOG("nbytes(%zu) > transfer_len(%u)\n", + (size_t)nbytes, task->transfer_len); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + offset += task->offset; + rc = spdk_bdev_writev(bdev_desc, bdev_ch, task->iovs, + task->iovcnt, offset, task->length, + spdk_bdev_scsi_task_complete_cmd, + task); + + if (rc) { + if (rc == -ENOMEM) { + spdk_bdev_scsi_queue_io(task, spdk_bdev_scsi_process_block_resubmit, task); + return SPDK_SCSI_TASK_PENDING; + } + SPDK_ERRLOG("spdk_bdev_writev failed\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "Wrote %"PRIu64"/%"PRIu64" bytes\n", + (uint64_t)task->length, nbytes); + + task->data_transferred = task->length; + return SPDK_SCSI_TASK_PENDING; +} + +static int +spdk_bdev_scsi_sync(struct spdk_bdev *bdev, struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *bdev_ch, struct spdk_scsi_task *task, + uint64_t lba, uint32_t num_blocks) +{ + uint64_t bdev_num_blocks; + int rc; + + if (num_blocks == 0) { + return SPDK_SCSI_TASK_COMPLETE; + } + + bdev_num_blocks = spdk_bdev_get_num_blocks(bdev); + + if (lba >= bdev_num_blocks || num_blocks > bdev_num_blocks || + lba > (bdev_num_blocks - num_blocks)) { + SPDK_ERRLOG("end of media\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + 
SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + rc = spdk_bdev_flush_blocks(bdev_desc, bdev_ch, lba, num_blocks, + spdk_bdev_scsi_task_complete_cmd, task); + + if (rc) { + if (rc == -ENOMEM) { + spdk_bdev_scsi_queue_io(task, spdk_bdev_scsi_process_block_resubmit, task); + return SPDK_SCSI_TASK_PENDING; + } + SPDK_ERRLOG("spdk_bdev_flush_blocks() failed\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + task->data_transferred = 0; + return SPDK_SCSI_TASK_PENDING; +} + +static int +spdk_bdev_scsi_readwrite(struct spdk_scsi_task *task, + uint64_t lba, uint32_t xfer_len, bool is_read) +{ + struct spdk_scsi_lun *lun = task->lun; + struct spdk_bdev *bdev = lun->bdev; + struct spdk_bdev_desc *bdev_desc = lun->bdev_desc; + struct spdk_io_channel *bdev_ch = lun->io_channel; + uint64_t bdev_num_blocks; + uint32_t max_xfer_len; + + task->data_transferred = 0; + + if (spdk_unlikely(task->dxfer_dir != SPDK_SCSI_DIR_NONE && + task->dxfer_dir != (is_read ? SPDK_SCSI_DIR_FROM_DEV : SPDK_SCSI_DIR_TO_DEV))) { + SPDK_ERRLOG("Incorrect data direction\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + bdev_num_blocks = spdk_bdev_get_num_blocks(bdev); + if (spdk_unlikely(bdev_num_blocks <= lba || bdev_num_blocks - lba < xfer_len)) { + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "end of media\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + if (spdk_unlikely(xfer_len == 0)) { + task->status = SPDK_SCSI_STATUS_GOOD; + return SPDK_SCSI_TASK_COMPLETE; + } + + /* Transfer Length is limited to the Block Limits VPD page Maximum Transfer Length */ + max_xfer_len = SPDK_WORK_BLOCK_SIZE / spdk_bdev_get_block_size(bdev); + if (spdk_unlikely(xfer_len > max_xfer_len)) { + SPDK_ERRLOG("xfer_len %" PRIu32 " > maximum transfer length %" PRIu32 "\n", + xfer_len, max_xfer_len); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + if (is_read) { + return spdk_bdev_scsi_read(bdev, bdev_desc, bdev_ch, task, lba, xfer_len); + } else { + return spdk_bdev_scsi_write(bdev, bdev_desc, bdev_ch, task, lba, xfer_len); + } +} + +struct spdk_bdev_scsi_unmap_ctx { + struct spdk_scsi_task *task; + struct spdk_scsi_unmap_bdesc desc[DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT]; + uint32_t count; +}; + +static int spdk_bdev_scsi_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *bdev_ch, struct spdk_scsi_task *task, struct spdk_bdev_scsi_unmap_ctx *ctx); + +static void +spdk_bdev_scsi_task_complete_unmap_cmd(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + struct spdk_bdev_scsi_unmap_ctx *ctx = cb_arg; + struct spdk_scsi_task *task = ctx->task; + int sc, sk, asc, ascq; + + ctx->count--; + + task->bdev_io = bdev_io; + + if (task->status == SPDK_SCSI_STATUS_GOOD) { + spdk_bdev_io_get_scsi_status(bdev_io, &sc, &sk, &asc, &ascq); + 
spdk_scsi_task_set_status(task, sc, sk, asc, ascq); + } + + if (ctx->count == 0) { + spdk_scsi_lun_complete_task(task->lun, task); + free(ctx); + } +} + +static int +__copy_desc(struct spdk_bdev_scsi_unmap_ctx *ctx, uint8_t *data, size_t data_len) +{ + uint16_t desc_data_len; + uint16_t desc_count; + + if (!data) { + return -EINVAL; + } + + if (data_len < 8) { + /* We can't even get the reported length, so fail. */ + return -EINVAL; + } + + desc_data_len = from_be16(&data[2]); + desc_count = desc_data_len / 16; + + if (desc_data_len > (data_len - 8)) { + SPDK_ERRLOG("Error - desc_data_len (%u) > data_len (%lu) - 8\n", + desc_data_len, data_len); + return -EINVAL; + } + + if (desc_count > DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT) { + SPDK_ERRLOG("desc_count (%u) greater than max allowed (%u)\n", + desc_count, DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT); + return -EINVAL; + } + + memcpy(ctx->desc, &data[8], desc_data_len); + return desc_count; +} + +static void +spdk_bdev_scsi_unmap_resubmit(void *arg) +{ + struct spdk_bdev_scsi_unmap_ctx *ctx = arg; + struct spdk_scsi_task *task = ctx->task; + struct spdk_scsi_lun *lun = task->lun; + + spdk_bdev_scsi_unmap(lun->bdev, lun->bdev_desc, lun->io_channel, task, ctx); +} + +static int +spdk_bdev_scsi_unmap(struct spdk_bdev *bdev, struct spdk_bdev_desc *bdev_desc, + struct spdk_io_channel *bdev_ch, struct spdk_scsi_task *task, struct spdk_bdev_scsi_unmap_ctx *ctx) +{ + uint8_t *data; + int desc_count, i; + int data_len; + int rc; + + assert(task->status == SPDK_SCSI_STATUS_GOOD); + + if (ctx == NULL) { + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + ctx->task = task; + ctx->count = 0; + } + + + if (task->iovcnt == 1) { + data = (uint8_t *)task->iovs[0].iov_base; + data_len = task->iovs[0].iov_len; + desc_count = __copy_desc(ctx, data, data_len); + } else { + data = spdk_scsi_task_gather_data(task, &data_len); + desc_count = __copy_desc(ctx, data, data_len); + if (desc_count < 0) { + spdk_dma_free(data); + } + } + + if (desc_count < 0) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + free(ctx); + return SPDK_SCSI_TASK_COMPLETE; + } + + for (i = ctx->count; i < desc_count; i++) { + struct spdk_scsi_unmap_bdesc *desc; + uint64_t offset_blocks; + uint64_t num_blocks; + + desc = &ctx->desc[i]; + + offset_blocks = from_be64(&desc->lba); + num_blocks = from_be32(&desc->block_count); + + if (num_blocks == 0) { + continue; + } + + ctx->count++; + rc = spdk_bdev_unmap_blocks(bdev_desc, bdev_ch, offset_blocks, num_blocks, + spdk_bdev_scsi_task_complete_unmap_cmd, ctx); + + if (rc) { + if (rc == -ENOMEM) { + spdk_bdev_scsi_queue_io(task, spdk_bdev_scsi_unmap_resubmit, ctx); + /* Unmap was not yet submitted to bdev */ + ctx->count--; + return SPDK_SCSI_TASK_PENDING; + } + SPDK_ERRLOG("SCSI Unmapping failed\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + ctx->count--; + /* We can't complete here - we may have to wait for previously + * submitted unmaps to complete */ + break; + } + } + + if (ctx->count == 0) { + free(ctx); + return SPDK_SCSI_TASK_COMPLETE; + } + + return 
SPDK_SCSI_TASK_PENDING; +} + +static int +spdk_bdev_scsi_process_block(struct spdk_scsi_task *task) +{ + struct spdk_scsi_lun *lun = task->lun; + struct spdk_bdev *bdev = lun->bdev; + uint64_t lba; + uint32_t xfer_len; + uint32_t len = 0; + uint8_t *cdb = task->cdb; + + /* XXX: We need to support FUA bit for writes! */ + switch (cdb[0]) { + case SPDK_SBC_READ_6: + case SPDK_SBC_WRITE_6: + lba = (uint64_t)cdb[1] << 16; + lba |= (uint64_t)cdb[2] << 8; + lba |= (uint64_t)cdb[3]; + xfer_len = cdb[4]; + if (xfer_len == 0) { + xfer_len = 256; + } + return spdk_bdev_scsi_readwrite(task, lba, xfer_len, + cdb[0] == SPDK_SBC_READ_6); + + case SPDK_SBC_READ_10: + case SPDK_SBC_WRITE_10: + lba = from_be32(&cdb[2]); + xfer_len = from_be16(&cdb[7]); + return spdk_bdev_scsi_readwrite(task, lba, xfer_len, + cdb[0] == SPDK_SBC_READ_10); + + case SPDK_SBC_READ_12: + case SPDK_SBC_WRITE_12: + lba = from_be32(&cdb[2]); + xfer_len = from_be32(&cdb[6]); + return spdk_bdev_scsi_readwrite(task, lba, xfer_len, + cdb[0] == SPDK_SBC_READ_12); + case SPDK_SBC_READ_16: + case SPDK_SBC_WRITE_16: + lba = from_be64(&cdb[2]); + xfer_len = from_be32(&cdb[10]); + return spdk_bdev_scsi_readwrite(task, lba, xfer_len, + cdb[0] == SPDK_SBC_READ_16); + + case SPDK_SBC_READ_CAPACITY_10: { + uint64_t num_blocks = spdk_bdev_get_num_blocks(bdev); + uint8_t buffer[8]; + + if (num_blocks - 1 > 0xffffffffULL) { + memset(buffer, 0xff, 4); + } else { + to_be32(buffer, num_blocks - 1); + } + to_be32(&buffer[4], spdk_bdev_get_block_size(bdev)); + + len = spdk_min(task->length, sizeof(buffer)); + if (spdk_scsi_task_scatter_data(task, buffer, len) < 0) { + break; + } + + task->data_transferred = len; + task->status = SPDK_SCSI_STATUS_GOOD; + break; + } + + case SPDK_SPC_SERVICE_ACTION_IN_16: + switch (cdb[1] & 0x1f) { /* SERVICE ACTION */ + case SPDK_SBC_SAI_READ_CAPACITY_16: { + uint8_t buffer[32] = {0}; + + to_be64(&buffer[0], spdk_bdev_get_num_blocks(bdev) - 1); + to_be32(&buffer[8], spdk_bdev_get_block_size(bdev)); + /* + * Set the TPE bit to 1 to indicate thin provisioning. + * The position of TPE bit is the 7th bit in 14th byte + * in READ CAPACITY (16) parameter data. 
+ */ + if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + buffer[14] |= 1 << 7; + } + + len = spdk_min(from_be32(&cdb[10]), sizeof(buffer)); + if (spdk_scsi_task_scatter_data(task, buffer, len) < 0) { + break; + } + + task->data_transferred = len; + task->status = SPDK_SCSI_STATUS_GOOD; + break; + } + + default: + return SPDK_SCSI_TASK_UNKNOWN; + } + break; + + case SPDK_SBC_SYNCHRONIZE_CACHE_10: + case SPDK_SBC_SYNCHRONIZE_CACHE_16: + if (cdb[0] == SPDK_SBC_SYNCHRONIZE_CACHE_10) { + lba = from_be32(&cdb[2]); + len = from_be16(&cdb[7]); + } else { + lba = from_be64(&cdb[2]); + len = from_be32(&cdb[10]); + } + + if (len == 0) { + len = spdk_bdev_get_num_blocks(bdev) - lba; + } + + return spdk_bdev_scsi_sync(bdev, lun->bdev_desc, lun->io_channel, task, lba, len); + break; + + case SPDK_SBC_UNMAP: + return spdk_bdev_scsi_unmap(bdev, lun->bdev_desc, lun->io_channel, task, NULL); + + default: + return SPDK_SCSI_TASK_UNKNOWN; + } + + return SPDK_SCSI_TASK_COMPLETE; +} + +static void +spdk_bdev_scsi_process_block_resubmit(void *arg) +{ + struct spdk_scsi_task *task = arg; + + spdk_bdev_scsi_process_block(task); +} + +static int +spdk_bdev_scsi_check_len(struct spdk_scsi_task *task, int len, int min_len) +{ + if (len >= min_len) { + return 0; + } + + /* INVALID FIELD IN CDB */ + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return -1; +} + +static int +spdk_bdev_scsi_process_primary(struct spdk_scsi_task *task) +{ + struct spdk_scsi_lun *lun = task->lun; + struct spdk_bdev *bdev = lun->bdev; + int alloc_len = -1; + int data_len = -1; + uint8_t *cdb = task->cdb; + uint8_t *data = NULL; + int rc = 0; + int pllen, md = 0; + int pf, sp; + int bdlen = 0, llba; + int dbd, pc, page, subpage; + int cmd_parsed = 0; + + + switch (cdb[0]) { + case SPDK_SPC_INQUIRY: + alloc_len = from_be16(&cdb[3]); + data_len = spdk_max(4096, alloc_len); + data = spdk_dma_zmalloc(data_len, 0, NULL); + assert(data != NULL); + rc = spdk_bdev_scsi_inquiry(bdev, task, cdb, data, data_len); + data_len = spdk_min(rc, data_len); + if (rc < 0) { + break; + } + + SPDK_TRACEDUMP(SPDK_LOG_SCSI, "INQUIRY", data, data_len); + break; + + case SPDK_SPC_REPORT_LUNS: { + int sel; + + sel = cdb[2]; + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "sel=%x\n", sel); + + alloc_len = from_be32(&cdb[6]); + rc = spdk_bdev_scsi_check_len(task, alloc_len, 16); + if (rc < 0) { + break; + } + + data_len = spdk_max(4096, alloc_len); + data = spdk_dma_zmalloc(data_len, 0, NULL); + assert(data != NULL); + rc = spdk_bdev_scsi_report_luns(task->lun, sel, data, data_len); + data_len = rc; + if (rc < 0) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + SPDK_TRACEDUMP(SPDK_LOG_SCSI, "REPORT LUNS", data, data_len); + break; + } + + case SPDK_SPC_MODE_SELECT_6: + case SPDK_SPC_MODE_SELECT_10: + if (cdb[0] == SPDK_SPC_MODE_SELECT_6) { + /* MODE_SELECT(6) must have at least a 4 byte header. */ + md = 4; + pllen = cdb[4]; + } else { + /* MODE_SELECT(10) must have at least an 8 byte header. 
*/
+ md = 8;
+ pllen = from_be16(&cdb[7]);
+ }
+
+ if (pllen == 0) {
+ break;
+ }
+
+ rc = spdk_bdev_scsi_check_len(task, pllen, md);
+ if (rc < 0) {
+ break;
+ }
+
+ data = spdk_scsi_task_gather_data(task, &rc);
+ if (rc < 0) {
+ break;
+ }
+
+ data_len = rc;
+ if (cdb[0] == SPDK_SPC_MODE_SELECT_6) {
+ rc = spdk_bdev_scsi_check_len(task, data_len, 4);
+ if (rc >= 0) {
+ bdlen = data[3];
+ }
+
+ } else {
+ rc = spdk_bdev_scsi_check_len(task, data_len, 8);
+ if (rc >= 0) {
+ bdlen = from_be16(&data[6]);
+ }
+ }
+
+ if (rc < 0) {
+ break;
+ }
+ pf = !!(cdb[1] & 0x10);
+ sp = !!(cdb[1] & 0x1);
+
+ /* page data */
+ rc = spdk_bdev_scsi_mode_select_page(
+ bdev, cdb,
+ pf, sp,
+ &data[md + bdlen],
+ pllen - (md + bdlen));
+ if (rc < 0) {
+ spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
+ SPDK_SCSI_SENSE_NO_SENSE,
+ SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE,
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ break;
+ }
+
+ rc = pllen;
+ data_len = 0;
+ break;
+
+ case SPDK_SPC_MODE_SENSE_6:
+ alloc_len = cdb[4];
+ md = 6;
+ /* FALLTHROUGH */
+ case SPDK_SPC_MODE_SENSE_10:
+ llba = 0;
+
+ if (md == 0) {
+ alloc_len = from_be16(&cdb[7]);
+ llba = !!(cdb[1] & 0x10);
+ md = 10;
+ }
+
+ dbd = !!(cdb[1] & 0x8);
+ pc = (cdb[2] & 0xc0) >> 6;
+ page = cdb[2] & 0x3f;
+ subpage = cdb[3];
+
+ /* First call with no buffer to discover needed buffer size */
+ rc = spdk_bdev_scsi_mode_sense(bdev, md,
+ cdb, dbd, llba, pc,
+ page, subpage,
+ NULL, task);
+ if (rc < 0) {
+ break;
+ }
+
+ data_len = rc;
+ data = spdk_dma_zmalloc(data_len, 0, NULL);
+ assert(data != NULL);
+
+ /* Second call with the allocated buffer to fill in the mode sense data */
+ rc = spdk_bdev_scsi_mode_sense(bdev, md,
+ cdb, dbd, llba, pc,
+ page, subpage,
+ data, task);
+ if (rc < 0) {
+ /* INVALID FIELD IN CDB */
+ spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
+ SPDK_SCSI_SENSE_ILLEGAL_REQUEST,
+ SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB,
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ break;
+ }
+ break;
+
+ case SPDK_SPC_REQUEST_SENSE: {
+ int desc;
+ int sk, asc, ascq;
+
+ desc = cdb[1] & 0x1;
+ if (desc != 0) {
+ /* INVALID FIELD IN CDB */
+ spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
+ SPDK_SCSI_SENSE_ILLEGAL_REQUEST,
+ SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB,
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ break;
+ }
+
+ alloc_len = cdb[4];
+
+ /* NO ADDITIONAL SENSE INFORMATION */
+ sk = SPDK_SCSI_SENSE_NO_SENSE;
+ asc = 0x00;
+ ascq = 0x00;
+
+ spdk_scsi_task_build_sense_data(task, sk, asc, ascq);
+
+ data_len = task->sense_data_len;
+ data = spdk_dma_zmalloc(data_len, 0, NULL);
+ assert(data != NULL);
+ memcpy(data, task->sense_data, data_len);
+ break;
+ }
+
+ case SPDK_SPC_LOG_SELECT:
+ SPDK_DEBUGLOG(SPDK_LOG_SCSI, "LOG_SELECT\n");
+ cmd_parsed = 1;
+ /* FALLTHROUGH */
+ case SPDK_SPC_LOG_SENSE:
+ if (!cmd_parsed) {
+ SPDK_DEBUGLOG(SPDK_LOG_SCSI, "LOG_SENSE\n");
+ }
+
+ /* INVALID COMMAND OPERATION CODE */
+ spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
+ SPDK_SCSI_SENSE_ILLEGAL_REQUEST,
+ SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE,
+ SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ rc = -1;
+ break;
+
+ case SPDK_SPC_TEST_UNIT_READY:
+ SPDK_DEBUGLOG(SPDK_LOG_SCSI, "TEST_UNIT_READY\n");
+ cmd_parsed = 1;
+ /* FALLTHROUGH */
+ case SPDK_SBC_START_STOP_UNIT:
+ if (!cmd_parsed) {
+ SPDK_DEBUGLOG(SPDK_LOG_SCSI, "START_STOP_UNIT\n");
+ }
+
+ rc = 0;
+ break;
+
+ default:
+ return SPDK_SCSI_TASK_UNKNOWN;
+ }
+
+ if (rc >= 0 && data_len > 0) {
+ assert(alloc_len >= 0);
+ spdk_scsi_task_scatter_data(task, data,
spdk_min(alloc_len, data_len)); + rc = spdk_min(data_len, alloc_len); + } + + if (rc >= 0) { + task->data_transferred = rc; + task->status = SPDK_SCSI_STATUS_GOOD; + } + + if (data) { + spdk_dma_free(data); + } + + return SPDK_SCSI_TASK_COMPLETE; +} + +int +spdk_bdev_scsi_execute(struct spdk_scsi_task *task) +{ + int rc; + + if ((rc = spdk_bdev_scsi_process_block(task)) == SPDK_SCSI_TASK_UNKNOWN) { + if ((rc = spdk_bdev_scsi_process_primary(task)) == SPDK_SCSI_TASK_UNKNOWN) { + SPDK_DEBUGLOG(SPDK_LOG_SCSI, "unsupported SCSI OP=0x%x\n", task->cdb[0]); + /* INVALID COMMAND OPERATION CODE */ + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + } + + return rc; +} + +static void +spdk_bdev_scsi_reset_resubmit(void *arg) +{ + struct spdk_scsi_task *task = arg; + + spdk_bdev_scsi_reset(task); +} + +void +spdk_bdev_scsi_reset(struct spdk_scsi_task *task) +{ + struct spdk_scsi_lun *lun = task->lun; + int rc; + + rc = spdk_bdev_reset(lun->bdev_desc, lun->io_channel, spdk_bdev_scsi_task_complete_mgmt, task); + if (rc == -ENOMEM) { + spdk_bdev_scsi_queue_io(task, spdk_bdev_scsi_reset_resubmit, task); + } +} diff --git a/src/spdk/lib/scsi/scsi_internal.h b/src/spdk/lib/scsi/scsi_internal.h new file mode 100644 index 00000000..85caf762 --- /dev/null +++ b/src/spdk/lib/scsi/scsi_internal.h @@ -0,0 +1,160 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_SCSI_INTERNAL_H +#define SPDK_SCSI_INTERNAL_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/scsi.h" +#include "spdk/scsi_spec.h" +#include "spdk/trace.h" + +#include "spdk_internal/log.h" + +enum { + SPDK_SCSI_TASK_UNKNOWN = -1, + SPDK_SCSI_TASK_COMPLETE, + SPDK_SCSI_TASK_PENDING, +}; + +struct spdk_scsi_port { + uint8_t is_used; + uint64_t id; + uint16_t index; + char name[SPDK_SCSI_PORT_MAX_NAME_LENGTH]; +}; + +struct spdk_scsi_dev { + int id; + int is_allocated; + bool removed; + + char name[SPDK_SCSI_DEV_MAX_NAME + 1]; + + struct spdk_scsi_lun *lun[SPDK_SCSI_DEV_MAX_LUN]; + + int num_ports; + struct spdk_scsi_port port[SPDK_SCSI_DEV_MAX_PORTS]; + + uint8_t protocol_id; +}; + +struct spdk_scsi_desc { + struct spdk_scsi_lun *lun; + spdk_scsi_remove_cb_t hotremove_cb; + void *hotremove_ctx; + TAILQ_ENTRY(spdk_scsi_desc) link; +}; + +struct spdk_scsi_lun { + /** LUN id for this logical unit. */ + int id; + + /** Pointer to the SCSI device containing this LUN. */ + struct spdk_scsi_dev *dev; + + /** The bdev associated with this LUN. */ + struct spdk_bdev *bdev; + + /** Descriptor for opened block device. */ + struct spdk_bdev_desc *bdev_desc; + + /** I/O channel for the bdev associated with this LUN. */ + struct spdk_io_channel *io_channel; + + /** The reference number for this LUN, thus we can correctly free the io_channel */ + uint32_t ref; + + /** Poller to release the resource of the lun when it is hot removed */ + struct spdk_poller *hotremove_poller; + + /** The LUN is removed */ + bool removed; + + /** Callback to be fired when LUN removal is first triggered. */ + void (*hotremove_cb)(const struct spdk_scsi_lun *lun, void *arg); + + /** Argument for hotremove_cb */ + void *hotremove_ctx; + + /** List of open descriptors for this LUN. */ + TAILQ_HEAD(, spdk_scsi_desc) open_descs; + + /** pending tasks */ + TAILQ_HEAD(tasks, spdk_scsi_task) tasks; +}; + +struct spdk_lun_db_entry { + struct spdk_scsi_lun *lun; + struct spdk_lun_db_entry *next; +}; + +extern struct spdk_lun_db_entry *spdk_scsi_lun_list_head; + +/* This typedef exists to work around an astyle 2.05 bug. + * Remove it when astyle is fixed. 
+ */ +typedef struct spdk_scsi_lun _spdk_scsi_lun; + +_spdk_scsi_lun *spdk_scsi_lun_construct(struct spdk_bdev *bdev, + void (*hotremove_cb)(const struct spdk_scsi_lun *, void *), + void *hotremove_ctx); +void spdk_scsi_lun_destruct(struct spdk_scsi_lun *lun); + +void spdk_scsi_lun_execute_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task); +int spdk_scsi_lun_task_mgmt_execute(struct spdk_scsi_task *task, enum spdk_scsi_task_func func); +void spdk_scsi_lun_complete_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task); +void spdk_scsi_lun_complete_mgmt_task(struct spdk_scsi_lun *lun, struct spdk_scsi_task *task); +bool spdk_scsi_lun_has_pending_tasks(const struct spdk_scsi_lun *lun); +int _spdk_scsi_lun_allocate_io_channel(struct spdk_scsi_lun *lun); +void _spdk_scsi_lun_free_io_channel(struct spdk_scsi_lun *lun); + +struct spdk_scsi_dev *spdk_scsi_dev_get_list(void); + +int spdk_scsi_port_construct(struct spdk_scsi_port *port, uint64_t id, + uint16_t index, const char *name); +void spdk_scsi_port_destruct(struct spdk_scsi_port *port); + +int spdk_bdev_scsi_execute(struct spdk_scsi_task *task); +void spdk_bdev_scsi_reset(struct spdk_scsi_task *task); + +struct spdk_scsi_globals { + pthread_mutex_t mutex; +}; + +extern struct spdk_scsi_globals g_spdk_scsi; + +#endif /* SPDK_SCSI_INTERNAL_H */ diff --git a/src/spdk/lib/scsi/scsi_rpc.c b/src/spdk/lib/scsi/scsi_rpc.c new file mode 100644 index 00000000..150069a9 --- /dev/null +++ b/src/spdk/lib/scsi/scsi_rpc.c @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "scsi_internal.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" + +static void +spdk_rpc_get_scsi_devices(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_json_write_ctx *w; + struct spdk_scsi_dev *devs = spdk_scsi_dev_get_list(); + int i; + + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "get_scsi_devices requires no parameters"); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + for (i = 0; i < SPDK_SCSI_MAX_DEVS; i++) { + struct spdk_scsi_dev *dev = &devs[i]; + + if (!dev->is_allocated) { + continue; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "id"); + spdk_json_write_int32(w, dev->id); + + spdk_json_write_name(w, "device_name"); + spdk_json_write_string(w, dev->name); + + spdk_json_write_object_end(w); + } + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); +} +SPDK_RPC_REGISTER("get_scsi_devices", spdk_rpc_get_scsi_devices, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/scsi/task.c b/src/spdk/lib/scsi/task.c new file mode 100644 index 00000000..6ddc0085 --- /dev/null +++ b/src/spdk/lib/scsi/task.c @@ -0,0 +1,256 @@ +/*- + * BSD LICENSE + * + * Copyright (C) 2008-2012 Daisuke Aoyama . + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "scsi_internal.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/util.h" + +static void +spdk_scsi_task_free_data(struct spdk_scsi_task *task); + +void +spdk_scsi_task_put(struct spdk_scsi_task *task) +{ + if (!task) { + return; + } + + task->ref--; + + if (task->ref == 0) { + struct spdk_bdev_io *bdev_io = task->bdev_io; + + if (bdev_io) { + spdk_bdev_free_io(bdev_io); + } + + spdk_scsi_task_free_data(task); + + task->free_fn(task); + } +} + +void +spdk_scsi_task_construct(struct spdk_scsi_task *task, + spdk_scsi_task_cpl cpl_fn, + spdk_scsi_task_free free_fn) +{ + assert(task != NULL); + assert(cpl_fn != NULL); + assert(free_fn != NULL); + + task->cpl_fn = cpl_fn; + task->free_fn = free_fn; + + task->ref++; + + /* + * Pre-fill the iov_buffers to point to the embedded iov + */ + assert(task->iov.iov_base == NULL); + task->iovs = &task->iov; + task->iovcnt = 1; +} + +static void +spdk_scsi_task_free_data(struct spdk_scsi_task *task) +{ + if (task->alloc_len != 0) { + spdk_dma_free(task->iov.iov_base); + task->alloc_len = 0; + } + + task->iov.iov_base = NULL; + task->iov.iov_len = 0; +} + +static void * +spdk_scsi_task_alloc_data(struct spdk_scsi_task *task, uint32_t alloc_len) +{ + assert(task->alloc_len == 0); + + task->iov.iov_base = spdk_dma_zmalloc(alloc_len, 0, NULL); + task->iov.iov_len = alloc_len; + task->alloc_len = alloc_len; + + return task->iov.iov_base; +} + +int +spdk_scsi_task_scatter_data(struct spdk_scsi_task *task, const void *src, size_t buf_len) +{ + size_t len = 0; + size_t buf_left = buf_len; + int i; + struct iovec *iovs = task->iovs; + const uint8_t *pos; + + if (buf_len == 0) { + return 0; + } + + if (task->iovcnt == 1 && iovs[0].iov_base == NULL) { + spdk_scsi_task_alloc_data(task, buf_len); + iovs[0] = task->iov; + } + + for (i = 0; i < task->iovcnt; i++) { + assert(iovs[i].iov_base != NULL); + len += iovs[i].iov_len; + } + + if (len < buf_len) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_ILLEGAL_REQUEST, + SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return -1; + } + + pos = src; + + for (i = 0; i < task->iovcnt; i++) { + len = spdk_min(iovs[i].iov_len, buf_left); + buf_left -= len; + memcpy(iovs[i].iov_base, pos, len); + pos += len; + } + + return buf_len; +} + +void * +spdk_scsi_task_gather_data(struct spdk_scsi_task *task, int *len) +{ + int i; + struct iovec *iovs = task->iovs; + size_t buf_len = 0; + uint8_t *buf, *pos; + + for (i = 0; i < task->iovcnt; i++) { + assert(iovs[i].iov_base != NULL); + buf_len += iovs[i].iov_len; + } + + if (buf_len == 0) { + *len = 0; + return NULL; + } + + buf = spdk_dma_malloc(buf_len, 0, NULL); + if (buf == NULL) { + *len = -1; + return NULL; + } + + pos = buf; + for (i = 0; i < task->iovcnt; i++) { + memcpy(pos, iovs[i].iov_base, iovs[i].iov_len); + pos += iovs[i].iov_len; + } + + *len = buf_len; + return buf; +} + +void +spdk_scsi_task_set_data(struct spdk_scsi_task *task, void *data, uint32_t len) +{ + assert(task->iovcnt == 1); + assert(task->alloc_len == 0); + + task->iovs[0].iov_base = data; + task->iovs[0].iov_len = len; +} + +void +spdk_scsi_task_build_sense_data(struct spdk_scsi_task *task, int sk, int asc, int ascq) +{ + uint8_t *cp; + int resp_code; + + resp_code = 0x70; /* Current + Fixed format */ + + /* Sense Data */ + cp = task->sense_data; + + /* VALID(7) RESPONSE CODE(6-0) */ + cp[0] = 0x80 | resp_code; + /* Obsolete */ + cp[1] = 0; + /* FILEMARK(7) EOM(6) ILI(5) SENSE KEY(3-0) */ + cp[2] = sk & 
0xf; + /* INFORMATION */ + memset(&cp[3], 0, 4); + + /* ADDITIONAL SENSE LENGTH */ + cp[7] = 10; + + /* COMMAND-SPECIFIC INFORMATION */ + memset(&cp[8], 0, 4); + /* ADDITIONAL SENSE CODE */ + cp[12] = asc; + /* ADDITIONAL SENSE CODE QUALIFIER */ + cp[13] = ascq; + /* FIELD REPLACEABLE UNIT CODE */ + cp[14] = 0; + + /* SKSV(7) SENSE KEY SPECIFIC(6-0,7-0,7-0) */ + cp[15] = 0; + cp[16] = 0; + cp[17] = 0; + + /* SenseLength */ + task->sense_data_len = 18; +} + +void +spdk_scsi_task_set_status(struct spdk_scsi_task *task, int sc, int sk, + int asc, int ascq) +{ + if (sc == SPDK_SCSI_STATUS_CHECK_CONDITION) { + spdk_scsi_task_build_sense_data(task, sk, asc, ascq); + } + task->status = sc; +} + +void +spdk_scsi_task_copy_status(struct spdk_scsi_task *dst, + struct spdk_scsi_task *src) +{ + memcpy(dst->sense_data, src->sense_data, src->sense_data_len); + dst->sense_data_len = src->sense_data_len; + dst->status = src->status; +} diff --git a/src/spdk/lib/sock/Makefile b/src/spdk/lib/sock/Makefile new file mode 100644 index 00000000..8860556d --- /dev/null +++ b/src/spdk/lib/sock/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = sock.c net_framework.c + +LIBNAME = sock + +DIRS-y += posix +DIRS-$(CONFIG_VPP) += vpp + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/sock/net_framework.c b/src/spdk/lib/sock/net_framework.c new file mode 100644 index 00000000..5d5a568f --- /dev/null +++ b/src/spdk/lib/sock/net_framework.c @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/log.h" +#include "spdk/net.h" +#include "spdk/queue.h" + +static STAILQ_HEAD(, spdk_net_framework) g_net_frameworks = + STAILQ_HEAD_INITIALIZER(g_net_frameworks); + +int spdk_net_framework_start(void) +{ + struct spdk_net_framework *net_framework = NULL; + int rc; + + STAILQ_FOREACH_FROM(net_framework, &g_net_frameworks, link) { + rc = net_framework->init(); + if (rc != 0) { + SPDK_ERRLOG("Net framework %s failed to initialize\n", net_framework->name); + return rc; + } + } + + return 0; +} + +void spdk_net_framework_fini(void) +{ + struct spdk_net_framework *net_framework = NULL; + + STAILQ_FOREACH_FROM(net_framework, &g_net_frameworks, link) { + net_framework->fini(); + } +} + +void +spdk_net_framework_register(struct spdk_net_framework *frame) +{ + STAILQ_INSERT_TAIL(&g_net_frameworks, frame, link); +} diff --git a/src/spdk/lib/sock/posix/Makefile b/src/spdk/lib/sock/posix/Makefile new file mode 100644 index 00000000..540694c4 --- /dev/null +++ b/src/spdk/lib/sock/posix/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +LIBNAME = sock_posix +C_SRCS = posix.c + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/sock/posix/posix.c b/src/spdk/lib/sock/posix/posix.c new file mode 100644 index 00000000..565d3892 --- /dev/null +++ b/src/spdk/lib/sock/posix/posix.c @@ -0,0 +1,604 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +#include "spdk/log.h" +#include "spdk/sock.h" +#include "spdk_internal/sock.h" + +#define MAX_TMPBUF 1024 +#define PORTNUMLEN 32 + +struct spdk_posix_sock { + struct spdk_sock base; + int fd; +}; + +struct spdk_posix_sock_group_impl { + struct spdk_sock_group_impl base; + int fd; +}; + +static int +get_addr_str(struct sockaddr *sa, char *host, size_t hlen) +{ + const char *result = NULL; + + if (sa == NULL || host == NULL) { + return -1; + } + + switch (sa->sa_family) { + case AF_INET: + result = inet_ntop(AF_INET, &(((struct sockaddr_in *)sa)->sin_addr), + host, hlen); + break; + case AF_INET6: + result = inet_ntop(AF_INET6, &(((struct sockaddr_in6 *)sa)->sin6_addr), + host, hlen); + break; + default: + break; + } + + if (result != NULL) { + return 0; + } else { + return -1; + } +} + +#define __posix_sock(sock) (struct spdk_posix_sock *)sock +#define __posix_group_impl(group) (struct spdk_posix_sock_group_impl *)group + +static int +spdk_posix_sock_getaddr(struct spdk_sock *_sock, char *saddr, int slen, uint16_t *sport, + char *caddr, int clen, uint16_t *cport) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + struct sockaddr_storage sa; + socklen_t salen; + int rc; + + assert(sock != NULL); + + memset(&sa, 0, sizeof sa); + salen = sizeof sa; + rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); + if (rc != 0) { + SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); + return -1; + } + + switch (sa.ss_family) { + case AF_UNIX: + /* Acceptable connection types that don't have IPs */ + return 0; + case AF_INET: + case AF_INET6: + /* Code below will get IP addresses */ + break; + default: + /* Unsupported socket family */ + return -1; + } + + rc = get_addr_str((struct sockaddr *)&sa, saddr, slen); + if (rc != 0) { + SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno); + return -1; + } + + if (sport) { + if (sa.ss_family == AF_INET) { + *sport = ntohs(((struct sockaddr_in *) &sa)->sin_port); + } else if (sa.ss_family == AF_INET6) { + *sport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); + } + } + + memset(&sa, 0, sizeof sa); + salen = sizeof sa; + rc = getpeername(sock->fd, (struct sockaddr *) &sa, &salen); + if (rc != 0) { + SPDK_ERRLOG("getpeername() failed (errno=%d)\n", errno); + return -1; + } + + rc = get_addr_str((struct sockaddr *)&sa, caddr, clen); + if (rc != 0) { + SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno); + return -1; + } + + if (cport) { + if (sa.ss_family == AF_INET) { + *cport = ntohs(((struct sockaddr_in *) &sa)->sin_port); + } else if (sa.ss_family == AF_INET6) { + *cport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); + } + } + + return 0; +} + +enum spdk_posix_sock_create_type { + SPDK_SOCK_CREATE_LISTEN, + SPDK_SOCK_CREATE_CONNECT, +}; + +static struct spdk_sock * +spdk_posix_sock_create(const char *ip, int port, enum spdk_posix_sock_create_type type) +{ + struct spdk_posix_sock *sock; + char buf[MAX_TMPBUF]; + char portnum[PORTNUMLEN]; + char *p; + struct addrinfo hints, *res, *res0; + int fd, flag; + int val = 1; + int rc; + + if (ip == NULL) { + return NULL; + } + if (ip[0] == '[') { + snprintf(buf, sizeof(buf), "%s", ip + 1); + p = strchr(buf, ']'); + if (p != NULL) { + *p = '\0'; + } + ip = (const char *) &buf[0]; + } + + snprintf(portnum, sizeof portnum, "%d", port); + memset(&hints, 0, sizeof hints); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICSERV; + 
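/* AI_PASSIVE makes the result usable for bind(); AI_NUMERICHOST skips DNS resolution, since ip is expected to be a numeric address here. */ +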
hints.ai_flags |= AI_PASSIVE; + hints.ai_flags |= AI_NUMERICHOST; + rc = getaddrinfo(ip, portnum, &hints, &res0); + if (rc != 0) { + SPDK_ERRLOG("getaddrinfo() failed (errno=%d)\n", errno); + return NULL; + } + + /* try listen */ + fd = -1; + for (res = res0; res != NULL; res = res->ai_next) { +retry: + fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); + if (fd < 0) { + /* error */ + continue; + } + rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val); + if (rc != 0) { + close(fd); + /* error */ + continue; + } + rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val); + if (rc != 0) { + close(fd); + /* error */ + continue; + } + + if (res->ai_family == AF_INET6) { + rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val); + if (rc != 0) { + close(fd); + /* error */ + continue; + } + } + + if (type == SPDK_SOCK_CREATE_LISTEN) { + rc = bind(fd, res->ai_addr, res->ai_addrlen); + if (rc != 0) { + SPDK_ERRLOG("bind() failed, errno = %d\n", errno); + switch (errno) { + case EINTR: + /* interrupted? */ + close(fd); + goto retry; + case EADDRNOTAVAIL: + SPDK_ERRLOG("IP address %s not available. " + "Verify IP address in config file " + "and make sure setup script is " + "run before starting spdk app.\n", ip); + /* FALLTHROUGH */ + default: + /* try next family */ + close(fd); + fd = -1; + continue; + } + } + /* bind OK */ + rc = listen(fd, 512); + if (rc != 0) { + SPDK_ERRLOG("listen() failed, errno = %d\n", errno); + close(fd); + fd = -1; + break; + } + } else if (type == SPDK_SOCK_CREATE_CONNECT) { + rc = connect(fd, res->ai_addr, res->ai_addrlen); + if (rc != 0) { + SPDK_ERRLOG("connect() failed, errno = %d\n", errno); + /* try next family */ + close(fd); + fd = -1; + continue; + } + } + + flag = fcntl(fd, F_GETFL); + if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno); + close(fd); + fd = -1; + break; + } + break; + } + freeaddrinfo(res0); + + if (fd < 0) { + return NULL; + } + + sock = calloc(1, sizeof(*sock)); + if (sock == NULL) { + SPDK_ERRLOG("sock allocation failed\n"); + close(fd); + return NULL; + } + + sock->fd = fd; + return &sock->base; +} + +static struct spdk_sock * +spdk_posix_sock_listen(const char *ip, int port) +{ + return spdk_posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN); +} + +static struct spdk_sock * +spdk_posix_sock_connect(const char *ip, int port) +{ + return spdk_posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT); +} + +static struct spdk_sock * +spdk_posix_sock_accept(struct spdk_sock *_sock) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + struct sockaddr_storage sa; + socklen_t salen; + int rc; + struct spdk_posix_sock *new_sock; + int flag; + + memset(&sa, 0, sizeof(sa)); + salen = sizeof(sa); + + assert(sock != NULL); + + rc = accept(sock->fd, (struct sockaddr *)&sa, &salen); + + if (rc == -1) { + return NULL; + } + + flag = fcntl(rc, F_GETFL); + if ((!(flag & O_NONBLOCK)) && (fcntl(rc, F_SETFL, flag | O_NONBLOCK) < 0)) { + SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", rc, errno); + close(rc); + return NULL; + } + + new_sock = calloc(1, sizeof(*sock)); + if (new_sock == NULL) { + SPDK_ERRLOG("sock allocation failed\n"); + close(rc); + return NULL; + } + + new_sock->fd = rc; + return &new_sock->base; +} + +static int +spdk_posix_sock_close(struct spdk_sock *_sock) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + int rc; + + rc = close(sock->fd); + if (rc == 0) { + free(sock); + } + 
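+ /* If close() failed, the sock object is left allocated and rc reports the failure to the caller. */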
+ return rc; +} + +static ssize_t +spdk_posix_sock_recv(struct spdk_sock *_sock, void *buf, size_t len) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + + return recv(sock->fd, buf, len, MSG_DONTWAIT); +} + +static ssize_t +spdk_posix_sock_writev(struct spdk_sock *_sock, struct iovec *iov, int iovcnt) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + + return writev(sock->fd, iov, iovcnt); +} + +static int +spdk_posix_sock_set_recvlowat(struct spdk_sock *_sock, int nbytes) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + int val; + int rc; + + assert(sock != NULL); + + val = nbytes; + rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVLOWAT, &val, sizeof val); + if (rc != 0) { + return -1; + } + return 0; +} + +static int +spdk_posix_sock_set_recvbuf(struct spdk_sock *_sock, int sz) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + + assert(sock != NULL); + + return setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, + &sz, sizeof(sz)); +} + +static int +spdk_posix_sock_set_sendbuf(struct spdk_sock *_sock, int sz) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + + assert(sock != NULL); + + return setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, + &sz, sizeof(sz)); +} + +static bool +spdk_posix_sock_is_ipv6(struct spdk_sock *_sock) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + struct sockaddr_storage sa; + socklen_t salen; + int rc; + + assert(sock != NULL); + + memset(&sa, 0, sizeof sa); + salen = sizeof sa; + rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); + if (rc != 0) { + SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); + return false; + } + + return (sa.ss_family == AF_INET6); +} + +static bool +spdk_posix_sock_is_ipv4(struct spdk_sock *_sock) +{ + struct spdk_posix_sock *sock = __posix_sock(_sock); + struct sockaddr_storage sa; + socklen_t salen; + int rc; + + assert(sock != NULL); + + memset(&sa, 0, sizeof sa); + salen = sizeof sa; + rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); + if (rc != 0) { + SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); + return false; + } + + return (sa.ss_family == AF_INET); +} + +static struct spdk_sock_group_impl * +spdk_posix_sock_group_impl_create(void) +{ + struct spdk_posix_sock_group_impl *group_impl; + int fd; + +#if defined(__linux__) + fd = epoll_create1(0); +#elif defined(__FreeBSD__) + fd = kqueue(); +#endif + if (fd == -1) { + return NULL; + } + + group_impl = calloc(1, sizeof(*group_impl)); + if (group_impl == NULL) { + SPDK_ERRLOG("group_impl allocation failed\n"); + close(fd); + return NULL; + } + + group_impl->fd = fd; + + return &group_impl->base; +} + +static int +spdk_posix_sock_group_impl_add_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) +{ + struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); + struct spdk_posix_sock *sock = __posix_sock(_sock); + int rc; + +#if defined(__linux__) + struct epoll_event event; + + event.events = EPOLLIN; + event.data.ptr = sock; + + rc = epoll_ctl(group->fd, EPOLL_CTL_ADD, sock->fd, &event); +#elif defined(__FreeBSD__) + struct kevent event; + struct timespec ts = {0}; + + EV_SET(&event, sock->fd, EVFILT_READ, EV_ADD, 0, 0, sock); + + rc = kevent(group->fd, &event, 1, NULL, 0, &ts); +#endif + return rc; +} + +static int +spdk_posix_sock_group_impl_remove_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) +{ + struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); + struct spdk_posix_sock *sock = __posix_sock(_sock); + int rc; +#if 
defined(__linux__) + struct epoll_event event; + + /* Event parameter is ignored but some old kernel version still require it. */ + rc = epoll_ctl(group->fd, EPOLL_CTL_DEL, sock->fd, &event); +#elif defined(__FreeBSD__) + struct kevent event; + struct timespec ts = {0}; + + EV_SET(&event, sock->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + + rc = kevent(group->fd, &event, 1, NULL, 0, &ts); + if (rc == 0 && event.flags & EV_ERROR) { + rc = -1; + errno = event.data; + } +#endif + return rc; +} + +static int +spdk_posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events, + struct spdk_sock **socks) +{ + struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); + int num_events, i; + +#if defined(__linux__) + struct epoll_event events[MAX_EVENTS_PER_POLL]; + + num_events = epoll_wait(group->fd, events, max_events, 0); +#elif defined(__FreeBSD__) + struct kevent events[MAX_EVENTS_PER_POLL]; + struct timespec ts = {0}; + + num_events = kevent(group->fd, NULL, 0, events, max_events, &ts); +#endif + + if (num_events == -1) { + return -1; + } + + for (i = 0; i < num_events; i++) { +#if defined(__linux__) + socks[i] = events[i].data.ptr; +#elif defined(__FreeBSD__) + socks[i] = events[i].udata; +#endif + } + + return num_events; +} + +static int +spdk_posix_sock_group_impl_close(struct spdk_sock_group_impl *_group) +{ + struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); + + return close(group->fd); +} + +static struct spdk_net_impl g_posix_net_impl = { + .name = "posix", + .getaddr = spdk_posix_sock_getaddr, + .connect = spdk_posix_sock_connect, + .listen = spdk_posix_sock_listen, + .accept = spdk_posix_sock_accept, + .close = spdk_posix_sock_close, + .recv = spdk_posix_sock_recv, + .writev = spdk_posix_sock_writev, + .set_recvlowat = spdk_posix_sock_set_recvlowat, + .set_recvbuf = spdk_posix_sock_set_recvbuf, + .set_sendbuf = spdk_posix_sock_set_sendbuf, + .is_ipv6 = spdk_posix_sock_is_ipv6, + .is_ipv4 = spdk_posix_sock_is_ipv4, + .group_impl_create = spdk_posix_sock_group_impl_create, + .group_impl_add_sock = spdk_posix_sock_group_impl_add_sock, + .group_impl_remove_sock = spdk_posix_sock_group_impl_remove_sock, + .group_impl_poll = spdk_posix_sock_group_impl_poll, + .group_impl_close = spdk_posix_sock_group_impl_close, +}; + +SPDK_NET_IMPL_REGISTER(posix, &g_posix_net_impl); diff --git a/src/spdk/lib/sock/sock.c b/src/spdk/lib/sock/sock.c new file mode 100644 index 00000000..d31aa9b0 --- /dev/null +++ b/src/spdk/lib/sock/sock.c @@ -0,0 +1,373 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/log.h" +#include "spdk/sock.h" +#include "spdk_internal/sock.h" +#include "spdk/queue.h" + +static STAILQ_HEAD(, spdk_net_impl) g_net_impls = STAILQ_HEAD_INITIALIZER(g_net_impls); + +int +spdk_sock_getaddr(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, + char *caddr, int clen, uint16_t *cport) +{ + return sock->net_impl->getaddr(sock, saddr, slen, sport, caddr, clen, cport); +} + +struct spdk_sock * +spdk_sock_connect(const char *ip, int port) +{ + struct spdk_net_impl *impl = NULL; + struct spdk_sock *sock; + + STAILQ_FOREACH_FROM(impl, &g_net_impls, link) { + sock = impl->connect(ip, port); + if (sock != NULL) { + sock->net_impl = impl; + return sock; + } + } + + return NULL; +} + +struct spdk_sock * +spdk_sock_listen(const char *ip, int port) +{ + struct spdk_net_impl *impl = NULL; + struct spdk_sock *sock; + + STAILQ_FOREACH_FROM(impl, &g_net_impls, link) { + sock = impl->listen(ip, port); + if (sock != NULL) { + sock->net_impl = impl; + return sock; + } + } + + return NULL; +} + +struct spdk_sock * +spdk_sock_accept(struct spdk_sock *sock) +{ + struct spdk_sock *new_sock; + + new_sock = sock->net_impl->accept(sock); + if (new_sock != NULL) { + new_sock->net_impl = sock->net_impl; + } + + return new_sock; +} + +int +spdk_sock_close(struct spdk_sock **sock) +{ + int rc; + + if (*sock == NULL) { + errno = EBADF; + return -1; + } + + if ((*sock)->cb_fn != NULL) { + /* This sock is still part of a sock_group. 
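The socket must first be removed from its group with spdk_sock_group_remove_sock().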
*/ + errno = EBUSY; + return -1; + } + + rc = (*sock)->net_impl->close(*sock); + if (rc == 0) { + *sock = NULL; + } + + return rc; +} + +ssize_t +spdk_sock_recv(struct spdk_sock *sock, void *buf, size_t len) +{ + if (sock == NULL) { + errno = EBADF; + return -1; + } + + return sock->net_impl->recv(sock, buf, len); +} + +ssize_t +spdk_sock_writev(struct spdk_sock *sock, struct iovec *iov, int iovcnt) +{ + if (sock == NULL) { + errno = EBADF; + return -1; + } + + return sock->net_impl->writev(sock, iov, iovcnt); +} + + +int +spdk_sock_set_recvlowat(struct spdk_sock *sock, int nbytes) +{ + return sock->net_impl->set_recvlowat(sock, nbytes); +} + +int +spdk_sock_set_recvbuf(struct spdk_sock *sock, int sz) +{ + return sock->net_impl->set_recvbuf(sock, sz); +} + +int +spdk_sock_set_sendbuf(struct spdk_sock *sock, int sz) +{ + return sock->net_impl->set_sendbuf(sock, sz); +} + +bool +spdk_sock_is_ipv6(struct spdk_sock *sock) +{ + return sock->net_impl->is_ipv6(sock); +} + +bool +spdk_sock_is_ipv4(struct spdk_sock *sock) +{ + return sock->net_impl->is_ipv4(sock); +} + +struct spdk_sock_group * +spdk_sock_group_create(void) +{ + struct spdk_net_impl *impl = NULL; + struct spdk_sock_group *group; + struct spdk_sock_group_impl *group_impl; + + group = calloc(1, sizeof(*group)); + if (group == NULL) { + return NULL; + } + + STAILQ_INIT(&group->group_impls); + + STAILQ_FOREACH_FROM(impl, &g_net_impls, link) { + group_impl = impl->group_impl_create(); + if (group_impl != NULL) { + STAILQ_INSERT_TAIL(&group->group_impls, group_impl, link); + TAILQ_INIT(&group_impl->socks); + group_impl->net_impl = impl; + } + } + + return group; +} + +int +spdk_sock_group_add_sock(struct spdk_sock_group *group, struct spdk_sock *sock, + spdk_sock_cb cb_fn, void *cb_arg) +{ + struct spdk_sock_group_impl *group_impl = NULL; + int rc; + + if (cb_fn == NULL) { + errno = EINVAL; + return -1; + } + + if (sock->cb_fn != NULL) { + /* + * This sock is already part of a sock_group. Currently we don't + * support this. 
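+ * A socket can belong to at most one sock_group at a time.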
+ */ + errno = EBUSY; + return -1; + } + + STAILQ_FOREACH_FROM(group_impl, &group->group_impls, link) { + if (sock->net_impl == group_impl->net_impl) { + break; + } + } + + if (group_impl == NULL) { + errno = EINVAL; + return -1; + } + + rc = group_impl->net_impl->group_impl_add_sock(group_impl, sock); + if (rc == 0) { + TAILQ_INSERT_TAIL(&group_impl->socks, sock, link); + sock->cb_fn = cb_fn; + sock->cb_arg = cb_arg; + } + + return rc; +} + +int +spdk_sock_group_remove_sock(struct spdk_sock_group *group, struct spdk_sock *sock) +{ + struct spdk_sock_group_impl *group_impl = NULL; + int rc; + + STAILQ_FOREACH_FROM(group_impl, &group->group_impls, link) { + if (sock->net_impl == group_impl->net_impl) { + break; + } + } + + if (group_impl == NULL) { + errno = EINVAL; + return -1; + } + + rc = group_impl->net_impl->group_impl_remove_sock(group_impl, sock); + if (rc == 0) { + TAILQ_REMOVE(&group_impl->socks, sock, link); + sock->cb_fn = NULL; + sock->cb_arg = NULL; + } + + return rc; +} + +int +spdk_sock_group_poll(struct spdk_sock_group *group) +{ + return spdk_sock_group_poll_count(group, MAX_EVENTS_PER_POLL); +} + +static int +spdk_sock_group_impl_poll_count(struct spdk_sock_group_impl *group_impl, + struct spdk_sock_group *group, + int max_events) +{ + struct spdk_sock *socks[MAX_EVENTS_PER_POLL]; + int num_events, i; + + if (TAILQ_EMPTY(&group_impl->socks)) { + return 0; + } + + num_events = group_impl->net_impl->group_impl_poll(group_impl, max_events, socks); + if (num_events == -1) { + return -1; + } + + for (i = 0; i < num_events; i++) { + struct spdk_sock *sock = socks[i]; + + assert(sock->cb_fn != NULL); + sock->cb_fn(sock->cb_arg, group, sock); + } + return 0; +} + +int +spdk_sock_group_poll_count(struct spdk_sock_group *group, int max_events) +{ + struct spdk_sock_group_impl *group_impl = NULL; + int rc, final_rc = 0; + + if (max_events < 1) { + errno = -EINVAL; + return -1; + } + + /* + * Only poll for up to 32 events at a time - if more events are pending, + * the next call to this function will reap them. + */ + if (max_events > MAX_EVENTS_PER_POLL) { + max_events = MAX_EVENTS_PER_POLL; + } + + STAILQ_FOREACH_FROM(group_impl, &group->group_impls, link) { + rc = spdk_sock_group_impl_poll_count(group_impl, group, max_events); + if (rc != 0) { + final_rc = rc; + SPDK_ERRLOG("group_impl_poll_count for net(%s) failed\n", + group_impl->net_impl->name); + } + } + + return final_rc; +} + +int +spdk_sock_group_close(struct spdk_sock_group **group) +{ + struct spdk_sock_group_impl *group_impl = NULL, *tmp; + int rc; + + if (*group == NULL) { + errno = EBADF; + return -1; + } + + STAILQ_FOREACH_SAFE(group_impl, &(*group)->group_impls, link, tmp) { + if (!TAILQ_EMPTY(&group_impl->socks)) { + errno = EBUSY; + return -1; + } + } + + STAILQ_FOREACH_SAFE(group_impl, &(*group)->group_impls, link, tmp) { + rc = group_impl->net_impl->group_impl_close(group_impl); + if (rc != 0) { + SPDK_ERRLOG("group_impl_close for net(%s) failed\n", + group_impl->net_impl->name); + } + free(group_impl); + } + + free(*group); + *group = NULL; + + return 0; +} + +void +spdk_net_impl_register(struct spdk_net_impl *impl) +{ + if (!strcmp("posix", impl->name)) { + STAILQ_INSERT_TAIL(&g_net_impls, impl, link); + } else { + STAILQ_INSERT_HEAD(&g_net_impls, impl, link); + } +} diff --git a/src/spdk/lib/sock/vpp/Makefile b/src/spdk/lib/sock/vpp/Makefile new file mode 100644 index 00000000..614fd2e3 --- /dev/null +++ b/src/spdk/lib/sock/vpp/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS += vpp.c + +LIBNAME = sock_vpp + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/sock/vpp/vpp.c b/src/spdk/lib/sock/vpp/vpp.c new file mode 100644 index 00000000..752250eb --- /dev/null +++ b/src/spdk/lib/sock/vpp/vpp.c @@ -0,0 +1,663 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/log.h" +#include "spdk/sock.h" +#include "spdk/net.h" +#include "spdk/string.h" +#include "spdk_internal/sock.h" +#include + +#define MAX_TMPBUF 1024 +#define PORTNUMLEN 32 + +static bool g_vpp_initialized = false; + +struct spdk_vpp_sock { + struct spdk_sock base; + int fd; +}; + +struct spdk_vpp_sock_group_impl { + struct spdk_sock_group_impl base; + int fd; +}; + +static int +get_addr_str(struct sockaddr *sa, char *host, size_t hlen) +{ + const char *result = NULL; + + if (sa == NULL || host == NULL) { + return -1; + } + + if (sa->sa_family == AF_INET) { + result = inet_ntop(AF_INET, &(((struct sockaddr_in *)sa)->sin_addr), + host, hlen); + } else { + result = inet_ntop(AF_INET6, &(((struct sockaddr_in6 *)sa)->sin6_addr), + host, hlen); + } + + if (result == NULL) { + return -1; + } + + return 0; +} + +#define __vpp_sock(sock) (struct spdk_vpp_sock *)sock +#define __vpp_group_impl(group) (struct spdk_vpp_sock_group_impl *)group + +static inline void +vcom_socket_copy_ep_to_sockaddr(struct sockaddr *addr, socklen_t *len, vppcom_endpt_t *ep) +{ + int sa_len, copy_len; + + assert(ep->vrf == VPPCOM_VRF_DEFAULT); + + if (ep->is_ip4 == VPPCOM_IS_IP4) { + addr->sa_family = AF_INET; + ((struct sockaddr_in *) addr)->sin_port = ep->port; + if (*len > sizeof(struct sockaddr_in)) { + *len = sizeof(struct sockaddr_in); + } + sa_len = sizeof(struct sockaddr_in) - sizeof(struct in_addr); + copy_len = *len - sa_len; + if (copy_len > 0) { + memcpy(&((struct sockaddr_in *) addr)->sin_addr, ep->ip, copy_len); + } + } else { + addr->sa_family = AF_INET6; + ((struct sockaddr_in6 *) addr)->sin6_port = ep->port; + if (*len > sizeof(struct sockaddr_in6)) { + *len = sizeof(struct sockaddr_in6); + } + sa_len = sizeof(struct sockaddr_in6) - sizeof(struct in6_addr); + copy_len = *len - sa_len; + if (copy_len > 0) { + memcpy(&((struct sockaddr_in6 *) addr)->sin6_addr, ep->ip, copy_len); + } + } +} + +static int +getsockname_vpp(int fd, struct sockaddr *addr, socklen_t *len) +{ + vppcom_endpt_t ep; + uint32_t size = sizeof(ep); + uint8_t addr_buf[sizeof(struct in6_addr)]; + int rc; + + if (!addr || !len) { + return -EFAULT; + } + + ep.ip = addr_buf; + + rc = vppcom_session_attr(fd, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); + if (rc == VPPCOM_OK) { + vcom_socket_copy_ep_to_sockaddr(addr, len, &ep); + } + + return rc; +} + + +static int +getpeername_vpp(int sock, struct sockaddr *addr, socklen_t *len) +{ + vppcom_endpt_t ep; + uint32_t size = sizeof(ep); + uint8_t addr_buf[sizeof(struct in6_addr)]; + int rc; + + if (!addr || !len) { + return -EFAULT; + } + + ep.ip = addr_buf; + + rc = vppcom_session_attr(sock, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); + if (rc == VPPCOM_OK) { + vcom_socket_copy_ep_to_sockaddr(addr, len, &ep); + } + + return rc; +} + +static int +spdk_vpp_sock_getaddr(struct spdk_sock *_sock, char *saddr, int slen, uint16_t *sport, + char *caddr, int clen, uint16_t *cport) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + struct sockaddr sa; + socklen_t salen; + int rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + memset(&sa, 0, sizeof(sa)); + salen = sizeof(sa); + rc = getsockname_vpp(sock->fd, &sa, &salen); + if (rc != 0) { + errno = -rc; + SPDK_ERRLOG("getsockname_vpp() failed (errno=%d)\n", errno); + return -1; + } + + rc = get_addr_str(&sa, saddr, slen); + if (rc != 0) { + /* Errno already set by get_addr_str() */ + SPDK_ERRLOG("get_addr_str() failed (errno=%d)\n", errno); + return -1; + } + + if (sport) { + if (sa.ss_family == 
AF_INET) { + *sport = ntohs(((struct sockaddr_in *) &sa)->sin_port); + } else if (sa.ss_family == AF_INET6) { + *sport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); + } + } + + memset(&sa, 0, sizeof(sa)); + salen = sizeof(sa); + rc = getpeername_vpp(sock->fd, &sa, &salen); + if (rc != 0) { + errno = -rc; + SPDK_ERRLOG("getpeername_vpp() failed (errno=%d)\n", errno); + return -1; + } + + rc = get_addr_str(&sa, caddr, clen); + if (rc != 0) { + /* Errno already set by get_addr_str() */ + SPDK_ERRLOG("get_addr_str() failed (errno=%d)\n", errno); + return -1; + } + + if (cport) { + if (sa.ss_family == AF_INET) { + *cport = ntohs(((struct sockaddr_in *) &sa)->sin_port); + } else if (sa.ss_family == AF_INET6) { + *cport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); + } + } + + return 0; +} + +enum spdk_vpp_create_type { + SPDK_SOCK_CREATE_LISTEN, + SPDK_SOCK_CREATE_CONNECT, +}; + +static struct spdk_sock * +spdk_vpp_sock_create(const char *ip, int port, enum spdk_vpp_create_type type) +{ + struct spdk_vpp_sock *sock; + int fd, rc; + vppcom_endpt_t endpt; + uint8_t addr_buf[sizeof(struct in6_addr)]; + + if (ip == NULL) { + return NULL; + } + + /* Check address family */ + if (inet_pton(AF_INET, ip, &addr_buf)) { + endpt.is_ip4 = VPPCOM_IS_IP4; + } else if (inet_pton(AF_INET6, ip, &addr_buf)) { + endpt.is_ip4 = VPPCOM_IS_IP6; + } else { + SPDK_ERRLOG("IP address with invalid format\n"); + return NULL; + } + endpt.vrf = VPPCOM_VRF_DEFAULT; + endpt.ip = (uint8_t *)&addr_buf; + endpt.port = htons(port); + + fd = vppcom_session_create(VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP, 1 /* is_nonblocking */); + if (fd < 0) { + errno = -fd; + SPDK_ERRLOG("vppcom_session_create() failed, errno = %d\n", errno); + return NULL; + } + + if (type == SPDK_SOCK_CREATE_LISTEN) { + rc = vppcom_session_bind(fd, &endpt); + if (rc != VPPCOM_OK) { + errno = -rc; + SPDK_ERRLOG("vppcom_session_bind() failed, errno = %d\n", errno); + vppcom_session_close(fd); + return NULL; + } + + rc = vppcom_session_listen(fd, 512); + if (rc != VPPCOM_OK) { + errno = -rc; + SPDK_ERRLOG("vppcom_session_listen() failed, errno = %d\n", errno); + vppcom_session_close(fd); + return NULL; + } + } else if (type == SPDK_SOCK_CREATE_CONNECT) { + rc = vppcom_session_connect(fd, &endpt); + if (rc != VPPCOM_OK) { + errno = -rc; + SPDK_ERRLOG("vppcom_session_connect() failed, errno = %d\n", errno); + vppcom_session_close(fd); + return NULL; + } + } + + sock = calloc(1, sizeof(*sock)); + if (sock == NULL) { + errno = -ENOMEM; + SPDK_ERRLOG("sock allocation failed\n"); + vppcom_session_close(fd); + return NULL; + } + + sock->fd = fd; + return &sock->base; +} + +static struct spdk_sock * +spdk_vpp_sock_listen(const char *ip, int port) +{ + if (!g_vpp_initialized) { + return NULL; + } + + return spdk_vpp_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN); +} + +static struct spdk_sock * +spdk_vpp_sock_connect(const char *ip, int port) +{ + if (!g_vpp_initialized) { + return NULL; + } + + return spdk_vpp_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT); +} + +static struct spdk_sock * +spdk_vpp_sock_accept(struct spdk_sock *_sock) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + vppcom_endpt_t endpt; + uint8_t ip[16]; + int rc; + struct spdk_vpp_sock *new_sock; + double wait_time = -1.0; + + endpt.ip = ip; + + assert(sock != NULL); + assert(g_vpp_initialized); + + rc = vppcom_session_accept(sock->fd, &endpt, O_NONBLOCK, wait_time); + if (rc < 0) { + errno = -rc; + return NULL; + } + + new_sock = calloc(1, sizeof(*sock)); + if (new_sock == NULL) { + 
SPDK_ERRLOG("sock allocation failed\n"); + vppcom_session_close(rc); + return NULL; + } + + new_sock->fd = rc; + return &new_sock->base; +} + +static int +spdk_vpp_sock_close(struct spdk_sock *_sock) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + int rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + rc = vppcom_session_close(sock->fd); + if (rc != VPPCOM_OK) { + errno = -rc; + return -1; + } + free(sock); + + return 0; +} + +static ssize_t +spdk_vpp_sock_recv(struct spdk_sock *_sock, void *buf, size_t len) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + int rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + rc = vppcom_session_read(sock->fd, buf, len); + if (rc < 0) { + errno = -rc; + return -1; + } + return rc; +} + +static ssize_t +spdk_vpp_sock_writev(struct spdk_sock *_sock, struct iovec *iov, int iovcnt) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + ssize_t total = 0; + int i, rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + for (i = 0; i < iovcnt; ++i) { + rc = vppcom_session_write(sock->fd, iov[i].iov_base, iov[i].iov_len); + if (rc < 0) { + if (total > 0) { + break; + } else { + errno = -rc; + return -1; + } + } else { + total += rc; + } + } + return total; +} + + +/* + * TODO: Check if there are similar parameters to configure in VPP + * to three below. + */ +static int +spdk_vpp_sock_set_recvlowat(struct spdk_sock *_sock, int nbytes) +{ + assert(g_vpp_initialized); + + return 0; +} + +static int +spdk_vpp_sock_set_recvbuf(struct spdk_sock *_sock, int sz) +{ + assert(g_vpp_initialized); + + return 0; +} + +static int +spdk_vpp_sock_set_sendbuf(struct spdk_sock *_sock, int sz) +{ + assert(g_vpp_initialized); + + return 0; +} + +static bool +spdk_vpp_sock_is_ipv6(struct spdk_sock *_sock) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + vppcom_endpt_t ep; + uint32_t size = sizeof(ep); + uint8_t addr_buf[sizeof(struct in6_addr)]; + int rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + ep.ip = addr_buf; + + rc = vppcom_session_attr(sock->fd, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); + if (rc != VPPCOM_OK) { + errno = -rc; + return false; + } + + return (ep.is_ip4 == VPPCOM_IS_IP6); +} + +static bool +spdk_vpp_sock_is_ipv4(struct spdk_sock *_sock) +{ + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + vppcom_endpt_t ep; + uint32_t size = sizeof(ep); + uint8_t addr_buf[sizeof(struct in6_addr)]; + int rc; + + assert(sock != NULL); + assert(g_vpp_initialized); + + ep.ip = addr_buf; + + rc = vppcom_session_attr(sock->fd, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); + if (rc != VPPCOM_OK) { + errno = -rc; + return false; + } + + return (ep.is_ip4 == VPPCOM_IS_IP4); +} + +static struct spdk_sock_group_impl * +spdk_vpp_sock_group_impl_create(void) +{ + struct spdk_vpp_sock_group_impl *group_impl; + int fd; + + if (!g_vpp_initialized) { + return NULL; + } + + group_impl = calloc(1, sizeof(*group_impl)); + if (group_impl == NULL) { + SPDK_ERRLOG("sock_group allocation failed\n"); + return NULL; + } + + fd = vppcom_epoll_create(); + if (fd < 0) { + free(group_impl); + return NULL; + } + + group_impl->fd = fd; + + return &group_impl->base; +} + +static int +spdk_vpp_sock_group_impl_add_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) +{ + struct spdk_vpp_sock_group_impl *group = __vpp_group_impl(_group); + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + int rc; + struct epoll_event event; + + assert(sock != NULL); + assert(group != NULL); + assert(g_vpp_initialized); + + event.events = EPOLLIN; + 
event.data.ptr = sock; + + rc = vppcom_epoll_ctl(group->fd, EPOLL_CTL_ADD, sock->fd, &event); + if (rc != VPPCOM_OK) { + errno = -rc; + return -1; + } + + return 0; +} + +static int +spdk_vpp_sock_group_impl_remove_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) +{ + struct spdk_vpp_sock_group_impl *group = __vpp_group_impl(_group); + struct spdk_vpp_sock *sock = __vpp_sock(_sock); + int rc; + struct epoll_event event; + + assert(sock != NULL); + assert(group != NULL); + assert(g_vpp_initialized); + + rc = vppcom_epoll_ctl(group->fd, EPOLL_CTL_DEL, sock->fd, &event); + if (rc != VPPCOM_OK) { + errno = -rc; + return -1; + } + + return 0; +} + +static int +spdk_vpp_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events, + struct spdk_sock **socks) +{ + struct spdk_vpp_sock_group_impl *group = __vpp_group_impl(_group); + int num_events, i; + struct epoll_event events[MAX_EVENTS_PER_POLL]; + + assert(group != NULL); + assert(socks != NULL); + assert(g_vpp_initialized); + + num_events = vppcom_epoll_wait(group->fd, events, max_events, 0); + if (num_events < 0) { + errno = -num_events; + return -1; + } + + for (i = 0; i < num_events; i++) { + socks[i] = events[i].data.ptr; + } + + return num_events; +} + +static int +spdk_vpp_sock_group_impl_close(struct spdk_sock_group_impl *_group) +{ + struct spdk_vpp_sock_group_impl *group = __vpp_group_impl(_group); + int rc; + + assert(group != NULL); + assert(g_vpp_initialized); + + rc = vppcom_session_close(group->fd); + if (rc != VPPCOM_OK) { + errno = -rc; + return -1; + } + + return 0; +} + +static struct spdk_net_impl g_vpp_net_impl = { + .name = "vpp", + .getaddr = spdk_vpp_sock_getaddr, + .connect = spdk_vpp_sock_connect, + .listen = spdk_vpp_sock_listen, + .accept = spdk_vpp_sock_accept, + .close = spdk_vpp_sock_close, + .recv = spdk_vpp_sock_recv, + .writev = spdk_vpp_sock_writev, + .set_recvlowat = spdk_vpp_sock_set_recvlowat, + .set_recvbuf = spdk_vpp_sock_set_recvbuf, + .set_sendbuf = spdk_vpp_sock_set_sendbuf, + .is_ipv6 = spdk_vpp_sock_is_ipv6, + .is_ipv4 = spdk_vpp_sock_is_ipv4, + .group_impl_create = spdk_vpp_sock_group_impl_create, + .group_impl_add_sock = spdk_vpp_sock_group_impl_add_sock, + .group_impl_remove_sock = spdk_vpp_sock_group_impl_remove_sock, + .group_impl_poll = spdk_vpp_sock_group_impl_poll, + .group_impl_close = spdk_vpp_sock_group_impl_close, +}; + +SPDK_NET_IMPL_REGISTER(vpp, &g_vpp_net_impl); + +static int +spdk_vpp_net_framework_init(void) +{ + int rc; + char *app_name; + + app_name = spdk_sprintf_alloc("SPDK_%d", getpid()); + if (app_name == NULL) { + SPDK_ERRLOG("Cannot alloc memory for SPDK app name\n"); + return -ENOMEM; + } + + rc = vppcom_app_create(app_name); + if (rc == 0) { + g_vpp_initialized = true; + } + + free(app_name); + + return 0; +} + +static void +spdk_vpp_net_framework_fini(void) +{ + if (g_vpp_initialized) { + vppcom_app_destroy(); + } +} + +static struct spdk_net_framework g_vpp_net_framework = { + .name = "vpp", + .init = spdk_vpp_net_framework_init, + .fini = spdk_vpp_net_framework_fini, +}; + +SPDK_NET_FRAMEWORK_REGISTER(vpp, &g_vpp_net_framework); diff --git a/src/spdk/lib/thread/Makefile b/src/spdk/lib/thread/Makefile new file mode 100644 index 00000000..467e32ff --- /dev/null +++ b/src/spdk/lib/thread/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = thread.c +LIBNAME = thread + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/thread/thread.c b/src/spdk/lib/thread/thread.c new file mode 100644 index 00000000..c014f4ed --- /dev/null +++ b/src/spdk/lib/thread/thread.c @@ -0,0 +1,768 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/string.h" +#include "spdk/thread.h" + +#include "spdk_internal/log.h" + +#ifdef __linux__ +#include <sys/prctl.h> +#endif + +#ifdef __FreeBSD__ +#include <pthread_np.h> +#endif + +static pthread_mutex_t g_devlist_mutex = PTHREAD_MUTEX_INITIALIZER; + +struct io_device { + void *io_device; + char *name; + spdk_io_channel_create_cb create_cb; + spdk_io_channel_destroy_cb destroy_cb; + spdk_io_device_unregister_cb unregister_cb; + struct spdk_thread *unregister_thread; + uint32_t ctx_size; + uint32_t for_each_count; + TAILQ_ENTRY(io_device) tailq; + + uint32_t refcnt; + + bool unregistered; +}; + +static TAILQ_HEAD(, io_device) g_io_devices = TAILQ_HEAD_INITIALIZER(g_io_devices); + +struct spdk_thread { + pthread_t thread_id; + spdk_thread_pass_msg msg_fn; + spdk_start_poller start_poller_fn; + spdk_stop_poller stop_poller_fn; + void *thread_ctx; + TAILQ_HEAD(, spdk_io_channel) io_channels; + TAILQ_ENTRY(spdk_thread) tailq; + char *name; +}; + +static TAILQ_HEAD(, spdk_thread) g_threads = TAILQ_HEAD_INITIALIZER(g_threads); +static uint32_t g_thread_count = 0; + +static struct spdk_thread * +_get_thread(void) +{ + pthread_t thread_id; + struct spdk_thread *thread; + + thread_id = pthread_self(); + + thread = NULL; + TAILQ_FOREACH(thread, &g_threads, tailq) { + if (thread->thread_id == thread_id) { + return thread; + } + } + + return NULL; +} + +static void +_set_thread_name(const char *thread_name) +{ +#if defined(__linux__) + prctl(PR_SET_NAME, thread_name, 0, 0, 0); +#elif defined(__FreeBSD__) + pthread_set_name_np(pthread_self(), thread_name); +#else +#error missing platform support for thread name +#endif +} + +int +spdk_thread_lib_init(void) +{ + return 0; +} + +void +spdk_thread_lib_fini(void) +{ +} + +struct spdk_thread * +spdk_allocate_thread(spdk_thread_pass_msg msg_fn, + spdk_start_poller start_poller_fn, + spdk_stop_poller stop_poller_fn, + void *thread_ctx, const char *name) +{ + struct spdk_thread *thread; + + pthread_mutex_lock(&g_devlist_mutex); + + thread = _get_thread(); + if (thread) { + SPDK_ERRLOG("Double allocated SPDK thread\n"); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + thread = calloc(1, sizeof(*thread)); + if (!thread) { + SPDK_ERRLOG("Unable to allocate memory for thread\n"); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + thread->thread_id = pthread_self(); + thread->msg_fn = msg_fn; + thread->start_poller_fn = start_poller_fn; + thread->stop_poller_fn = stop_poller_fn; + thread->thread_ctx = thread_ctx; + TAILQ_INIT(&thread->io_channels); + TAILQ_INSERT_TAIL(&g_threads, thread, tailq); + g_thread_count++; + if (name) { + _set_thread_name(name); + thread->name = strdup(name); + } else { + thread->name = spdk_sprintf_alloc("%p", thread); + } + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Allocating new thread %s\n", thread->name); + + pthread_mutex_unlock(&g_devlist_mutex); + + return thread; +} + +void +spdk_free_thread(void) +{ + struct spdk_thread *thread; + + pthread_mutex_lock(&g_devlist_mutex); + + thread = _get_thread(); + if (!thread) { + SPDK_ERRLOG("No thread allocated\n"); + pthread_mutex_unlock(&g_devlist_mutex); + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Freeing thread %s\n", thread->name); + + assert(g_thread_count > 0); + g_thread_count--; + TAILQ_REMOVE(&g_threads, thread, tailq); + free(thread->name); + free(thread); + + pthread_mutex_unlock(&g_devlist_mutex); +} + +uint32_t +spdk_thread_get_count(void) +{ + /* + * Return cached value of the current thread count. 
We could acquire the + * lock and iterate through the TAILQ of threads to count them, but that + * count could still be invalidated after we release the lock. + */ + return g_thread_count; +} + +struct spdk_thread * +spdk_get_thread(void) +{ + struct spdk_thread *thread; + + pthread_mutex_lock(&g_devlist_mutex); + + thread = _get_thread(); + if (!thread) { + SPDK_ERRLOG("No thread allocated\n"); + } + + pthread_mutex_unlock(&g_devlist_mutex); + + return thread; +} + +const char * +spdk_thread_get_name(const struct spdk_thread *thread) +{ + return thread->name; +} + +void +spdk_thread_send_msg(const struct spdk_thread *thread, spdk_thread_fn fn, void *ctx) +{ + thread->msg_fn(fn, ctx, thread->thread_ctx); +} + + +struct spdk_poller * +spdk_poller_register(spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds) +{ + struct spdk_thread *thread; + struct spdk_poller *poller; + + thread = spdk_get_thread(); + if (!thread) { + assert(false); + return NULL; + } + + if (!thread->start_poller_fn || !thread->stop_poller_fn) { + SPDK_ERRLOG("No related functions to start requested poller\n"); + assert(false); + return NULL; + } + + poller = thread->start_poller_fn(thread->thread_ctx, fn, arg, period_microseconds); + if (!poller) { + SPDK_ERRLOG("Unable to start requested poller\n"); + assert(false); + return NULL; + } + + return poller; +} + +void +spdk_poller_unregister(struct spdk_poller **ppoller) +{ + struct spdk_thread *thread; + struct spdk_poller *poller; + + poller = *ppoller; + if (poller == NULL) { + return; + } + + *ppoller = NULL; + + thread = spdk_get_thread(); + + if (thread) { + thread->stop_poller_fn(poller, thread->thread_ctx); + } +} + +struct call_thread { + struct spdk_thread *cur_thread; + spdk_thread_fn fn; + void *ctx; + + struct spdk_thread *orig_thread; + spdk_thread_fn cpl; +}; + +static void +spdk_on_thread(void *ctx) +{ + struct call_thread *ct = ctx; + + ct->fn(ct->ctx); + + pthread_mutex_lock(&g_devlist_mutex); + ct->cur_thread = TAILQ_NEXT(ct->cur_thread, tailq); + pthread_mutex_unlock(&g_devlist_mutex); + + if (!ct->cur_thread) { + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Completed thread iteration\n"); + + spdk_thread_send_msg(ct->orig_thread, ct->cpl, ct->ctx); + free(ctx); + } else { + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Continuing thread iteration to %s\n", + ct->cur_thread->name); + + spdk_thread_send_msg(ct->cur_thread, spdk_on_thread, ctx); + } +} + +void +spdk_for_each_thread(spdk_thread_fn fn, void *ctx, spdk_thread_fn cpl) +{ + struct call_thread *ct; + + ct = calloc(1, sizeof(*ct)); + if (!ct) { + SPDK_ERRLOG("Unable to perform thread iteration\n"); + cpl(ctx); + return; + } + + ct->fn = fn; + ct->ctx = ctx; + ct->cpl = cpl; + + pthread_mutex_lock(&g_devlist_mutex); + ct->orig_thread = _get_thread(); + ct->cur_thread = TAILQ_FIRST(&g_threads); + pthread_mutex_unlock(&g_devlist_mutex); + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Starting thread iteration from %s\n", + ct->orig_thread->name); + + spdk_thread_send_msg(ct->cur_thread, spdk_on_thread, ct); +} + +void +spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb, + spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size, + const char *name) +{ + struct io_device *dev, *tmp; + + assert(io_device != NULL); + assert(create_cb != NULL); + assert(destroy_cb != NULL); + + dev = calloc(1, sizeof(struct io_device)); + if (dev == NULL) { + SPDK_ERRLOG("could not allocate io_device\n"); + return; + } + + dev->io_device = io_device; + if (name) { + dev->name = strdup(name); + } else { + dev->name = 
spdk_sprintf_alloc("%p", dev); + } + dev->create_cb = create_cb; + dev->destroy_cb = destroy_cb; + dev->unregister_cb = NULL; + dev->ctx_size = ctx_size; + dev->for_each_count = 0; + dev->unregistered = false; + dev->refcnt = 0; + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Registering io_device %s (%p) on thread %s\n", + dev->name, dev->io_device, spdk_get_thread()->name); + + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_FOREACH(tmp, &g_io_devices, tailq) { + if (tmp->io_device == io_device) { + SPDK_ERRLOG("io_device %p already registered\n", io_device); + free(dev->name); + free(dev); + pthread_mutex_unlock(&g_devlist_mutex); + return; + } + } + TAILQ_INSERT_TAIL(&g_io_devices, dev, tailq); + pthread_mutex_unlock(&g_devlist_mutex); +} + +static void +_finish_unregister(void *arg) +{ + struct io_device *dev = arg; + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Finishing unregistration of io_device %s (%p) on thread %s\n", + dev->name, dev->io_device, dev->unregister_thread->name); + + dev->unregister_cb(dev->io_device); + free(dev->name); + free(dev); +} + +static void +_spdk_io_device_free(struct io_device *dev) +{ + if (dev->unregister_cb == NULL) { + free(dev->name); + free(dev); + } else { + assert(dev->unregister_thread != NULL); + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "io_device %s (%p) needs to unregister from thread %s\n", + dev->name, dev->io_device, dev->unregister_thread->name); + spdk_thread_send_msg(dev->unregister_thread, _finish_unregister, dev); + } +} + +void +spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb) +{ + struct io_device *dev; + uint32_t refcnt; + struct spdk_thread *thread; + + thread = spdk_get_thread(); + + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_FOREACH(dev, &g_io_devices, tailq) { + if (dev->io_device == io_device) { + break; + } + } + + if (!dev) { + SPDK_ERRLOG("io_device %p not found\n", io_device); + assert(false); + pthread_mutex_unlock(&g_devlist_mutex); + return; + } + + if (dev->for_each_count > 0) { + SPDK_ERRLOG("io_device %p has %u for_each calls outstanding\n", io_device, dev->for_each_count); + pthread_mutex_unlock(&g_devlist_mutex); + return; + } + + dev->unregister_cb = unregister_cb; + dev->unregistered = true; + TAILQ_REMOVE(&g_io_devices, dev, tailq); + refcnt = dev->refcnt; + dev->unregister_thread = thread; + pthread_mutex_unlock(&g_devlist_mutex); + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Unregistering io_device %s (%p) from thread %s\n", + dev->name, dev->io_device, thread->name); + + if (refcnt > 0) { + /* defer deletion */ + return; + } + + _spdk_io_device_free(dev); +} + +struct spdk_io_channel * +spdk_get_io_channel(void *io_device) +{ + struct spdk_io_channel *ch; + struct spdk_thread *thread; + struct io_device *dev; + int rc; + + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_FOREACH(dev, &g_io_devices, tailq) { + if (dev->io_device == io_device) { + break; + } + } + if (dev == NULL) { + SPDK_ERRLOG("could not find io_device %p\n", io_device); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + thread = _get_thread(); + if (!thread) { + SPDK_ERRLOG("No thread allocated\n"); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + TAILQ_FOREACH(ch, &thread->io_channels, tailq) { + if (ch->dev == dev) { + ch->ref++; + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n", + ch, dev->name, dev->io_device, thread->name, ch->ref); + + /* + * An I/O channel already exists for this device on this + * thread, so return it. 
+ */ + pthread_mutex_unlock(&g_devlist_mutex); + return ch; + } + } + + ch = calloc(1, sizeof(*ch) + dev->ctx_size); + if (ch == NULL) { + SPDK_ERRLOG("could not calloc spdk_io_channel\n"); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + ch->dev = dev; + ch->destroy_cb = dev->destroy_cb; + ch->thread = thread; + ch->ref = 1; + ch->destroy_ref = 0; + TAILQ_INSERT_TAIL(&thread->io_channels, ch, tailq); + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n", + ch, dev->name, dev->io_device, thread->name, ch->ref); + + dev->refcnt++; + + pthread_mutex_unlock(&g_devlist_mutex); + + rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch)); + if (rc == -1) { + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_REMOVE(&ch->thread->io_channels, ch, tailq); + dev->refcnt--; + free(ch); + pthread_mutex_unlock(&g_devlist_mutex); + return NULL; + } + + return ch; +} + +static void +_spdk_put_io_channel(void *arg) +{ + struct spdk_io_channel *ch = arg; + bool do_remove_dev = true; + + SPDK_DEBUGLOG(SPDK_LOG_THREAD, + "Releasing io_channel %p for io_device %s (%p). Channel thread %p. Current thread %s\n", + ch, ch->dev->name, ch->dev->io_device, ch->thread, spdk_get_thread()->name); + + assert(ch->thread == spdk_get_thread()); + + ch->destroy_ref--; + + if (ch->ref > 0 || ch->destroy_ref > 0) { + /* + * Another reference to the associated io_device was requested + * after this message was sent but before it had a chance to + * execute. + */ + return; + } + + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_REMOVE(&ch->thread->io_channels, ch, tailq); + pthread_mutex_unlock(&g_devlist_mutex); + + /* Don't hold the devlist mutex while the destroy_cb is called. */ + ch->destroy_cb(ch->dev->io_device, spdk_io_channel_get_ctx(ch)); + + pthread_mutex_lock(&g_devlist_mutex); + ch->dev->refcnt--; + + if (!ch->dev->unregistered) { + do_remove_dev = false; + } + + if (ch->dev->refcnt > 0) { + do_remove_dev = false; + } + + pthread_mutex_unlock(&g_devlist_mutex); + + if (do_remove_dev) { + _spdk_io_device_free(ch->dev); + } + free(ch); +} + +void +spdk_put_io_channel(struct spdk_io_channel *ch) +{ + SPDK_DEBUGLOG(SPDK_LOG_THREAD, + "Putting io_channel %p for io_device %s (%p) on thread %s refcnt %u\n", + ch, ch->dev->name, ch->dev->io_device, ch->thread->name, ch->ref); + + ch->ref--; + + if (ch->ref == 0) { + ch->destroy_ref++; + spdk_thread_send_msg(ch->thread, _spdk_put_io_channel, ch); + } +} + +struct spdk_io_channel * +spdk_io_channel_from_ctx(void *ctx) +{ + return (struct spdk_io_channel *)((uint8_t *)ctx - sizeof(struct spdk_io_channel)); +} + +struct spdk_thread * +spdk_io_channel_get_thread(struct spdk_io_channel *ch) +{ + return ch->thread; +} + +struct spdk_io_channel_iter { + void *io_device; + struct io_device *dev; + spdk_channel_msg fn; + int status; + void *ctx; + struct spdk_io_channel *ch; + + struct spdk_thread *cur_thread; + + struct spdk_thread *orig_thread; + spdk_channel_for_each_cpl cpl; +}; + +void * +spdk_io_channel_iter_get_io_device(struct spdk_io_channel_iter *i) +{ + return i->io_device; +} + +struct spdk_io_channel * +spdk_io_channel_iter_get_channel(struct spdk_io_channel_iter *i) +{ + return i->ch; +} + +void * +spdk_io_channel_iter_get_ctx(struct spdk_io_channel_iter *i) +{ + return i->ctx; +} + +static void +_call_completion(void *ctx) +{ + struct spdk_io_channel_iter *i = ctx; + + if (i->cpl != NULL) { + i->cpl(i, i->status); + } + free(i); +} + +static void +_call_channel(void *ctx) +{ + struct spdk_io_channel_iter *i 
= ctx; + struct spdk_io_channel *ch; + + /* + * It is possible that the channel was deleted before this + * message had a chance to execute. If so, skip calling + * the fn() on this thread. + */ + pthread_mutex_lock(&g_devlist_mutex); + TAILQ_FOREACH(ch, &i->cur_thread->io_channels, tailq) { + if (ch->dev->io_device == i->io_device) { + break; + } + } + pthread_mutex_unlock(&g_devlist_mutex); + + if (ch) { + i->fn(i); + } else { + spdk_for_each_channel_continue(i, 0); + } +} + +void +spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + spdk_channel_for_each_cpl cpl) +{ + struct spdk_thread *thread; + struct spdk_io_channel *ch; + struct spdk_io_channel_iter *i; + + i = calloc(1, sizeof(*i)); + if (!i) { + SPDK_ERRLOG("Unable to allocate iterator\n"); + return; + } + + i->io_device = io_device; + i->fn = fn; + i->ctx = ctx; + i->cpl = cpl; + + pthread_mutex_lock(&g_devlist_mutex); + i->orig_thread = _get_thread(); + + TAILQ_FOREACH(thread, &g_threads, tailq) { + TAILQ_FOREACH(ch, &thread->io_channels, tailq) { + if (ch->dev->io_device == io_device) { + ch->dev->for_each_count++; + i->dev = ch->dev; + i->cur_thread = thread; + i->ch = ch; + pthread_mutex_unlock(&g_devlist_mutex); + spdk_thread_send_msg(thread, _call_channel, i); + return; + } + } + } + + pthread_mutex_unlock(&g_devlist_mutex); + + spdk_thread_send_msg(i->orig_thread, _call_completion, i); +} + +void +spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_thread *thread; + struct spdk_io_channel *ch; + + assert(i->cur_thread == spdk_get_thread()); + + i->status = status; + + pthread_mutex_lock(&g_devlist_mutex); + if (status) { + goto end; + } + thread = TAILQ_NEXT(i->cur_thread, tailq); + while (thread) { + TAILQ_FOREACH(ch, &thread->io_channels, tailq) { + if (ch->dev->io_device == i->io_device) { + i->cur_thread = thread; + i->ch = ch; + pthread_mutex_unlock(&g_devlist_mutex); + spdk_thread_send_msg(thread, _call_channel, i); + return; + } + } + thread = TAILQ_NEXT(thread, tailq); + } + +end: + i->dev->for_each_count--; + i->ch = NULL; + pthread_mutex_unlock(&g_devlist_mutex); + + spdk_thread_send_msg(i->orig_thread, _call_completion, i); +} + + +SPDK_LOG_REGISTER_COMPONENT("thread", SPDK_LOG_THREAD) diff --git a/src/spdk/lib/trace/Makefile b/src/spdk/lib/trace/Makefile new file mode 100644 index 00000000..8bd9ec17 --- /dev/null +++ b/src/spdk/lib/trace/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = trace.c trace_flags.c +LIBNAME = trace + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/trace/trace.c b/src/spdk/lib/trace/trace.c new file mode 100644 index 00000000..8981bcbd --- /dev/null +++ b/src/spdk/lib/trace/trace.c @@ -0,0 +1,168 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/trace.h" + +static int g_trace_fd = -1; +static char g_shm_name[64]; + +struct spdk_trace_histories *g_trace_histories; + +void +_spdk_trace_record(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id, uint32_t size, + uint64_t object_id, uint64_t arg1) +{ + struct spdk_trace_history *lcore_history; + struct spdk_trace_entry *next_entry; + unsigned lcore; + + lcore = spdk_env_get_current_core(); + if (lcore >= SPDK_TRACE_MAX_LCORE) { + return; + } + + lcore_history = &g_trace_histories->per_lcore_history[lcore]; + if (tsc == 0) { + tsc = spdk_get_ticks(); + } + + lcore_history->tpoint_count[tpoint_id]++; + + next_entry = &lcore_history->entries[lcore_history->next_entry]; + next_entry->tsc = tsc; + next_entry->tpoint_id = tpoint_id; + next_entry->poller_id = poller_id; + next_entry->size = size; + next_entry->object_id = object_id; + next_entry->arg1 = arg1; + + lcore_history->next_entry++; + if (lcore_history->next_entry == SPDK_TRACE_SIZE) { + lcore_history->next_entry = 0; + } +} + +int +spdk_trace_init(const char *shm_name) +{ + int i = 0; + + snprintf(g_shm_name, sizeof(g_shm_name), "%s", shm_name); + + g_trace_fd = shm_open(shm_name, O_RDWR | O_CREAT, 0600); + if (g_trace_fd == -1) { + fprintf(stderr, "could not shm_open spdk_trace\n"); + fprintf(stderr, "errno=%d %s\n", errno, spdk_strerror(errno)); + return 1; + } + + if (ftruncate(g_trace_fd, sizeof(*g_trace_histories)) != 0) { + fprintf(stderr, "could not truncate shm\n"); + goto trace_init_err; + } + + g_trace_histories = mmap(NULL, sizeof(*g_trace_histories), PROT_READ | PROT_WRITE, + MAP_SHARED, g_trace_fd, 0); + if (g_trace_histories == MAP_FAILED) { + fprintf(stderr, "could not mmap shm\n"); + goto trace_init_err; + } + + /* TODO: On FreeBSD, mlock on shm_open'd memory doesn't seem to work. Docs say that kern.ipc.shm_use_phys=1 + * should allow it, but forcing that doesn't seem to work either. So for now just skip mlock on FreeBSD + * altogether. + */ +#if defined(__linux__) + if (mlock(g_trace_histories, sizeof(*g_trace_histories)) != 0) { + fprintf(stderr, "Could not mlock shm for tracing - %s.\n", spdk_strerror(errno)); + if (errno == ENOMEM) { + fprintf(stderr, "Check /dev/shm for old tracing files that can be deleted.\n"); + } + goto trace_init_err; + } +#endif + + memset(g_trace_histories, 0, sizeof(*g_trace_histories)); + + g_trace_flags = &g_trace_histories->flags; + + g_trace_flags->tsc_rate = spdk_get_ticks_hz(); + + for (i = 0; i < SPDK_TRACE_MAX_LCORE; i++) { + g_trace_histories->per_lcore_history[i].lcore = i; + } + + spdk_trace_flags_init(); + + return 0; + +trace_init_err: + if (g_trace_histories != MAP_FAILED) { + munmap(g_trace_histories, sizeof(*g_trace_histories)); + } + close(g_trace_fd); + g_trace_fd = -1; + shm_unlink(shm_name); + g_trace_histories = NULL; + + return 1; + +} + +void +spdk_trace_cleanup(void) +{ + bool unlink; + + if (g_trace_histories == NULL) { + return; + } + + /* + * Only unlink the shm if there were no tracepoints enabled. This ensures the file + * can be used after this process exits/crashes for debugging. + * Note that we have to calculate this value before g_trace_histories gets unmapped. 
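+ * (g_trace_flags points into that mapping, so it can no longer be read after munmap().)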
+ */ + unlink = spdk_mem_all_zero(g_trace_flags->tpoint_mask, sizeof(g_trace_flags->tpoint_mask)); + munmap(g_trace_histories, sizeof(struct spdk_trace_histories)); + g_trace_histories = NULL; + close(g_trace_fd); + + if (unlink) { + shm_unlink(g_shm_name); + } +} diff --git a/src/spdk/lib/trace/trace_flags.c b/src/spdk/lib/trace/trace_flags.c new file mode 100644 index 00000000..69ca0bdf --- /dev/null +++ b/src/spdk/lib/trace/trace_flags.c @@ -0,0 +1,179 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/trace.h" +#include "spdk/log.h" + +struct spdk_trace_flags *g_trace_flags = NULL; +static struct spdk_trace_register_fn *g_reg_fn_head = NULL; + +uint64_t +spdk_trace_get_tpoint_mask(uint32_t group_id) +{ + if (group_id >= SPDK_TRACE_MAX_GROUP_ID) { + SPDK_ERRLOG("%s: invalid group ID %d\n", __func__, group_id); + return 0ULL; + } + + return g_trace_flags->tpoint_mask[group_id]; +} + +void +spdk_trace_set_tpoints(uint32_t group_id, uint64_t tpoint_mask) +{ + if (group_id >= SPDK_TRACE_MAX_GROUP_ID) { + SPDK_ERRLOG("%s: invalid group ID %d\n", __func__, group_id); + return; + } + + g_trace_flags->tpoint_mask[group_id] |= tpoint_mask; +} + +void +spdk_trace_clear_tpoints(uint32_t group_id, uint64_t tpoint_mask) +{ + if (group_id >= SPDK_TRACE_MAX_GROUP_ID) { + SPDK_ERRLOG("%s: invalid group ID %d\n", __func__, group_id); + return; + } + + g_trace_flags->tpoint_mask[group_id] &= ~tpoint_mask; +} + +uint64_t +spdk_trace_get_tpoint_group_mask(void) +{ + uint64_t mask = 0x0; + int i; + + for (i = 0; i < SPDK_TRACE_MAX_GROUP_ID; i++) { + if (spdk_trace_get_tpoint_mask(i) != 0) { + mask |= (1ULL << i); + } + } + + return mask; +} + +void +spdk_trace_set_tpoint_group_mask(uint64_t tpoint_group_mask) +{ + int i; + + for (i = 0; i < SPDK_TRACE_MAX_GROUP_ID; i++) { + if (tpoint_group_mask & (1ULL << i)) { + spdk_trace_set_tpoints(i, -1ULL); + } + } +} + +void +spdk_trace_register_owner(uint8_t type, char id_prefix) +{ + struct spdk_trace_owner *owner; + + assert(type != OWNER_NONE); + + /* 'owner' has 256 entries and since 'type' is a uint8_t, it + * can't overrun the array. + */ + owner = &g_trace_flags->owner[type]; + assert(owner->type == 0); + + owner->type = type; + owner->id_prefix = id_prefix; +} + +void +spdk_trace_register_object(uint8_t type, char id_prefix) +{ + struct spdk_trace_object *object; + + assert(type != OBJECT_NONE); + + /* 'object' has 256 entries and since 'type' is a uint8_t, it + * can't overrun the array. + */ + object = &g_trace_flags->object[type]; + assert(object->type == 0); + + object->type = type; + object->id_prefix = id_prefix; +} + +void +spdk_trace_register_description(const char *name, const char *short_name, + uint16_t tpoint_id, uint8_t owner_type, + uint8_t object_type, uint8_t new_object, + uint8_t arg1_is_ptr, const char *arg1_name) +{ + struct spdk_trace_tpoint *tpoint; + + assert(tpoint_id != 0); + assert(tpoint_id < SPDK_TRACE_MAX_TPOINT_ID); + + tpoint = &g_trace_flags->tpoint[tpoint_id]; + assert(tpoint->tpoint_id == 0); + + snprintf(tpoint->name, sizeof(tpoint->name), "%s", name); + snprintf(tpoint->short_name, sizeof(tpoint->short_name), "%s", short_name); + tpoint->tpoint_id = tpoint_id; + tpoint->object_type = object_type; + tpoint->owner_type = owner_type; + tpoint->new_object = new_object; + tpoint->arg1_is_ptr = arg1_is_ptr; + snprintf(tpoint->arg1_name, sizeof(tpoint->arg1_name), "%s", arg1_name); +} + +void +spdk_trace_add_register_fn(struct spdk_trace_register_fn *reg_fn) +{ + reg_fn->next = g_reg_fn_head; + g_reg_fn_head = reg_fn; +} + + +void +spdk_trace_flags_init(void) +{ + struct spdk_trace_register_fn *reg_fn; + + reg_fn = g_reg_fn_head; + while (reg_fn) { + reg_fn->reg_fn(); + reg_fn = reg_fn->next; + } +} diff --git a/src/spdk/lib/ut_mock/Makefile b/src/spdk/lib/ut_mock/Makefile new file mode 100644 index 00000000..99584181 --- /dev/null +++ b/src/spdk/lib/ut_mock/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = mock.c +LIBNAME = spdk_mock + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/ut_mock/mock.c b/src/spdk/lib/ut_mock/mock.c new file mode 100644 index 00000000..6d141b40 --- /dev/null +++ b/src/spdk/lib/ut_mock/mock.c @@ -0,0 +1,45 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk_internal/mock.h" + +DEFINE_WRAPPER(calloc, void *, (size_t nmemb, size_t size), (nmemb, size)) + +DEFINE_WRAPPER(pthread_mutex_init, int, + (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr), + (mtx, attr)) + +DEFINE_WRAPPER(pthread_mutexattr_init, int, + (pthread_mutexattr_t *attr), (attr)) + +DEFINE_WRAPPER(pthread_self, pthread_t, (void), ()) diff --git a/src/spdk/lib/util/Makefile b/src/spdk/lib/util/Makefile new file mode 100644 index 00000000..c31a506b --- /dev/null +++ b/src/spdk/lib/util/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = base64.c bit_array.c cpuset.c crc16.c crc32.c crc32c.c crc32_ieee.c fd.c strerror_tls.c string.c uuid.c +LIBNAME = util +LOCAL_SYS_LIBS = -luuid + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/util/base64.c b/src/spdk/lib/util/base64.c new file mode 100644 index 00000000..81361263 --- /dev/null +++ b/src/spdk/lib/util/base64.c @@ -0,0 +1,228 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/endian.h" +#include "spdk/base64.h" + +#define BASE64_ENC_BITMASK 0x3FUL +#define BASE64_PADDING_CHAR '=' + +static const char base64_enc_table[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static const char base64_urfsafe_enc_table[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_"; + +static const uint8_t +base64_dec_table[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +static const uint8_t +base64_urlsafe_dec_table[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +static int +_spdk_base64_encode(char *dst, const char *enc_table, const void *src, size_t src_len) +{ + uint32_t raw_u32; + + if (!dst || !src || src_len <= 0) { + return -EINVAL; + } + + while (src_len >= 4) { + raw_u32 = from_be32(src); + + *dst++ = enc_table[(raw_u32 >> 26) & BASE64_ENC_BITMASK]; + *dst++ = enc_table[(raw_u32 >> 20) & BASE64_ENC_BITMASK]; + *dst++ = enc_table[(raw_u32 >> 14) & BASE64_ENC_BITMASK]; + *dst++ = enc_table[(raw_u32 >> 8) & BASE64_ENC_BITMASK]; + + src_len -= 3; + src += 3; + } + + if (src_len == 0) { + goto out; + } + + raw_u32 = 0; + memcpy(&raw_u32, src, src_len); + raw_u32 = from_be32(&raw_u32); + + *dst++ = enc_table[(raw_u32 >> 26) & BASE64_ENC_BITMASK]; + *dst++ = enc_table[(raw_u32 >> 20) & BASE64_ENC_BITMASK]; + *dst++ = (src_len >= 2) ? enc_table[(raw_u32 >> 14) & BASE64_ENC_BITMASK] : BASE64_PADDING_CHAR; + *dst++ = (src_len == 3) ? enc_table[(raw_u32 >> 8) & BASE64_ENC_BITMASK] : BASE64_PADDING_CHAR; + +out: + *dst = '\0'; + + return 0; +} + +int +spdk_base64_encode(char *dst, const void *src, size_t src_len) +{ + return _spdk_base64_encode(dst, base64_enc_table, src, src_len); +} + +int +spdk_base64_urlsafe_encode(char *dst, const void *src, size_t src_len) +{ + return _spdk_base64_encode(dst, base64_urfsafe_enc_table, src, src_len); +} + +static int +_spdk_base64_decode(void *dst, size_t *_dst_len, const uint8_t *dec_table, const char *src) +{ + size_t src_strlen, dst_len; + size_t tail_len = 0; + const uint8_t *src_in; + uint32_t tmp[4]; + int i; + + if (!dst || !src) { + return -EINVAL; + } + + src_strlen = strlen(src); + + /* strlen of src should be 4n */ + if (src_strlen == 0 || src_strlen % 4 != 0) { + return -EINVAL; + } + + /* Consider Base64 padding, it at most has 2 padding characters. */ + for (i = 0; i < 2; i++) { + if (src[src_strlen - 1] != BASE64_PADDING_CHAR) { + break; + } + src_strlen--; + } + + /* strlen of src without padding shouldn't be 4n+1 */ + if (src_strlen == 0 || src_strlen % 4 == 1) { + return -EINVAL; + } + + dst_len = spdk_base64_get_decoded_len(src_strlen); + src_in = (const uint8_t *) src; + + /* space of dst can be used by to_be32 */ + while (src_strlen > 4) { + tmp[0] = dec_table[*src_in++]; + tmp[1] = dec_table[*src_in++]; + tmp[2] = dec_table[*src_in++]; + tmp[3] = dec_table[*src_in++]; + + if (tmp[0] == 255 || tmp[1] == 255 || tmp[2] == 255 || tmp[3] == 255) { + return -EINVAL; + } + + to_be32(dst, tmp[3] << 8 | tmp[2] << 14 | tmp[1] << 20 | tmp[0] << 26); + + dst += 3; + src_strlen -= 4; + } + + /* space of dst is not enough to be used by to_be32 */ + tmp[0] = dec_table[src_in[0]]; + tmp[1] = dec_table[src_in[1]]; + tmp[2] = (src_strlen >= 3) ? dec_table[src_in[2]] : 0; + tmp[3] = (src_strlen == 4) ? 
dec_table[src_in[3]] : 0; + tail_len = src_strlen - 1; + + if (tmp[0] == 255 || tmp[1] == 255 || tmp[2] == 255 || tmp[3] == 255) { + return -EINVAL; + } + + to_be32(&tmp[3], tmp[3] << 8 | tmp[2] << 14 | tmp[1] << 20 | tmp[0] << 26); + memcpy(dst, (uint8_t *)&tmp[3], tail_len); + + /* Assign pointers */ + if (_dst_len) { + *_dst_len = dst_len; + } + + return 0; +} + +int +spdk_base64_decode(void *dst, size_t *dst_len, const char *src) +{ + return _spdk_base64_decode(dst, dst_len, base64_dec_table, src); +} + +int +spdk_base64_urlsafe_decode(void *dst, size_t *dst_len, const char *src) +{ + return _spdk_base64_decode(dst, dst_len, base64_urlsafe_dec_table, src); +} diff --git a/src/spdk/lib/util/bit_array.c b/src/spdk/lib/util/bit_array.c new file mode 100644 index 00000000..d6c112f7 --- /dev/null +++ b/src/spdk/lib/util/bit_array.c @@ -0,0 +1,313 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/bit_array.h" +#include "spdk/env.h" + +#include "spdk/likely.h" +#include "spdk/util.h" + +typedef uint64_t spdk_bit_array_word; +#define SPDK_BIT_ARRAY_WORD_TZCNT(x) (__builtin_ctzll(x)) +#define SPDK_BIT_ARRAY_WORD_POPCNT(x) (__builtin_popcountll(x)) +#define SPDK_BIT_ARRAY_WORD_C(x) ((spdk_bit_array_word)(x)) +#define SPDK_BIT_ARRAY_WORD_BYTES sizeof(spdk_bit_array_word) +#define SPDK_BIT_ARRAY_WORD_BITS (SPDK_BIT_ARRAY_WORD_BYTES * 8) +#define SPDK_BIT_ARRAY_WORD_INDEX_SHIFT spdk_u32log2(SPDK_BIT_ARRAY_WORD_BITS) +#define SPDK_BIT_ARRAY_WORD_INDEX_MASK ((1u << SPDK_BIT_ARRAY_WORD_INDEX_SHIFT) - 1) + +struct spdk_bit_array { + uint32_t bit_count; + spdk_bit_array_word words[]; +}; + +struct spdk_bit_array * +spdk_bit_array_create(uint32_t num_bits) +{ + struct spdk_bit_array *ba = NULL; + + spdk_bit_array_resize(&ba, num_bits); + + return ba; +} + +void +spdk_bit_array_free(struct spdk_bit_array **bap) +{ + struct spdk_bit_array *ba; + + if (!bap) { + return; + } + + ba = *bap; + *bap = NULL; + spdk_dma_free(ba); +} + +static inline uint32_t +spdk_bit_array_word_count(uint32_t num_bits) +{ + return (num_bits + SPDK_BIT_ARRAY_WORD_BITS - 1) >> SPDK_BIT_ARRAY_WORD_INDEX_SHIFT; +} + +static inline spdk_bit_array_word +spdk_bit_array_word_mask(uint32_t num_bits) +{ + assert(num_bits < SPDK_BIT_ARRAY_WORD_BITS); + return (SPDK_BIT_ARRAY_WORD_C(1) << num_bits) - 1; +} + +int +spdk_bit_array_resize(struct spdk_bit_array **bap, uint32_t num_bits) +{ + struct spdk_bit_array *new_ba; + uint32_t old_word_count, new_word_count; + size_t new_size; + + /* + * Max number of bits allowed is UINT32_MAX - 1, because we use UINT32_MAX to denote + * when a set or cleared bit cannot be found. + */ + if (!bap || num_bits == UINT32_MAX) { + return -EINVAL; + } + + new_word_count = spdk_bit_array_word_count(num_bits); + new_size = offsetof(struct spdk_bit_array, words) + new_word_count * SPDK_BIT_ARRAY_WORD_BYTES; + + /* + * Always keep one extra word with a 0 and a 1 past the actual required size so that the + * find_first functions can just keep going until they match. + */ + new_size += SPDK_BIT_ARRAY_WORD_BYTES; + + new_ba = (struct spdk_bit_array *)spdk_dma_realloc(*bap, new_size, 64, NULL); + if (!new_ba) { + return -ENOMEM; + } + + /* + * Set up special extra word (see above comment about find_first_clear). + * + * This is set to 0b10 so that find_first_clear will find a 0 at the very first + * bit past the end of the buffer, and find_first_set will find a 1 at the next bit + * past that. + */ + new_ba->words[new_word_count] = 0x2; + + if (*bap == NULL) { + old_word_count = 0; + new_ba->bit_count = 0; + } else { + old_word_count = spdk_bit_array_word_count(new_ba->bit_count); + } + + if (new_word_count > old_word_count) { + /* Zero out new entries */ + memset(&new_ba->words[old_word_count], 0, + (new_word_count - old_word_count) * SPDK_BIT_ARRAY_WORD_BYTES); + } else if (new_word_count == old_word_count && num_bits < new_ba->bit_count) { + /* Make sure any existing partial last word is cleared beyond the new num_bits. 
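spdk_bit_array_count_set() relies on the bits past bit_count staying zero.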
*/ + uint32_t last_word_bits; + spdk_bit_array_word mask; + + last_word_bits = num_bits & SPDK_BIT_ARRAY_WORD_INDEX_MASK; + mask = spdk_bit_array_word_mask(last_word_bits); + new_ba->words[old_word_count - 1] &= mask; + } + + new_ba->bit_count = num_bits; + *bap = new_ba; + return 0; +} + +uint32_t +spdk_bit_array_capacity(const struct spdk_bit_array *ba) +{ + return ba->bit_count; +} + +static inline int +_spdk_bit_array_get_word(const struct spdk_bit_array *ba, uint32_t bit_index, + uint32_t *word_index, uint32_t *word_bit_index) +{ + if (spdk_unlikely(bit_index >= ba->bit_count)) { + return -EINVAL; + } + + *word_index = bit_index >> SPDK_BIT_ARRAY_WORD_INDEX_SHIFT; + *word_bit_index = bit_index & SPDK_BIT_ARRAY_WORD_INDEX_MASK; + + return 0; +} + +bool +spdk_bit_array_get(const struct spdk_bit_array *ba, uint32_t bit_index) +{ + uint32_t word_index, word_bit_index; + + if (_spdk_bit_array_get_word(ba, bit_index, &word_index, &word_bit_index)) { + return false; + } + + return (ba->words[word_index] >> word_bit_index) & 1U; +} + +int +spdk_bit_array_set(struct spdk_bit_array *ba, uint32_t bit_index) +{ + uint32_t word_index, word_bit_index; + + if (_spdk_bit_array_get_word(ba, bit_index, &word_index, &word_bit_index)) { + return -EINVAL; + } + + ba->words[word_index] |= (SPDK_BIT_ARRAY_WORD_C(1) << word_bit_index); + return 0; +} + +void +spdk_bit_array_clear(struct spdk_bit_array *ba, uint32_t bit_index) +{ + uint32_t word_index, word_bit_index; + + if (_spdk_bit_array_get_word(ba, bit_index, &word_index, &word_bit_index)) { + /* + * Clearing past the end of the bit array is a no-op, since bit past the end + * are implicitly 0. + */ + return; + } + + ba->words[word_index] &= ~(SPDK_BIT_ARRAY_WORD_C(1) << word_bit_index); +} + +static inline uint32_t +_spdk_bit_array_find_first(const struct spdk_bit_array *ba, uint32_t start_bit_index, + spdk_bit_array_word xor_mask) +{ + uint32_t word_index, first_word_bit_index; + spdk_bit_array_word word, first_word_mask; + const spdk_bit_array_word *words, *cur_word; + + if (spdk_unlikely(start_bit_index >= ba->bit_count)) { + return ba->bit_count; + } + + word_index = start_bit_index >> SPDK_BIT_ARRAY_WORD_INDEX_SHIFT; + words = ba->words; + cur_word = &words[word_index]; + + /* + * Special case for first word: skip start_bit_index % SPDK_BIT_ARRAY_WORD_BITS bits + * within the first word. + */ + first_word_bit_index = start_bit_index & SPDK_BIT_ARRAY_WORD_INDEX_MASK; + first_word_mask = spdk_bit_array_word_mask(first_word_bit_index); + + word = (*cur_word ^ xor_mask) & ~first_word_mask; + + /* + * spdk_bit_array_resize() guarantees that an extra word with a 1 and a 0 will always be + * at the end of the words[] array, so just keep going until a word matches. + */ + while (word == 0) { + word = *++cur_word ^ xor_mask; + } + + return ((uintptr_t)cur_word - (uintptr_t)words) * 8 + SPDK_BIT_ARRAY_WORD_TZCNT(word); +} + + +uint32_t +spdk_bit_array_find_first_set(const struct spdk_bit_array *ba, uint32_t start_bit_index) +{ + uint32_t bit_index; + + bit_index = _spdk_bit_array_find_first(ba, start_bit_index, 0); + + /* + * If we ran off the end of the array and found the 1 bit in the extra word, + * return UINT32_MAX to indicate no actual 1 bits were found. 
+ */ + if (bit_index >= ba->bit_count) { + bit_index = UINT32_MAX; + } + + return bit_index; +} + +uint32_t +spdk_bit_array_find_first_clear(const struct spdk_bit_array *ba, uint32_t start_bit_index) +{ + uint32_t bit_index; + + bit_index = _spdk_bit_array_find_first(ba, start_bit_index, SPDK_BIT_ARRAY_WORD_C(-1)); + + /* + * If we ran off the end of the array and found the 0 bit in the extra word, + * return UINT32_MAX to indicate no actual 0 bits were found. + */ + if (bit_index >= ba->bit_count) { + bit_index = UINT32_MAX; + } + + return bit_index; +} + +uint32_t +spdk_bit_array_count_set(const struct spdk_bit_array *ba) +{ + const spdk_bit_array_word *cur_word = ba->words; + uint32_t word_count = spdk_bit_array_word_count(ba->bit_count); + uint32_t set_count = 0; + + while (word_count--) { + /* + * No special treatment is needed for the last (potentially partial) word, since + * spdk_bit_array_resize() makes sure the bits past bit_count are cleared. + */ + set_count += SPDK_BIT_ARRAY_WORD_POPCNT(*cur_word++); + } + + return set_count; +} + +uint32_t +spdk_bit_array_count_clear(const struct spdk_bit_array *ba) +{ + return ba->bit_count - spdk_bit_array_count_set(ba); +} diff --git a/src/spdk/lib/util/cpuset.c b/src/spdk/lib/util/cpuset.c new file mode 100644 index 00000000..1a02e59f --- /dev/null +++ b/src/spdk/lib/util/cpuset.c @@ -0,0 +1,320 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/cpuset.h" +#include "spdk/log.h" + +struct spdk_cpuset { + char str[SPDK_CPUSET_SIZE / 4]; + uint8_t cpus[SPDK_CPUSET_SIZE / 8]; +}; + +struct spdk_cpuset * +spdk_cpuset_alloc(void) +{ + return (struct spdk_cpuset *)calloc(sizeof(struct spdk_cpuset), 1); +} + +void +spdk_cpuset_free(struct spdk_cpuset *set) +{ + free(set); +} + +bool +spdk_cpuset_equal(const struct spdk_cpuset *set1, const struct spdk_cpuset *set2) +{ + assert(set1 != NULL); + assert(set2 != NULL); + return memcmp(set1->cpus, set2->cpus, sizeof(set2->cpus)) == 0; +} + +void +spdk_cpuset_copy(struct spdk_cpuset *set1, const struct spdk_cpuset *set2) +{ + assert(set1 != NULL); + assert(set2 != NULL); + memcpy(&set1->cpus, &set2->cpus, sizeof(set2->cpus)); +} + +void +spdk_cpuset_and(struct spdk_cpuset *set1, const struct spdk_cpuset *set2) +{ + unsigned int i; + assert(set1 != NULL); + assert(set2 != NULL); + for (i = 0; i < sizeof(set2->cpus); i++) { + set1->cpus[i] &= set2->cpus[i]; + } +} + +void +spdk_cpuset_or(struct spdk_cpuset *set1, const struct spdk_cpuset *set2) +{ + unsigned int i; + assert(set1 != NULL); + assert(set2 != NULL); + for (i = 0; i < sizeof(set2->cpus); i++) { + set1->cpus[i] |= set2->cpus[i]; + } +} + +void +spdk_cpuset_zero(struct spdk_cpuset *set) +{ + assert(set != NULL); + memset(set->cpus, 0, sizeof(set->cpus)); +} + +void +spdk_cpuset_set_cpu(struct spdk_cpuset *set, uint32_t cpu, bool state) +{ + assert(set != NULL); + assert(cpu < sizeof(set->cpus) * 8); + if (state) { + set->cpus[cpu / 8] |= (1U << (cpu % 8)); + } else { + set->cpus[cpu / 8] &= ~(1U << (cpu % 8)); + } +} + +bool +spdk_cpuset_get_cpu(const struct spdk_cpuset *set, uint32_t cpu) +{ + assert(set != NULL); + assert(cpu < sizeof(set->cpus) * 8); + return (set->cpus[cpu / 8] >> (cpu % 8)) & 1U; +} + +uint32_t +spdk_cpuset_count(const struct spdk_cpuset *set) +{ + uint32_t count = 0; + uint8_t n; + unsigned int i; + for (i = 0; i < sizeof(set->cpus); i++) { + n = set->cpus[i]; + while (n) { + n &= (n - 1); + count++; + } + } + return count; +} + +const char * +spdk_cpuset_fmt(struct spdk_cpuset *set) +{ + uint32_t lcore, lcore_max = 0; + int val, i, n; + char *ptr; + static const char *hex = "0123456789abcdef"; + + assert(set != NULL); + + for (lcore = 0; lcore < sizeof(set->cpus) * 8; lcore++) { + if (spdk_cpuset_get_cpu(set, lcore)) { + lcore_max = lcore; + } + } + + ptr = set->str; + n = lcore_max / 8; + val = set->cpus[n]; + + /* Store first number only if it is not leading zero */ + if ((val & 0xf0) != 0) { + *(ptr++) = hex[(val & 0xf0) >> 4]; + } + *(ptr++) = hex[val & 0x0f]; + + for (i = n - 1; i >= 0; i--) { + val = set->cpus[i]; + *(ptr++) = hex[(val & 0xf0) >> 4]; + *(ptr++) = hex[val & 0x0f]; + } + *ptr = '\0'; + + return set->str; +} + +static int +hex_value(uint8_t c) +{ +#define V(x, y) [x] = y + 1 + static const int8_t val[256] = { + V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4), + V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9), + V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF), + V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF), + }; +#undef V + + return val[c] - 1; +} + +static int +parse_list(const char *mask, struct spdk_cpuset *set) +{ + char *end; + const char *ptr = mask; + uint32_t lcore; + uint32_t lcore_min, lcore_max; + + spdk_cpuset_zero(set); + lcore_min = UINT32_MAX; + + ptr++; + end = (char *)ptr; + do { + while (isblank(*ptr)) { + ptr++; + } + if (*ptr == '\0' || *ptr == ']' || *ptr == '-' || *ptr == ',') { 
+ goto invalid_character; + } + + errno = 0; + lcore = strtoul(ptr, &end, 10); + if (errno) { + SPDK_ERRLOG("Conversion of core mask in '%s' failed\n", mask); + return -1; + } + + if (lcore >= sizeof(set->cpus) * 8) { + SPDK_ERRLOG("Core number %" PRIu32 " is out of range in '%s'\n", lcore, mask); + return -1; + } + + while (isblank(*end)) { + end++; + } + + if (*end == '-') { + lcore_min = lcore; + } else if (*end == ',' || *end == ']') { + lcore_max = lcore; + if (lcore_min == UINT32_MAX) { + lcore_min = lcore; + } + if (lcore_min > lcore_max) { + SPDK_ERRLOG("Invalid range of CPUs (%" PRIu32 " > %" PRIu32 ")\n", + lcore_min, lcore_max); + return -1; + } + for (lcore = lcore_min; lcore <= lcore_max; lcore++) { + spdk_cpuset_set_cpu(set, lcore, true); + } + lcore_min = UINT32_MAX; + } else { + goto invalid_character; + } + + ptr = end + 1; + + } while (*end != ']'); + + return 0; + +invalid_character: + if (*end == '\0') { + SPDK_ERRLOG("Unexpected end of core list '%s'\n", mask); + } else { + SPDK_ERRLOG("Parsing of core list '%s' failed on character '%c'\n", mask, *end); + } + return -1; +} + +static int +parse_mask(const char *mask, struct spdk_cpuset *set, size_t len) +{ + int i, j; + char c; + int val; + uint32_t lcore = 0; + + if (mask[0] == '0' && (mask[1] == 'x' || mask[1] == 'X')) { + mask += 2; + len -= 2; + } + + spdk_cpuset_zero(set); + for (i = len - 1; i >= 0; i--) { + c = mask[i]; + val = hex_value(c); + if (val < 0) { + /* Invalid character */ + SPDK_ERRLOG("Invalid character in core mask '%s' (%c)\n", mask, c); + return -1; + } + for (j = 0; j < 4 && lcore < sizeof(set->cpus); j++, lcore++) { + if ((1 << j) & val) { + spdk_cpuset_set_cpu(set, lcore, true); + } + } + } + + return 0; +} + +int +spdk_cpuset_parse(struct spdk_cpuset *set, const char *mask) +{ + int ret; + size_t len; + + if (mask == NULL || set == NULL) { + return -1; + } + + while (isblank(*mask)) { + mask++; + } + + len = strlen(mask); + while (len > 0 && isblank(mask[len - 1])) { + len--; + } + + if (len == 0) { + return -1; + } + + if (mask[0] == '[') { + ret = parse_list(mask, set); + } else { + ret = parse_mask(mask, set, len); + } + + return ret; +} diff --git a/src/spdk/lib/util/crc16.c b/src/spdk/lib/util/crc16.c new file mode 100644 index 00000000..491c9058 --- /dev/null +++ b/src/spdk/lib/util/crc16.c @@ -0,0 +1,53 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/crc16.h" + +uint16_t +spdk_crc16_t10dif(const void *buf, size_t len) +{ + uint32_t j, rem = 0; + const uint8_t *data = (const uint8_t *)buf; + size_t i; + + uint16_t poly = SPDK_T10DIF_CRC16_POLYNOMIAL; + + for (i = 0; i < len; i++) { + rem = rem ^ (data[i] << 8); + for (j = 0; j < 8; j++) { + rem = rem << 1; + rem = (rem & 0x10000) ? rem ^ poly : rem; + } + } + return (uint16_t)rem; +} diff --git a/src/spdk/lib/util/crc32.c b/src/spdk/lib/util/crc32.c new file mode 100644 index 00000000..dfef9c54 --- /dev/null +++ b/src/spdk/lib/util/crc32.c @@ -0,0 +1,66 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/crc32.h" + +void +spdk_crc32_table_init(struct spdk_crc32_table *table, uint32_t polynomial_reflect) +{ + int i, j; + uint32_t val; + + for (i = 0; i < 256; i++) { + val = i; + for (j = 0; j < 8; j++) { + if (val & 1) { + val = (val >> 1) ^ polynomial_reflect; + } else { + val = (val >> 1); + } + } + table->table[i] = val; + } +} + +uint32_t +spdk_crc32_update(const struct spdk_crc32_table *table, const void *buf, size_t len, uint32_t crc) +{ + const uint8_t *buf_u8 = buf; + size_t i; + + for (i = 0; i < len; i++) { + crc = (crc >> 8) ^ table->table[(crc ^ buf_u8[i]) & 0xff]; + } + + return crc; +} diff --git a/src/spdk/lib/util/crc32_ieee.c b/src/spdk/lib/util/crc32_ieee.c new file mode 100644 index 00000000..2956e3fc --- /dev/null +++ b/src/spdk/lib/util/crc32_ieee.c @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/crc32.h" + +static struct spdk_crc32_table g_crc32_ieee_table; + +__attribute__((constructor)) static void +spdk_crc32_ieee_init(void) +{ + spdk_crc32_table_init(&g_crc32_ieee_table, SPDK_CRC32_POLYNOMIAL_REFLECT); +} + +uint32_t +spdk_crc32_ieee_update(const void *buf, size_t len, uint32_t crc) +{ + return spdk_crc32_update(&g_crc32_ieee_table, buf, len, crc); +} diff --git a/src/spdk/lib/util/crc32c.c b/src/spdk/lib/util/crc32c.c new file mode 100644 index 00000000..e95283b3 --- /dev/null +++ b/src/spdk/lib/util/crc32c.c @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/crc32.h" + +#if defined(__x86_64__) && defined(__SSE4_2__) +#include + +uint32_t +spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) +{ + uint64_t crc_tmp64; + size_t count; + + /* _mm_crc32_u64() needs a 64-bit intermediate value */ + crc_tmp64 = crc; + + /* Process as much of the buffer as possible in 64-bit blocks. */ + count = len / 8; + while (count--) { + uint64_t block; + + /* + * Use memcpy() to avoid unaligned loads, which are undefined behavior in C. + * The compiler will optimize out the memcpy() in release builds. + */ + memcpy(&block, buf, sizeof(block)); + crc_tmp64 = _mm_crc32_u64(crc_tmp64, block); + buf += sizeof(block); + } + crc = (uint32_t)crc_tmp64; + + /* Handle any trailing bytes. */ + count = len & 7; + while (count--) { + crc = _mm_crc32_u8(crc, *(const uint8_t *)buf); + buf++; + } + + return crc; +} + +#else /* SSE 4.2 (CRC32 instruction) not available */ + +static struct spdk_crc32_table g_crc32c_table; + +__attribute__((constructor)) static void +spdk_crc32c_init(void) +{ + spdk_crc32_table_init(&g_crc32c_table, SPDK_CRC32C_POLYNOMIAL_REFLECT); +} + +uint32_t +spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) +{ + return spdk_crc32_update(&g_crc32c_table, buf, len, crc); +} + +#endif diff --git a/src/spdk/lib/util/fd.c b/src/spdk/lib/util/fd.c new file mode 100644 index 00000000..6b0d0d55 --- /dev/null +++ b/src/spdk/lib/util/fd.c @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/fd.h" + +#ifdef __linux__ +#include +#endif + +static uint64_t +dev_get_size(int fd) +{ +#if defined(DIOCGMEDIASIZE) /* FreeBSD */ + off_t size; + + if (ioctl(fd, DIOCGMEDIASIZE, &size) == 0) { + return size; + } +#elif defined(__linux__) && defined(BLKGETSIZE64) + uint64_t size; + + if (ioctl(fd, BLKGETSIZE64, &size) == 0) { + return size; + } +#endif + + return 0; +} + +uint32_t +spdk_fd_get_blocklen(int fd) +{ +#if defined(DKIOCGETBLOCKSIZE) /* FreeBSD */ + uint32_t blocklen; + + if (ioctl(fd, DKIOCGETBLOCKSIZE, &blocklen) == 0) { + return blocklen; + } +#elif defined(__linux__) && defined(BLKSSZGET) + uint32_t blocklen; + + if (ioctl(fd, BLKSSZGET, &blocklen) == 0) { + return blocklen; + } +#endif + + return 0; +} + +uint64_t +spdk_fd_get_size(int fd) +{ + struct stat st; + + if (fstat(fd, &st) != 0) { + return 0; + } + + if (S_ISLNK(st.st_mode)) { + return 0; + } + + if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { + return dev_get_size(fd); + } else if (S_ISREG(st.st_mode)) { + return st.st_size; + } + + /* Not REG, CHR or BLK */ + return 0; +} diff --git a/src/spdk/lib/util/strerror_tls.c b/src/spdk/lib/util/strerror_tls.c new file mode 100644 index 00000000..c9dc8f13 --- /dev/null +++ b/src/spdk/lib/util/strerror_tls.c @@ -0,0 +1,43 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/string.h" + +static __thread char strerror_message[64]; + +const char * +spdk_strerror(int errnum) +{ + spdk_strerror_r(errnum, strerror_message, sizeof(strerror_message)); + return strerror_message; +} diff --git a/src/spdk/lib/util/string.c b/src/spdk/lib/util/string.c new file mode 100644 index 00000000..455aa20f --- /dev/null +++ b/src/spdk/lib/util/string.c @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/string.h" + +char * +spdk_vsprintf_alloc(const char *format, va_list args) +{ + va_list args_copy; + char *buf; + size_t bufsize; + int rc; + + /* Try with a small buffer first. */ + bufsize = 32; + + /* Limit maximum buffer size to something reasonable so we don't loop forever. */ + while (bufsize <= 1024 * 1024) { + buf = malloc(bufsize); + if (buf == NULL) { + return NULL; + } + + va_copy(args_copy, args); + rc = vsnprintf(buf, bufsize, format, args_copy); + va_end(args_copy); + + /* + * If vsnprintf() returned a count within our current buffer size, we are done. + * The count does not include the \0 terminator, so rc == bufsize is not OK. + */ + if (rc >= 0 && (size_t)rc < bufsize) { + return buf; + } + + /* + * vsnprintf() should return the required space, but some libc versions do not + * implement this correctly, so just double the buffer size and try again. 
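+	 * For example, a 50-character result with bufsize == 32 makes the
+	 * check above fail (rc == 50 >= 32), so the loop frees the buffer
+	 * and retries with bufsize == 64, which succeeds.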
+ * + * We don't need the data in buf, so rather than realloc(), use free() and malloc() + * again to avoid a copy. + */ + free(buf); + bufsize *= 2; + } + + return NULL; +} + +char * +spdk_sprintf_alloc(const char *format, ...) +{ + va_list args; + char *ret; + + va_start(args, format); + ret = spdk_vsprintf_alloc(format, args); + va_end(args); + + return ret; +} + +char * +spdk_strlwr(char *s) +{ + char *p; + + if (s == NULL) { + return NULL; + } + + p = s; + while (*p != '\0') { + *p = tolower(*p); + p++; + } + + return s; +} + +char * +spdk_strsepq(char **stringp, const char *delim) +{ + char *p, *q, *r; + int quoted = 0, bslash = 0; + + p = *stringp; + if (p == NULL) { + return NULL; + } + + r = q = p; + while (*q != '\0' && *q != '\n') { + /* eat quoted characters */ + if (bslash) { + bslash = 0; + *r++ = *q++; + continue; + } else if (quoted) { + if (quoted == '"' && *q == '\\') { + bslash = 1; + q++; + continue; + } else if (*q == quoted) { + quoted = 0; + q++; + continue; + } + *r++ = *q++; + continue; + } else if (*q == '\\') { + bslash = 1; + q++; + continue; + } else if (*q == '"' || *q == '\'') { + quoted = *q; + q++; + continue; + } + + /* separator? */ + if (strchr(delim, *q) == NULL) { + *r++ = *q++; + continue; + } + + /* new string */ + q++; + break; + } + *r = '\0'; + + /* skip tailer */ + while (*q != '\0' && strchr(delim, *q) != NULL) { + q++; + } + if (*q != '\0') { + *stringp = q; + } else { + *stringp = NULL; + } + + return p; +} + +char * +spdk_str_trim(char *s) +{ + char *p, *q; + + if (s == NULL) { + return NULL; + } + + /* remove header */ + p = s; + while (*p != '\0' && isspace(*p)) { + p++; + } + + /* remove tailer */ + q = p + strlen(p); + while (q - 1 >= p && isspace(*(q - 1))) { + q--; + *q = '\0'; + } + + /* if remove header, move */ + if (p != s) { + q = s; + while (*p != '\0') { + *q++ = *p++; + } + *q = '\0'; + } + + return s; +} + +void +spdk_strcpy_pad(void *dst, const char *src, size_t size, int pad) +{ + size_t len; + + len = strlen(src); + if (len < size) { + memcpy(dst, src, len); + memset((char *)dst + len, pad, size - len); + } else { + memcpy(dst, src, size); + } +} + +size_t +spdk_strlen_pad(const void *str, size_t size, int pad) +{ + const uint8_t *start; + const uint8_t *iter; + uint8_t pad_byte; + + pad_byte = (uint8_t)pad; + start = (const uint8_t *)str; + + if (size == 0) { + return 0; + } + + iter = start + size - 1; + while (1) { + if (*iter != pad_byte) { + return iter - start + 1; + } + + if (iter == start) { + /* Hit the start of the string finding only pad_byte. 
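+			 * For example, an 8-byte field containing only the pad
+			 * byte reports a length of 0 here, while "ab" padded
+			 * with spaces to 8 bytes reports 2.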
*/ + return 0; + } + iter--; + } +} + +int +spdk_parse_ip_addr(char *ip, char **host, char **port) +{ + char *p; + + if (ip == NULL) { + return -EINVAL; + } + + *host = NULL; + *port = NULL; + + if (ip[0] == '[') { + /* IPv6 */ + p = strchr(ip, ']'); + if (p == NULL) { + return -EINVAL; + } + *host = &ip[1]; + *p = '\0'; + + p++; + if (*p == '\0') { + return 0; + } else if (*p != ':') { + return -EINVAL; + } + + p++; + if (*p == '\0') { + return 0; + } + + *port = p; + } else { + /* IPv4 */ + p = strchr(ip, ':'); + if (p == NULL) { + *host = ip; + return 0; + } + + *host = ip; + *p = '\0'; + + p++; + if (*p == '\0') { + return 0; + } + + *port = p; + } + + return 0; +} + +size_t +spdk_str_chomp(char *s) +{ + size_t len = strlen(s); + size_t removed = 0; + + while (len > 0) { + if (s[len - 1] != '\r' && s[len - 1] != '\n') { + break; + } + + s[len - 1] = '\0'; + len--; + removed++; + } + + return removed; +} + +void +spdk_strerror_r(int errnum, char *buf, size_t buflen) +{ + int rc; + +#if defined(__USE_GNU) + char *new_buffer; + new_buffer = strerror_r(errnum, buf, buflen); + if (new_buffer != NULL) { + snprintf(buf, buflen, "%s", new_buffer); + rc = 0; + } else { + rc = 1; + } +#else + rc = strerror_r(errnum, buf, buflen); +#endif + + if (rc != 0) { + snprintf(buf, buflen, "Unknown error %d", errnum); + } +} + +int +spdk_parse_capacity(const char *cap_str, uint64_t *cap, bool *has_prefix) +{ + int rc; + char bin_prefix; + + rc = sscanf(cap_str, "%"SCNu64"%c", cap, &bin_prefix); + if (rc == 1) { + *has_prefix = false; + return 0; + } else if (rc == 0) { + if (errno == 0) { + /* No scanf matches - the string does not start with a digit */ + return -EINVAL; + } else { + /* Parsing error */ + return -errno; + } + } + + *has_prefix = true; + switch (bin_prefix) { + case 'k': + case 'K': + *cap *= 1024; + break; + case 'm': + case 'M': + *cap *= 1024 * 1024; + break; + case 'g': + case 'G': + *cap *= 1024 * 1024 * 1024; + break; + default: + return -EINVAL; + } + + return 0; +} + +bool +spdk_mem_all_zero(const void *data, size_t size) +{ + const uint8_t *buf = data; + + while (size--) { + if (*buf++ != 0) { + return false; + } + } + + return true; +} diff --git a/src/spdk/lib/util/uuid.c b/src/spdk/lib/util/uuid.c new file mode 100644 index 00000000..1af7368f --- /dev/null +++ b/src/spdk/lib/util/uuid.c @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/uuid.h" + +#include + +SPDK_STATIC_ASSERT(sizeof(struct spdk_uuid) == sizeof(uuid_t), "Size mismatch"); + +int +spdk_uuid_parse(struct spdk_uuid *uuid, const char *uuid_str) +{ + return uuid_parse(uuid_str, (void *)uuid) == 0 ? 0 : -EINVAL; +} + +int +spdk_uuid_fmt_lower(char *uuid_str, size_t uuid_str_size, const struct spdk_uuid *uuid) +{ + if (uuid_str_size < SPDK_UUID_STRING_LEN) { + return -EINVAL; + } + + uuid_unparse_lower((void *)uuid, uuid_str); + return 0; +} + +int +spdk_uuid_compare(const struct spdk_uuid *u1, const struct spdk_uuid *u2) +{ + return uuid_compare((void *)u1, (void *)u2); +} + +void +spdk_uuid_generate(struct spdk_uuid *uuid) +{ + uuid_generate((void *)uuid); +} diff --git a/src/spdk/lib/vhost/Makefile b/src/spdk/lib/vhost/Makefile new file mode 100644 index 00000000..b46978e2 --- /dev/null +++ b/src/spdk/lib/vhost/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I. 
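+# The bundled DPDK vhost-user sources live in the rte_vhost subdirectory,
+# so its headers are added to the include path here.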
+CFLAGS += -Irte_vhost +CFLAGS += $(ENV_CFLAGS) + +C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c vhost_nvme.c + +LIBNAME = vhost + +DIRS-y += rte_vhost + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/vhost/rte_vhost/Makefile b/src/spdk/lib/vhost/rte_vhost/Makefile new file mode 100644 index 00000000..b0ae6335 --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/Makefile @@ -0,0 +1,46 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I. +CFLAGS += $(ENV_CFLAGS) +CFLAGS += -include rte_config.h + +# These are the DPDK vhost files copied (for now) into SPDK +C_SRCS += fd_man.c socket.c vhost_user.c vhost.c + +LIBNAME = rte_vhost + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/vhost/rte_vhost/fd_man.c b/src/spdk/lib/vhost/rte_vhost/fd_man.c new file mode 100644 index 00000000..2ceacc9a --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/fd_man.c @@ -0,0 +1,300 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fd_man.h" + +#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) + +static int +get_last_valid_idx(struct fdset *pfdset, int last_valid_idx) +{ + int i; + + for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--) + ; + + return i; +} + +static void +fdset_move(struct fdset *pfdset, int dst, int src) +{ + pfdset->fd[dst] = pfdset->fd[src]; + pfdset->rwfds[dst] = pfdset->rwfds[src]; +} + +static void +fdset_shrink_nolock(struct fdset *pfdset) +{ + int i; + int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1); + + for (i = 0; i < last_valid_idx; i++) { + if (pfdset->fd[i].fd != -1) + continue; + + fdset_move(pfdset, i, last_valid_idx); + last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1); + } + pfdset->num = last_valid_idx + 1; +} + +/* + * Find deleted fd entries and remove them + */ +static void +fdset_shrink(struct fdset *pfdset) +{ + pthread_mutex_lock(&pfdset->fd_mutex); + fdset_shrink_nolock(pfdset); + pthread_mutex_unlock(&pfdset->fd_mutex); +} + +/** + * Returns the index in the fdset for a given fd. + * @return + * index for the fd, or -1 if fd isn't in the fdset. + */ +static int +fdset_find_fd(struct fdset *pfdset, int fd) +{ + int i; + + for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++) + ; + + return i == pfdset->num ? -1 : i; +} + +static void +fdset_add_fd(struct fdset *pfdset, int idx, int fd, + fd_cb rcb, fd_cb wcb, void *dat) +{ + struct fdentry *pfdentry = &pfdset->fd[idx]; + struct pollfd *pfd = &pfdset->rwfds[idx]; + + pfdentry->fd = fd; + pfdentry->rcb = rcb; + pfdentry->wcb = wcb; + pfdentry->dat = dat; + + pfd->fd = fd; + pfd->events = rcb ? POLLIN : 0; + pfd->events |= wcb ? POLLOUT : 0; + pfd->revents = 0; +} + +void +fdset_init(struct fdset *pfdset) +{ + int i; + + if (pfdset == NULL) + return; + + for (i = 0; i < MAX_FDS; i++) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].dat = NULL; + } + pfdset->num = 0; +} + +/** + * Register the fd in the fdset with read/write handler and context. + */ +int +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -1; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; + if (i == -1) { + fdset_shrink_nolock(pfdset); + i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; + if (i == -1) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } + } + + fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); + pthread_mutex_unlock(&pfdset->fd_mutex); + + return 0; +} + +/** + * Unregister the fd from the fdset. + * Returns context of a given fd or NULL. 
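+ *
+ * If the fd's callback is currently running on the dispatch thread, this
+ * call keeps retrying until the busy flag drops, so the caller can free
+ * the returned context safely afterwards.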
+ */ +void * +fdset_del(struct fdset *pfdset, int fd) +{ + int i; + void *dat = NULL; + + if (pfdset == NULL || fd == -1) + return NULL; + + do { + pthread_mutex_lock(&pfdset->fd_mutex); + + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy == 0) { + /* busy indicates r/wcb is executing! */ + dat = pfdset->fd[i].dat; + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + i = -1; + } + pthread_mutex_unlock(&pfdset->fd_mutex); + } while (i != -1); + + return dat; +} + + +/** + * This functions runs in infinite blocking loop until there is no fd in + * pfdset. It calls corresponding r/w handler if there is event on the fd. + * + * Before the callback is called, we set the flag to busy status; If other + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it + * will wait until the flag is reset to zero(which indicates the callback is + * finished), then it could free the context after fdset_del. + */ +void * +fdset_event_dispatch(void *arg) +{ + int i; + struct pollfd *pfd; + struct fdentry *pfdentry; + fd_cb rcb, wcb; + void *dat; + int fd, numfds; + int remove1, remove2; + int need_shrink; + struct fdset *pfdset = arg; + + if (pfdset == NULL) + return NULL; + + while (1) { + + /* + * When poll is blocked, other threads might unregister + * listenfds from and register new listenfds into fdset. + * When poll returns, the entries for listenfds in the fdset + * might have been updated. It is ok if there is unwanted call + * for new listenfds. + */ + pthread_mutex_lock(&pfdset->fd_mutex); + numfds = pfdset->num; + pthread_mutex_unlock(&pfdset->fd_mutex); + + poll(pfdset->rwfds, numfds, 1000 /* millisecs */); + + need_shrink = 0; + for (i = 0; i < numfds; i++) { + pthread_mutex_lock(&pfdset->fd_mutex); + + pfdentry = &pfdset->fd[i]; + fd = pfdentry->fd; + pfd = &pfdset->rwfds[i]; + + if (fd < 0) { + need_shrink = 1; + pthread_mutex_unlock(&pfdset->fd_mutex); + continue; + } + + if (!pfd->revents) { + pthread_mutex_unlock(&pfdset->fd_mutex); + continue; + } + + remove1 = remove2 = 0; + + rcb = pfdentry->rcb; + wcb = pfdentry->wcb; + dat = pfdentry->dat; + pfdentry->busy = 1; + + pthread_mutex_unlock(&pfdset->fd_mutex); + + if (rcb && pfd->revents & (POLLIN | FDPOLLERR)) + rcb(fd, dat, &remove1); + if (wcb && pfd->revents & (POLLOUT | FDPOLLERR)) + wcb(fd, dat, &remove2); + pfdentry->busy = 0; + /* + * fdset_del needs to check busy flag. + * We don't allow fdset_del to be called in callback + * directly. + */ + /* + * When we are to clean up the fd from fdset, + * because the fd is closed in the cb, + * the old fd val could be reused by when creates new + * listen fd in another thread, we couldn't call + * fd_set_del. + */ + if (remove1 || remove2) { + pfdentry->fd = -1; + need_shrink = 1; + } + } + + if (need_shrink) + fdset_shrink(pfdset); + } + + return NULL; +} diff --git a/src/spdk/lib/vhost/rte_vhost/fd_man.h b/src/spdk/lib/vhost/rte_vhost/fd_man.h new file mode 100644 index 00000000..3a9d269b --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/fd_man.h @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FD_MAN_H_ +#define _FD_MAN_H_ +#include +#include +#include + +#define MAX_FDS 1024 + +typedef void (*fd_cb)(int fd, void *dat, int *remove); + +struct fdentry { + int fd; /* -1 indicates this entry is empty */ + fd_cb rcb; /* callback when this fd is readable. */ + fd_cb wcb; /* callback when this fd is writeable. */ + void *dat; /* fd context */ + int busy; /* whether this entry is being used in cb. */ +}; + +struct fdset { + struct pollfd rwfds[MAX_FDS]; + struct fdentry fd[MAX_FDS]; + pthread_mutex_t fd_mutex; + int num; /* current fd number of this fdset */ +}; + + +void fdset_init(struct fdset *pfdset); + +int fdset_add(struct fdset *pfdset, int fd, + fd_cb rcb, fd_cb wcb, void *dat); + +void *fdset_del(struct fdset *pfdset, int fd); + +void *fdset_event_dispatch(void *arg); + +#endif diff --git a/src/spdk/lib/vhost/rte_vhost/rte_vhost.h b/src/spdk/lib/vhost/rte_vhost/rte_vhost.h new file mode 100644 index 00000000..29f5b613 --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/rte_vhost.h @@ -0,0 +1,474 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VHOST_H_ +#define _RTE_VHOST_H_ + +/** + * @file + * Interface to vhost-user + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define RTE_VHOST_USER_CLIENT (1ULL << 0) +#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) +#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) + +/** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. + */ +struct rte_vhost_mem_region { + uint64_t guest_phys_addr; + uint64_t guest_user_addr; + uint64_t host_user_addr; + uint64_t size; + void *mmap_addr; + uint64_t mmap_size; + int fd; +}; + +/** + * Memory structure includes region and mapping information. + */ +struct rte_vhost_memory { + uint32_t nregions; + struct rte_vhost_mem_region regions[0]; +}; + +struct rte_vhost_vring { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint64_t log_guest_addr; + + int callfd; + int kickfd; + uint16_t size; + + uint16_t last_avail_idx; + uint16_t last_used_idx; +}; + +/** + * Device and vring operations. + */ +struct vhost_device_ops { + int (*new_device)(int vid); /**< Add device. */ + void (*destroy_device)(int vid); /**< Remove device. */ + + int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ + + /** + * Features could be changed after the feature negotiation. + * For example, VHOST_F_LOG_ALL will be set/cleared at the + * start/end of live migration, respectively. This callback + * is used to inform the application on such change. 
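+	 *
+	 * A handler will typically just test the bit it cares about, for
+	 * example:
+	 *
+	 *   logging_started = RTE_VHOST_NEED_LOG(features);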
+ */ + int (*features_changed)(int vid, uint64_t features); + int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf); + int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd); + int (*vhost_nvme_get_cap)(int vid, uint64_t *cap); + + int (*new_connection)(int vid); + void (*destroy_connection)(int vid); + + int (*get_config)(int vid, uint8_t *config, uint32_t config_len); + int (*set_config)(int vid, uint8_t *config, uint32_t offset, + uint32_t len, uint32_t flags); + + void *reserved[2]; /**< Reserved for future extension */ +}; + +/** + * Convert guest physical address to host virtual address + * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @return + * the host virtual address on success, 0 on failure + */ +static inline uint64_t __attribute__((always_inline)) +rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) +{ + struct rte_vhost_mem_region *reg; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + reg = &mem->regions[i]; + if (gpa >= reg->guest_phys_addr && + gpa < reg->guest_phys_addr + reg->size) { + return gpa - reg->guest_phys_addr + + reg->host_user_addr; + } + } + + return 0; +} + +/** + * Convert guest physical address to host virtual address safely + * + * This variant of rte_vhost_gpa_to_vva() takes care all the + * requested length is mapped and contiguous in process address + * space. + * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @param len + * the size of the requested area to map, + * updated with actual size mapped + * @return + * the host virtual address on success, 0 on failure */ +static inline uint64_t +rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, + uint64_t gpa, uint64_t *len) +{ + struct rte_vhost_mem_region *r; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + r = &mem->regions[i]; + if (gpa >= r->guest_phys_addr && + gpa < r->guest_phys_addr + r->size) { + + if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) + *len = r->guest_phys_addr + r->size - gpa; + + return gpa - r->guest_phys_addr + + r->host_user_addr; + } + } + *len = 0; + + return 0; +} + +#define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) + +/** + * Log the memory write start with given address. + * + * This function only need be invoked when the live migration starts. + * Therefore, we won't need call it at all in the most of time. For + * making the performance impact be minimum, it's suggested to do a + * check before calling it: + * + * if (unlikely(RTE_VHOST_NEED_LOG(features))) + * rte_vhost_log_write(vid, addr, len); + * + * @param vid + * vhost device ID + * @param addr + * the starting address for write + * @param len + * the length to write + */ +void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len); + +/** + * Log the used ring update start at given offset. + * + * Same as rte_vhost_log_write, it's suggested to do a check before + * calling it: + * + * if (unlikely(RTE_VHOST_NEED_LOG(features))) + * rte_vhost_log_used_vring(vid, vring_idx, offset, len); + * + * @param vid + * vhost device ID + * @param vring_idx + * the vring index + * @param offset + * the offset inside the used ring + * @param len + * the length to write + */ +void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, + uint64_t offset, uint64_t len); + +int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); + +/** + * Register vhost driver. 
path could be different for multiple + * instance support. + */ +int rte_vhost_driver_register(const char *path, uint64_t flags); + +/* Unregister vhost driver. This is only meaningful to vhost user. */ +int rte_vhost_driver_unregister(const char *path); + +/** + * Set the feature bits the vhost-user driver supports. + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_set_features(const char *path, uint64_t features); + +/** + * Enable vhost-user driver features. + * + * Note that + * - the param @features should be a subset of the feature bits provided + * by rte_vhost_driver_set_features(). + * - it must be invoked before vhost-user negotiation starts. + * + * @param path + * The vhost-user socket file path + * @param features + * Features to enable + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_enable_features(const char *path, uint64_t features); + +/** + * Disable vhost-user driver features. + * + * The two notes at rte_vhost_driver_enable_features() also apply here. + * + * @param path + * The vhost-user socket file path + * @param features + * Features to disable + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_disable_features(const char *path, uint64_t features); + +/** + * Get the feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param features + * A pointer to store the queried feature bits + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_get_features(const char *path, uint64_t *features); + +/** + * Get the feature bits after negotiation + * + * @param vid + * Vhost device ID + * @param features + * A pointer to store the queried feature bits + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_negotiated_features(int vid, uint64_t *features); + +/* Register callbacks. */ +int rte_vhost_driver_callback_register(const char *path, + struct vhost_device_ops const * const ops); + +/** + * + * Start the vhost-user driver. + * + * This function triggers the vhost-user negotiation. + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_driver_start(const char *path); + +/** + * Get the MTU value of the device if set in QEMU. + * + * @param vid + * virtio-net device ID + * @param mtu + * The variable to store the MTU value + * + * @return + * 0: success + * -EAGAIN: device not yet started + * -ENOTSUP: device does not support MTU feature + */ +int rte_vhost_get_mtu(int vid, uint16_t *mtu); + +/** + * Get the numa node from which the virtio net device's memory + * is allocated. + * + * @param vid + * vhost device ID + * + * @return + * The numa node, -1 on failure + */ +int rte_vhost_get_numa_node(int vid); + +/** + * Get the virtio net device's ifname, which is the vhost-user socket + * file path. + * + * @param vid + * vhost device ID + * @param buf + * The buffer to stored the queried ifname + * @param len + * The length of buf + * + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_ifname(int vid, char *buf, size_t len); + +/** + * Get how many avail entries are left in the queue + * + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index + * + * @return + * num of avail entires left + */ +uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); + +struct rte_mbuf; +struct rte_mempool; +/** + * This function adds buffers to the virtio devices RX virtqueue. 
Buffers can + * be received from the physical port or from another virtual device. A packet + * count is returned to indicate the number of packets that were succesfully + * added to the RX queue. + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index in mq case + * @param pkts + * array to contain packets to be enqueued + * @param count + * packets num to be enqueued + * @return + * num of packets enqueued + */ +uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count); + +/** + * This function gets guest buffers from the virtio device TX virtqueue, + * construct host mbufs, copies guest buffer content to host mbufs and + * store them in pkts to be processed. + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index in mq case + * @param mbuf_pool + * mbuf_pool where host mbuf is allocated. + * @param pkts + * array to contain packets to be dequeued + * @param count + * packets num to be dequeued + * @return + * num of packets dequeued + */ +uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); + +/** + * Get guest mem table: a list of memory regions. + * + * An rte_vhost_vhost_memory object will be allocated internaly, to hold the + * guest memory regions. Application should free it at destroy_device() + * callback. + * + * @param vid + * vhost device ID + * @param mem + * To store the returned mem regions + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); + +/** + * Get guest vring info, including the vring address, vring size, etc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested vring info + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring); + +/** + * Set id of the last descriptors in avail and used guest vrings. + * + * In case user application operates directly on buffers, it should use this + * function on device destruction to retrieve the same values later on in device + * creation via rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *) + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_avail_idx + * id of the last descriptor in avail ring to be set + * @param last_used_idx + * id of the last descriptor in used ring to be set + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_set_vhost_vring_last_idx(int vid, uint16_t vring_idx, + uint16_t last_avail_idx, uint16_t last_used_idx); + +#endif /* _RTE_VHOST_H_ */ diff --git a/src/spdk/lib/vhost/rte_vhost/socket.c b/src/spdk/lib/vhost/rte_vhost/socket.c new file mode 100644 index 00000000..1bc1e64b --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/socket.c @@ -0,0 +1,819 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "fd_man.h" +#include "vhost.h" +#include "vhost_user.h" + + +TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); + +/* + * Every time rte_vhost_driver_register() is invoked, an associated + * vhost_user_socket struct will be created. + */ +struct vhost_user_socket { + struct vhost_user_connection_list conn_list; + pthread_mutex_t conn_mutex; + char *path; + int socket_fd; + struct sockaddr_un un; + bool is_server; + bool reconnect; + bool dequeue_zero_copy; + + /* + * The "supported_features" indicates the feature bits the + * vhost driver supports. The "features" indicates the feature + * bits after the rte_vhost_driver_features_disable/enable(). + * It is also the final feature bits used for vhost-user + * features negotiation. + */ + uint64_t supported_features; + uint64_t features; + + struct vhost_device_ops const *notify_ops; +}; + +struct vhost_user_connection { + struct vhost_user_socket *vsocket; + int connfd; + int vid; + + TAILQ_ENTRY(vhost_user_connection) next; +}; + +#define MAX_VHOST_SOCKET 1024 +struct vhost_user { + struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; + struct fdset fdset; + int vsocket_cnt; + pthread_mutex_t mutex; +}; + +#define MAX_VIRTIO_BACKLOG 128 + +static void vhost_user_server_new_connection(int fd, void *data, int *remove); +static void vhost_user_read_cb(int fd, void *dat, int *remove); +static int create_unix_socket(struct vhost_user_socket *vsocket); +static int vhost_user_start_client(struct vhost_user_socket *vsocket); + +static struct vhost_user vhost_user = { + .fdset = { + .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, + .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .num = 0 + }, + .vsocket_cnt = 0, + .mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +/* return bytes# of read on success or negative val on failure. 
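+ * Any file descriptors passed in SCM_RIGHTS ancillary data are copied into
+ * the caller-provided fds array.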
*/ +int +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(fds, CMSG_DATA(cmsg), fdsize); + break; + } + } + + return ret; +} + +int +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } else { + msgh.msg_control = NULL; + msgh.msg_controllen = 0; + } + + do { + ret = sendmsg(sockfd, &msgh, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); + return ret; + } + + return ret; +} + +static void +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) +{ + int vid; + size_t size; + struct vhost_user_connection *conn; + int ret; + + conn = malloc(sizeof(*conn)); + if (conn == NULL) { + close(fd); + return; + } + + vid = vhost_new_device(vsocket->features); + if (vid == -1) { + goto err; + } + + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid, vsocket->path, size); + + if (vsocket->dequeue_zero_copy) + vhost_enable_dequeue_zero_copy(vid); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); + + if (vsocket->notify_ops->new_connection) { + ret = vsocket->notify_ops->new_connection(vid); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add vhost user connection with fd %d\n", + fd); + goto err; + } + } + + conn->connfd = fd; + conn->vsocket = vsocket; + conn->vid = vid; + ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, + NULL, conn); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add fd %d into vhost server fdset\n", + fd); + + if (vsocket->notify_ops->destroy_connection) + vsocket->notify_ops->destroy_connection(conn->vid); + + goto err; + } + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); + return; + +err: + free(conn); + close(fd); +} + +/* call back when there is new vhost-user connection from client */ +static void +vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) +{ + struct vhost_user_socket *vsocket = dat; + + fd = accept(fd, NULL, NULL); + if (fd < 0) + return; + + 
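+	/* The accepted fd is a fresh client connection; hand it over to the
+	 * common connection setup path. */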
RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); + vhost_user_add_connection(fd, vsocket); +} + +static void +vhost_user_read_cb(int connfd, void *dat, int *remove) +{ + struct vhost_user_connection *conn = dat; + struct vhost_user_socket *vsocket = conn->vsocket; + int ret; + + ret = vhost_user_msg_handler(conn->vid, connfd); + if (ret < 0) { + close(connfd); + *remove = 1; + vhost_destroy_device(conn->vid); + + if (vsocket->notify_ops->destroy_connection) + vsocket->notify_ops->destroy_connection(conn->vid); + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); + + free(conn); + + if (vsocket->reconnect) { + create_unix_socket(vsocket); + vhost_user_start_client(vsocket); + } + } +} + +static int +create_unix_socket(struct vhost_user_socket *vsocket) +{ + int fd; + struct sockaddr_un *un = &vsocket->un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", + vsocket->is_server ? "server" : "client", fd); + + if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost-user: can't set nonblocking mode for socket, fd: " + "%d (%s)\n", fd, strerror(errno)); + close(fd); + return -1; + } + + memset(un, 0, sizeof(*un)); + un->sun_family = AF_UNIX; + strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); + un->sun_path[sizeof(un->sun_path) - 1] = '\0'; + + vsocket->socket_fd = fd; + return 0; +} + +static int +vhost_user_start_server(struct vhost_user_socket *vsocket) +{ + int ret; + int fd = vsocket->socket_fd; + const char *path = vsocket->path; + + ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to bind to %s: %s; remove it and try again\n", + path, strerror(errno)); + goto err; + } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); + + ret = listen(fd, MAX_VIRTIO_BACKLOG); + if (ret < 0) + goto err; + + ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, + NULL, vsocket); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add listen fd %d to vhost server fdset\n", + fd); + goto err; + } + + return 0; + +err: + close(fd); + return -1; +} + +struct vhost_user_reconnect { + struct sockaddr_un un; + int fd; + struct vhost_user_socket *vsocket; + + TAILQ_ENTRY(vhost_user_reconnect) next; +}; + +TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); +struct vhost_user_reconnect_list { + struct vhost_user_reconnect_tailq_list head; + pthread_mutex_t mutex; +}; + +static struct vhost_user_reconnect_list reconn_list; +static pthread_t reconn_tid; + +static int +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) +{ + int ret, flags; + + ret = connect(fd, un, sz); + if (ret < 0 && errno != EISCONN) + return -1; + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't get flags for connfd %d\n", fd); + return -2; + } + if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't disable nonblocking on fd %d\n", fd); + return -2; + } + return 0; +} + +static void * +vhost_user_client_reconnect(void *arg __rte_unused) +{ + int ret; + struct vhost_user_reconnect *reconn, *next; + + while (1) { + pthread_mutex_lock(&reconn_list.mutex); + + /* + * An equal implementation of TAILQ_FOREACH_SAFE, + * which does not exist on all platforms. 
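+ * Each pending socket is retried once per one-second pass: -1 from
+ * vhost_user_connect_nonblock() means the server is not up yet and the
+ * entry is kept, while -2 is fatal and the entry is dropped.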
+ */ + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + ret = vhost_user_connect_nonblock(reconn->fd, + (struct sockaddr *)&reconn->un, + sizeof(reconn->un)); + if (ret == -2) { + close(reconn->fd); + RTE_LOG(ERR, VHOST_CONFIG, + "reconnection for fd %d failed\n", + reconn->fd); + goto remove_fd; + } + if (ret == -1) + continue; + + RTE_LOG(INFO, VHOST_CONFIG, + "%s: connected\n", reconn->vsocket->path); + vhost_user_add_connection(reconn->fd, reconn->vsocket); +remove_fd: + TAILQ_REMOVE(&reconn_list.head, reconn, next); + free(reconn); + } + + pthread_mutex_unlock(&reconn_list.mutex); + sleep(1); + } + + return NULL; +} + +static int +vhost_user_reconnect_init(void) +{ + int ret; + + pthread_mutex_init(&reconn_list.mutex, NULL); + TAILQ_INIT(&reconn_list.head); + + ret = pthread_create(&reconn_tid, NULL, + vhost_user_client_reconnect, NULL); + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); + + return ret; +} + +static int +vhost_user_start_client(struct vhost_user_socket *vsocket) +{ + int ret; + int fd = vsocket->socket_fd; + const char *path = vsocket->path; + struct vhost_user_reconnect *reconn; + + ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un, + sizeof(vsocket->un)); + if (ret == 0) { + vhost_user_add_connection(fd, vsocket); + return 0; + } + + RTE_LOG(WARNING, VHOST_CONFIG, + "failed to connect to %s: %s\n", + path, strerror(errno)); + + if (ret == -2 || !vsocket->reconnect) { + close(fd); + return -1; + } + + RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); + reconn = malloc(sizeof(*reconn)); + if (reconn == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for reconnect\n"); + close(fd); + return -1; + } + reconn->un = vsocket->un; + reconn->fd = fd; + reconn->vsocket = vsocket; + pthread_mutex_lock(&reconn_list.mutex); + TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); + pthread_mutex_unlock(&reconn_list.mutex); + + return 0; +} + +static struct vhost_user_socket * +find_vhost_user_socket(const char *path) +{ + int i; + + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; + + if (!strcmp(vsocket->path, path)) + return vsocket; + } + + return NULL; +} + +int +rte_vhost_driver_disable_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->features &= ~features; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_enable_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) { + if ((vsocket->supported_features & features) != features) { + /* + * trying to enable features the driver doesn't + * support. + */ + pthread_mutex_unlock(&vhost_user.mutex); + return -1; + } + vsocket->features |= features; + } + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_set_features(const char *path, uint64_t features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) { + vsocket->supported_features = features; + vsocket->features = features; + } + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 
0 : -1; +} + +int +rte_vhost_driver_get_features(const char *path, uint64_t *features) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + *features = vsocket->features; + pthread_mutex_unlock(&vhost_user.mutex); + + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + return -1; + } else { + return 0; + } +} + +/* + * Register a new vhost-user socket; here we could act as server + * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag + * is set. + */ +int +rte_vhost_driver_register(const char *path, uint64_t flags) +{ + int ret = -1; + struct vhost_user_socket *vsocket; + + if (!path) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of vhost sockets reaches maximum\n"); + goto out; + } + + vsocket = malloc(sizeof(struct vhost_user_socket)); + if (!vsocket) + goto out; + memset(vsocket, 0, sizeof(struct vhost_user_socket)); + vsocket->path = strdup(path); + if (!vsocket->path) { + free(vsocket); + goto out; + } + TAILQ_INIT(&vsocket->conn_list); + pthread_mutex_init(&vsocket->conn_mutex, NULL); + vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + + /* + * Set the supported features correctly for the builtin vhost-user + * net driver. + * + * Applications know nothing about features the builtin virtio net + * driver (virtio_net.c) supports, thus it's not possible for them + * to invoke rte_vhost_driver_set_features(). To workaround it, here + * we set it unconditionally. If the application want to implement + * another vhost-user driver (say SCSI), it should call the + * rte_vhost_driver_set_features(), which will overwrite following + * two values. 
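+ *
+ * As a rough sketch for such a backend (the path and feature mask below are
+ * illustrative assumptions):
+ *
+ *   rte_vhost_driver_register("/tmp/vhost-scsi.0", 0);
+ *   rte_vhost_driver_set_features("/tmp/vhost-scsi.0", my_scsi_features);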
+ */ + vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; + vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { + vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); + if (vsocket->reconnect && reconn_tid == 0) { + if (vhost_user_reconnect_init() < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + } + } else { + vsocket->is_server = true; + } + ret = create_unix_socket(vsocket); + if (ret < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + + vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; + +out: + pthread_mutex_unlock(&vhost_user.mutex); + + return ret; +} + +static bool +vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) +{ + int found = false; + struct vhost_user_reconnect *reconn, *next; + + pthread_mutex_lock(&reconn_list.mutex); + + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + if (reconn->vsocket == vsocket) { + TAILQ_REMOVE(&reconn_list.head, reconn, next); + close(reconn->fd); + free(reconn); + found = true; + break; + } + } + pthread_mutex_unlock(&reconn_list.mutex); + return found; +} + +/** + * Unregister the specified vhost socket + */ +int +rte_vhost_driver_unregister(const char *path) +{ + int i; + int count; + struct vhost_user_connection *conn; + + pthread_mutex_lock(&vhost_user.mutex); + + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; + + if (!strcmp(vsocket->path, path)) { + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, vsocket->socket_fd); + close(vsocket->socket_fd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_FOREACH(conn, &vsocket->conn_list, next) { + close(conn->connfd); + } + pthread_mutex_unlock(&vsocket->conn_mutex); + + do { + pthread_mutex_lock(&vsocket->conn_mutex); + conn = TAILQ_FIRST(&vsocket->conn_list); + pthread_mutex_unlock(&vsocket->conn_mutex); + } while (conn != NULL); + + free(vsocket->path); + free(vsocket); + + count = --vhost_user.vsocket_cnt; + vhost_user.vsockets[i] = vhost_user.vsockets[count]; + vhost_user.vsockets[count] = NULL; + pthread_mutex_unlock(&vhost_user.mutex); + + return 0; + } + } + pthread_mutex_unlock(&vhost_user.mutex); + + return -1; +} + +/* + * Register ops so that we can add/remove device to data core. + */ +int +rte_vhost_driver_callback_register(const char *path, + struct vhost_device_ops const * const ops) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->notify_ops = ops; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +struct vhost_device_ops const * +vhost_driver_callback_get(const char *path) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 
vsocket->notify_ops : NULL; +} + +int +rte_vhost_driver_start(const char *path) +{ + struct vhost_user_socket *vsocket; + static pthread_t fdset_tid; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + pthread_mutex_unlock(&vhost_user.mutex); + + if (!vsocket) + return -1; + + if (fdset_tid == 0) { + int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, + &vhost_user.fdset); + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create fdset handling thread"); + } + + if (vsocket->is_server) + return vhost_user_start_server(vsocket); + else + return vhost_user_start_client(vsocket); +} diff --git a/src/spdk/lib/vhost/rte_vhost/vhost.c b/src/spdk/lib/vhost/rte_vhost/vhost.c new file mode 100644 index 00000000..9d4ae71b --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/vhost.c @@ -0,0 +1,482 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "vhost.h" + +struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} + +static void +cleanup_vq(struct vhost_virtqueue *vq, int destroy) +{ + if ((vq->callfd >= 0) && (destroy != 0)) + close(vq->callfd); + if (vq->kickfd >= 0) + close(vq->kickfd); +} + +/* + * Unmap any memory, close any file descriptors and + * free any memory owned by a device. + */ +void +cleanup_device(struct virtio_net *dev, int destroy) +{ + uint32_t i; + + vhost_backend_cleanup(dev); + + for (i = 0; i < dev->nr_vring; i++) + cleanup_vq(dev->virtqueue[i], destroy); +} + +/* + * Release virtqueues and device memory. 
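+ * Called from vhost_destroy_device() after cleanup_device() has closed the
+ * kick/call fds; the per-queue shadow used ring is freed here as well.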
+ */ +static void +free_device(struct virtio_net *dev) +{ + uint32_t i; + struct vhost_virtqueue *vq; + + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; + + rte_free(vq->shadow_used_ring); + + rte_free(vq); + } + + rte_free(dev); +} + +static void +init_vring_queue(struct vhost_virtqueue *vq) +{ + memset(vq, 0, sizeof(struct vhost_virtqueue)); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + /* Backends are set to -1 indicating an inactive device. */ + vq->backend = -1; + + /* + * always set the vq to enabled; this is to keep compatibility + * with the old QEMU, whereas there is no SET_VRING_ENABLE message. + */ + vq->enabled = 1; + + TAILQ_INIT(&vq->zmbuf_list); +} + +static void +reset_vring_queue(struct vhost_virtqueue *vq) +{ + int callfd; + + callfd = vq->callfd; + init_vring_queue(vq); + vq->callfd = callfd; +} + +int +alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) +{ + struct vhost_virtqueue *vq; + + vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0); + if (vq == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for vring:%u.\n", vring_idx); + return -1; + } + + dev->virtqueue[vring_idx] = vq; + init_vring_queue(vq); + + dev->nr_vring += 1; + + return 0; +} + +/* + * Reset some variables in device structure, while keeping few + * others untouched, such as vid, ifname, nr_vring: they + * should be same unless the device is removed. + */ +void +reset_device(struct virtio_net *dev) +{ + uint32_t i; + + dev->negotiated_features = 0; + dev->protocol_features = 0; + dev->flags = 0; + + for (i = 0; i < dev->nr_vring; i++) + reset_vring_queue(dev->virtqueue[i]); +} + +/* + * Invoked when there is a new vhost-user connection established (when + * there is a new virtio device being attached). + */ +int +vhost_new_device(uint64_t features) +{ + struct virtio_net *dev; + int i; + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for new dev.\n"); + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vhost_devices[i] == NULL) + break; + } + if (i == MAX_VHOST_DEVICE) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find a free slot for new device.\n"); + rte_free(dev); + return -1; + } + + vhost_devices[i] = dev; + dev->vid = i; + dev->features = features; + + return i; +} + +/* + * Invoked when there is the vhost-user connection is broken (when + * the virtio device is being detached). + */ +void +vhost_destroy_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(vid); + } + + cleanup_device(dev, 1); + free_device(dev); + + vhost_devices[vid] = NULL; +} + +void +vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) +{ + struct virtio_net *dev; + unsigned int len; + + dev = get_device(vid); + if (dev == NULL) + return; + + len = if_len > sizeof(dev->ifname) ? 
+ sizeof(dev->ifname) : if_len; + + strncpy(dev->ifname, if_name, len); + dev->ifname[sizeof(dev->ifname) - 1] = '\0'; +} + +void +vhost_enable_dequeue_zero_copy(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + dev->dequeue_zero_copy = 1; +} + +int +rte_vhost_get_mtu(int vid, uint16_t *mtu) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -ENODEV; + + if (!(dev->flags & VIRTIO_DEV_READY)) + return -EAGAIN; + + if (!(dev->negotiated_features & VIRTIO_NET_F_MTU)) + return -ENOTSUP; + + *mtu = dev->mtu; + + return 0; +} + +int +rte_vhost_get_numa_node(int vid) +{ +#ifdef RTE_LIBRTE_VHOST_NUMA + struct virtio_net *dev = get_device(vid); + int numa_node; + int ret; + + if (dev == NULL) + return -1; + + ret = get_mempolicy(&numa_node, NULL, 0, dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to query numa node: %d\n", vid, ret); + return -1; + } + + return numa_node; +#else + RTE_SET_USED(vid); + return -1; +#endif +} + +int +rte_vhost_get_ifname(int vid, char *buf, size_t len) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + len = RTE_MIN(len, sizeof(dev->ifname)); + + strncpy(buf, dev->ifname, len); + buf[len - 1] = '\0'; + + return 0; +} + +int +rte_vhost_get_negotiated_features(int vid, uint64_t *features) +{ + struct virtio_net *dev; + + dev = get_device(vid); + if (!dev) + return -1; + + *features = dev->negotiated_features; + return 0; +} + +int +rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) +{ + struct virtio_net *dev; + struct rte_vhost_memory *m; + size_t size; + + dev = get_device(vid); + if (!dev) + return -1; + + size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region); + m = malloc(sizeof(struct rte_vhost_memory) + size); + if (!m) + return -1; + + m->nregions = dev->mem->nregions; + memcpy(m->regions, dev->mem->regions, size); + *mem = m; + + return 0; +} + +int +rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + vring->desc = vq->desc; + vring->avail = vq->avail; + vring->used = vq->used; + vring->log_guest_addr = vq->log_guest_addr; + + vring->callfd = vq->callfd; + vring->kickfd = vq->kickfd; + vring->size = vq->size; + + vring->last_avail_idx = vq->last_avail_idx; + vring->last_used_idx = vq->last_used_idx; + + return 0; +} + +uint16_t +rte_vhost_avail_entries(int vid, uint16_t queue_id) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + vq = dev->virtqueue[queue_id]; + if (!vq->enabled) + return 0; + + return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; +} + +int +rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + if (enable) { + RTE_LOG(ERR, VHOST_CONFIG, + "guest notification isn't supported.\n"); + return -1; + } + + dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; + return 0; +} + +void +rte_vhost_log_write(int vid, uint64_t addr, uint64_t len) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + vhost_log_write(dev, addr, len); +} + +void +rte_vhost_log_used_vring(int vid, uint16_t vring_idx, + uint64_t offset, 
uint64_t len) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (dev == NULL) + return; + + if (vring_idx >= VHOST_MAX_VRING) + return; + vq = dev->virtqueue[vring_idx]; + if (!vq) + return; + + vhost_log_used_vring(dev, vq, offset, len); +} + +int +rte_vhost_set_vhost_vring_last_idx(int vid, uint16_t vring_idx, + uint16_t last_avail_idx, uint16_t last_used_idx) { + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + vq->last_avail_idx = last_avail_idx; + vq->last_used_idx = last_used_idx; + + return 0; +} diff --git a/src/spdk/lib/vhost/rte_vhost/vhost.h b/src/spdk/lib/vhost/rte_vhost/vhost.h new file mode 100644 index 00000000..b0a0201d --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/vhost.h @@ -0,0 +1,321 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_CDEV_H_ +#define _VHOST_NET_CDEV_H_ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "rte_vhost.h" +#include "vhost_user.h" + +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 +/* Used to indicate that the device is ready to operate */ +#define VIRTIO_DEV_READY 2 + +/* Backend value set by guest. */ +#define VIRTIO_DEV_STOPPED -1 + +#define BUF_VECTOR_MAX 256 + +/** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. + */ +struct buf_vector { + uint64_t buf_addr; + uint32_t buf_len; + uint32_t desc_idx; +}; + +/* + * A structure to hold some fields needed in zero copy code path, + * mainly for associating an mbuf with the right desc_idx. 
+ */ +struct zcopy_mbuf { + struct rte_mbuf *mbuf; + uint32_t desc_idx; + uint16_t in_use; + + TAILQ_ENTRY(zcopy_mbuf) next; +}; +TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); + +/** + * Structure contains variables relevant to RX/TX virtqueues. + */ +struct vhost_virtqueue { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint32_t size; + + uint16_t last_avail_idx; + uint16_t last_used_idx; +#define VIRTIO_INVALID_EVENTFD (-1) +#define VIRTIO_UNINITIALIZED_EVENTFD (-2) + + /* Backend value to determine if device should started/stopped */ + int backend; + /* Used to notify the guest (trigger interrupt) */ + int callfd; + /* Currently unused as polling mode is enabled */ + int kickfd; + int enabled; + + /* Physical address of used ring, for logging */ + uint64_t log_guest_addr; + + uint16_t nr_zmbuf; + uint16_t zmbuf_size; + uint16_t last_zmbuf_idx; + struct zcopy_mbuf *zmbufs; + struct zcopy_mbuf_list zmbuf_list; + + struct vring_used_elem *shadow_used_ring; + uint16_t shadow_used_idx; +} __rte_cache_aligned; + +/* Old kernels have no such macros defined */ +#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE + #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 +#endif + +#ifndef VIRTIO_NET_F_MQ + #define VIRTIO_NET_F_MQ 22 +#endif + +#define VHOST_MAX_VRING 0x100 +#define VHOST_MAX_QUEUE_PAIRS 0x80 + +#ifndef VIRTIO_NET_F_MTU + #define VIRTIO_NET_F_MTU 3 +#endif + +/* + * Define virtio 1.0 for older kernels + */ +#ifndef VIRTIO_F_VERSION_1 + #define VIRTIO_F_VERSION_1 32 +#endif + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +/* Features supported by this builtin vhost-user net driver. */ +#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_RX) | \ + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + (1ULL << VIRTIO_NET_F_MQ) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1ULL << VIRTIO_NET_F_MTU)) + + +struct guest_page { + uint64_t guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; +}; + +/** + * Device structure contains all configuration information relating + * to the device. + */ +struct virtio_net { + /* Frontend (QEMU) memory and memory region information */ + struct rte_vhost_memory *mem; + uint64_t features; + uint64_t negotiated_features; + uint64_t protocol_features; + int vid; + uint32_t is_nvme; + uint32_t flags; + uint16_t vhost_hlen; + /* to tell if we need broadcast rarp packet */ + rte_atomic16_t broadcast_rarp; + uint32_t nr_vring; + int dequeue_zero_copy; + struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; +#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) + char ifname[IF_NAME_SZ]; + uint64_t log_size; + uint64_t log_base; + uint64_t log_addr; + struct ether_addr mac; + uint16_t mtu; + + struct vhost_device_ops const *notify_ops; + + uint32_t nr_guest_pages; + uint32_t max_guest_pages; + struct guest_page *guest_pages; + int has_new_mem_table; + struct VhostUserMemory mem_table; + int mem_table_fds[VHOST_MEMORY_MAX_NREGIONS]; +} __rte_cache_aligned; + + +#define VHOST_LOG_PAGE 4096 + +static inline void __attribute__((always_inline)) +vhost_log_page(uint8_t *log_base, uint64_t page) +{ + log_base[page / 8] |= 1 << (page % 8); +} + +static inline void __attribute__((always_inline)) +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->negotiated_features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + /* To make sure guest memory updates are committed before logging */ + rte_smp_wmb(); + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + page += 1; + } +} + +static inline void __attribute__((always_inline)) +vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_write(dev, vq->log_guest_addr + offset, len); +} + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VHOST_MAX_PRINT_BUFF 6072 +#define VHOST_LOG_LEVEL RTE_LOG_DEBUG +#define VHOST_LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char *)(addr); \ + unsigned int index; \ + char packet[VHOST_MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ + else \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ + \ + VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while (0) +#else +#define VHOST_LOG_LEVEL RTE_LOG_INFO +#define VHOST_LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) +#define PRINT_PACKET(device, addr, size, header) do {} while (0) +#endif + +extern uint64_t VHOST_FEATURES; +#define MAX_VHOST_DEVICE 1024 +extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* Convert guest physical address to host physical address */ +static inline phys_addr_t __attribute__((always_inline)) +gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + if (gpa >= page->guest_phys_addr && + gpa + size < page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } + } + + return 0; +} + +struct virtio_net *get_device(int vid); + +int vhost_new_device(uint64_t features); +void cleanup_device(struct virtio_net *dev, int destroy); +void reset_device(struct virtio_net *dev); +void vhost_destroy_device(int); + +int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); + +void vhost_set_ifname(int, const char *if_name, unsigned int if_len); +void vhost_enable_dequeue_zero_copy(int vid); + +struct vhost_device_ops const *vhost_driver_callback_get(const char *path); + +/* + * Backend-specific cleanup. + * + * TODO: fix it; we have one backend now + */ +void vhost_backend_cleanup(struct virtio_net *dev); + +#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/src/spdk/lib/vhost/rte_vhost/vhost_user.c b/src/spdk/lib/vhost/rte_vhost/vhost_user.c new file mode 100644 index 00000000..b708a8a7 --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/vhost_user.c @@ -0,0 +1,1360 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include + +#include "vhost.h" +#include "vhost_user.h" + +#define VIRTIO_MIN_MTU 68 +#define VIRTIO_MAX_MTU 65535 + +static const char *vhost_message_str[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", + [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", + [VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG", + [VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG", + [VHOST_USER_NVME_ADMIN] = "VHOST_USER_NVME_ADMIN", + [VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL", + [VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP", + [VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP", + [VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD" +}; + +static uint64_t +get_blk_size(int fd) +{ + struct stat stat; + int ret; + + ret = fstat(fd, &stat); + return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; +} + +static void +free_mem_region(struct virtio_net *dev) +{ + uint32_t i; + struct rte_vhost_mem_region *reg; + + if (!dev || !dev->mem) + return; + + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + if (reg->host_user_addr) { + munmap(reg->mmap_addr, reg->mmap_size); + close(reg->fd); + } + } +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + uint32_t i; + + if (dev->mem) { + if (dev->has_new_mem_table) { + for (i = 0; i < dev->mem->nregions; i++) { + close(dev->mem_table_fds[i]); + } + dev->has_new_mem_table = 0; + } + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + + free(dev->guest_pages); + dev->guest_pages = NULL; + + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + dev->log_addr = 0; + } +} + +/* + * This function just returns success at the moment unless + * the device hasn't been initialised. + */ +static int +vhost_user_set_owner(void) +{ + return 0; +} + +static int +vhost_user_reset_owner(struct virtio_net *dev) +{ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + cleanup_device(dev, 0); + reset_device(dev); + return 0; +} + +/* + * The features that we support are requested. 
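+ * These are the per-device feature bits handed to vhost_new_device(),
+ * i.e. the set offered before any negotiation has taken place.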
+ */ +static uint64_t +vhost_user_get_features(struct virtio_net *dev) +{ + return dev->features; +} + +/* + * We receive the negotiated features supported by us and the virtio device. + */ +static int +vhost_user_set_features(struct virtio_net *dev, uint64_t features) +{ + uint64_t vhost_features = 0; + + vhost_features = vhost_user_get_features(dev); + if (features & ~vhost_features) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) received invalid negotiated features.\n", + dev->vid); + return -1; + } + + if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->negotiated_features != features) { + if (dev->notify_ops->features_changed) { + dev->notify_ops->features_changed(dev->vid, features); + } else { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + } + + dev->negotiated_features = features; + if (dev->negotiated_features & + ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { + dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + dev->vhost_hlen = sizeof(struct virtio_net_hdr); + } + VHOST_LOG_DEBUG(VHOST_CONFIG, + "(%d) mergeable RX buffers %s, virtio 1 %s\n", + dev->vid, + (dev->negotiated_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->negotiated_features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + + return 0; +} + +/* + * The virtio device sends us the size of the descriptor ring. + */ +static int +vhost_user_set_vring_num(struct virtio_net *dev, + VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + + vq->size = msg->payload.state.num; + + if (dev->dequeue_zero_copy) { + vq->nr_zmbuf = 0; + vq->last_zmbuf_idx = 0; + vq->zmbuf_size = vq->size; + vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size * + sizeof(struct zcopy_mbuf), 0); + if (vq->zmbufs == NULL) { + RTE_LOG(WARNING, VHOST_CONFIG, + "failed to allocate mem for zero copy; " + "zero copy is force disabled\n"); + dev->dequeue_zero_copy = 0; + } + } + + vq->shadow_used_ring = rte_malloc(NULL, + vq->size * sizeof(struct vring_used_elem), + RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_ring) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } + + return 0; +} + +/* + * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the + * same numa node as the memory of vring descriptor. 
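+ * When RTE_LIBRTE_VHOST_NUMA is not defined this compiles to a no-op that
+ * returns the device unchanged.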
+ */ +#ifdef RTE_LIBRTE_VHOST_NUMA +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index) +{ + int oldnode, newnode; + struct virtio_net *old_dev; + struct vhost_virtqueue *old_vq, *vq; + int ret; + + old_dev = dev; + vq = old_vq = dev->virtqueue[index]; + + ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, + MPOL_F_NODE | MPOL_F_ADDR); + + /* check if we need to reallocate vq */ + ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get vq numa information.\n"); + return dev; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate vq from %d to %d node\n", oldnode, newnode); + vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode); + if (!vq) + return dev; + + memcpy(vq, old_vq, sizeof(*vq)); + rte_free(old_vq); + } + + /* check if we need to reallocate dev */ + ret = get_mempolicy(&oldnode, NULL, 0, old_dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get dev numa information.\n"); + goto out; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate dev from %d to %d node\n", + oldnode, newnode); + dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); + if (!dev) { + dev = old_dev; + goto out; + } + + memcpy(dev, old_dev, sizeof(*dev)); + rte_free(old_dev); + } + +out: + dev->virtqueue[index] = vq; + vhost_devices[dev->vid] = dev; + + return dev; +} +#else +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index __rte_unused) +{ + return dev; +} +#endif + +/* + * Converts QEMU virtual address to Vhost virtual address. This function is + * used to convert the ring addresses to our address space. + */ +static uint64_t +qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) +{ + struct rte_vhost_mem_region *reg; + uint32_t i; + + /* Find the region where the address lives. */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + + if (qva >= reg->guest_user_addr && + qva < reg->guest_user_addr + reg->size) { + + if (unlikely(*len > reg->guest_user_addr + reg->size - qva)) + *len = reg->guest_user_addr + reg->size - qva; + + return qva - reg->guest_user_addr + + reg->host_user_addr; + } + } + + return 0; +} + +static int vhost_setup_mem_table(struct virtio_net *dev); + +/* + * The virtio device sends us the desc, used and avail ring addresses. + * This function then converts these to our address space. + */ +static int +vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq; + uint64_t len; + + /* Remove from the data plane. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + if (dev->has_new_mem_table) { + vhost_setup_mem_table(dev); + dev->has_new_mem_table = 0; + } + + if (dev->mem == NULL) + return -1; + + /* addr->index refers to the queue index. The txq 1, rxq is 0. */ + vq = dev->virtqueue[msg->payload.addr.index]; + + /* The addresses are converted from QEMU virtual to Vhost virtual. 
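+	 * qva_to_vva() also clamps the mapped length, so a ring that is not
+	 * contiguous in its backing region fails the size checks below.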
*/ + len = sizeof(struct vring_desc) * vq->size; + vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.desc_user_addr, &len); + if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to map desc ring.\n", + dev->vid); + return -1; + } + + dev = numa_realloc(dev, msg->payload.addr.index); + vq = dev->virtqueue[msg->payload.addr.index]; + + len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; + vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.avail_user_addr, &len); + if (vq->avail == 0 || + len != sizeof(struct vring_avail) + + sizeof(uint16_t) * vq->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find avail ring address.\n", + dev->vid); + return -1; + } + + len = sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size; + vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, + msg->payload.addr.used_user_addr, &len); + if (vq->used == 0 || len != sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size) { + + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find used ring address.\n", + dev->vid); + return -1; + } + + if (vq->last_used_idx != vq->used->idx) { + RTE_LOG(WARNING, VHOST_CONFIG, + "last_used_idx (%u) and vq->used->idx (%u) mismatches; " + "some packets maybe resent for Tx and dropped for Rx\n", + vq->last_used_idx, vq->used->idx); + vq->last_used_idx = vq->used->idx; + vq->last_avail_idx = vq->used->idx; + } + + vq->log_guest_addr = msg->payload.addr.log_guest_addr; + + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + dev->vid, vq->desc); + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + dev->vid, vq->avail); + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + dev->vid, vq->used); + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + dev->vid, vq->log_guest_addr); + + return 0; +} + +/* + * The virtio device sends us the available ring last used index. + */ +static int +vhost_user_set_vring_base(struct virtio_net *dev, + VhostUserMsg *msg) +{ + /* Remove from the data plane. 
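+	 * The ring base indexes below must not change under an active device.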
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + dev->virtqueue[msg->payload.state.index]->last_used_idx = msg->payload.state.num; + dev->virtqueue[msg->payload.state.index]->last_avail_idx = msg->payload.state.num; + + return 0; +} + +static void +add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, + uint64_t host_phys_addr, uint64_t size) +{ + struct guest_page *page, *last_page; + + if (dev->nr_guest_pages == dev->max_guest_pages) { + dev->max_guest_pages = RTE_MAX(8U, dev->max_guest_pages * 2); + dev->guest_pages = realloc(dev->guest_pages, + dev->max_guest_pages * sizeof(*page)); + } + + if (dev->nr_guest_pages > 0) { + last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; + /* merge if the two pages are continuous */ + if (host_phys_addr == last_page->host_phys_addr + + last_page->size) { + last_page->size += size; + return; + } + } + + page = &dev->guest_pages[dev->nr_guest_pages++]; + page->guest_phys_addr = guest_phys_addr; + page->host_phys_addr = host_phys_addr; + page->size = size; +} + +static void +add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, + uint64_t page_size) +{ + uint64_t reg_size = reg->size; + uint64_t host_user_addr = reg->host_user_addr; + uint64_t guest_phys_addr = reg->guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; + + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr); + size = page_size - (guest_phys_addr & (page_size - 1)); + size = RTE_MIN(size, reg_size); + + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + host_user_addr += size; + guest_phys_addr += size; + reg_size -= size; + + while (reg_size > 0) { + size = RTE_MIN(reg_size, page_size); + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) + host_user_addr); + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + + host_user_addr += size; + guest_phys_addr += size; + reg_size -= size; + } +} + +#ifdef RTE_LIBRTE_VHOST_DEBUG +/* TODO: enable it only in debug mode? */ +static void +dump_guest_pages(struct virtio_net *dev) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + RTE_LOG(INFO, VHOST_CONFIG, + "guest physical page region %u\n" + "\t guest_phys_addr: %" PRIx64 "\n" + "\t host_phys_addr : %" PRIx64 "\n" + "\t size : %" PRIx64 "\n", + i, + page->guest_phys_addr, + page->host_phys_addr, + page->size); + } +} +#else +#define dump_guest_pages(dev) +#endif + +static int +vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + uint32_t i; + + if (dev->has_new_mem_table) { + /* + * The previous mem table was not consumed, so close the + * file descriptors from that mem table before copying + * the new one. + */ + for (i = 0; i < dev->mem_table.nregions; i++) { + close(dev->mem_table_fds[i]); + } + } + + memcpy(&dev->mem_table, &pmsg->payload.memory, sizeof(dev->mem_table)); + memcpy(dev->mem_table_fds, pmsg->fds, sizeof(dev->mem_table_fds)); + dev->has_new_mem_table = 1; + /* vhost-user-nvme will not send + * set vring addr message, enable + * memory address table now. 
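+	 * For other backends the new table is applied lazily, the next time
+	 * vhost_user_set_vring_addr() runs.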
+ */ + if (dev->has_new_mem_table && dev->is_nvme) { + vhost_setup_mem_table(dev); + dev->has_new_mem_table = 0; + } + + return 0; +} + + static int +vhost_setup_mem_table(struct virtio_net *dev) +{ + struct VhostUserMemory memory = dev->mem_table; + struct rte_vhost_mem_region *reg; + struct vhost_virtqueue *vq; + void *mmap_addr; + uint64_t mmap_size; + uint64_t mmap_offset; + uint64_t alignment; + uint32_t i; + int fd; + + if (dev->mem) { + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; + /* Those addresses won't be valid anymore in host address space + * after setting new mem table. Initiator need to resend these + * addresses. + */ + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + } + + dev->nr_guest_pages = 0; + if (!dev->guest_pages) { + dev->max_guest_pages = 8; + dev->guest_pages = malloc(dev->max_guest_pages * + sizeof(struct guest_page)); + } + + dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + + sizeof(struct rte_vhost_mem_region) * memory.nregions, 0); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to allocate memory for dev->mem\n", + dev->vid); + return -1; + } + dev->mem->nregions = memory.nregions; + + for (i = 0; i < memory.nregions; i++) { + fd = dev->mem_table_fds[i]; + reg = &dev->mem->regions[i]; + + reg->guest_phys_addr = memory.regions[i].guest_phys_addr; + reg->guest_user_addr = memory.regions[i].userspace_addr; + reg->size = memory.regions[i].memory_size; + reg->fd = fd; + + mmap_offset = memory.regions[i].mmap_offset; + mmap_size = reg->size + mmap_offset; + + /* mmap() without flag of MAP_ANONYMOUS, should be called + * with length argument aligned with hugepagesz at older + * longterm version Linux, like 2.6.32 and 3.2.72, or + * mmap() will fail with EINVAL. + * + * to avoid failure, make sure in caller to keep length + * aligned. 
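+ * get_blk_size() below obtains the hugepage size backing the fd via
+ * fstat(), and mmap_size is rounded up to that alignment with
+ * RTE_ALIGN_CEIL() before mapping.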
+ */ + alignment = get_blk_size(fd); + if (alignment == (uint64_t)-1) { + RTE_LOG(ERR, VHOST_CONFIG, + "couldn't get hugepage size through fstat\n"); + goto err_mmap; + } + mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + + mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + + if (mmap_addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap region %u failed.\n", i); + goto err_mmap; + } + + if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "MADV_DONTDUMP advice setting failed.\n"); + } + + reg->mmap_addr = mmap_addr; + reg->mmap_size = mmap_size; + reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + + mmap_offset; + + if (dev->dequeue_zero_copy) + add_guest_pages(dev, reg, alignment); + + RTE_LOG(INFO, VHOST_CONFIG, + "guest memory region %u, size: 0x%" PRIx64 "\n" + "\t guest physical addr: 0x%" PRIx64 "\n" + "\t guest virtual addr: 0x%" PRIx64 "\n" + "\t host virtual addr: 0x%" PRIx64 "\n" + "\t mmap addr : 0x%" PRIx64 "\n" + "\t mmap size : 0x%" PRIx64 "\n" + "\t mmap align: 0x%" PRIx64 "\n" + "\t mmap off : 0x%" PRIx64 "\n", + i, reg->size, + reg->guest_phys_addr, + reg->guest_user_addr, + reg->host_user_addr, + (uint64_t)(uintptr_t)mmap_addr, + mmap_size, + alignment, + mmap_offset); + } + + dump_guest_pages(dev); + + return 0; + +err_mmap: + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + return -1; +} + +static int +vq_is_ready(struct vhost_virtqueue *vq) +{ + return vq && vq->desc && + vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->kickfd != VIRTIO_INVALID_EVENTFD && + vq->callfd != VIRTIO_INVALID_EVENTFD; +} + +static int +virtio_is_ready(struct virtio_net *dev) +{ + struct vhost_virtqueue *vq; + uint32_t i; + + if (dev->nr_vring == 0) + return 0; + + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; + + if (vq_is_ready(vq)) { + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is now ready for processing.\n"); + return 1; + } + } + + return 0; +} + +static void +vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + + /* Remove from the data plane. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring call idx:%d file:%d\n", file.index, file.fd); + + vq = dev->virtqueue[file.index]; + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = file.fd; +} + +static void +vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + + /* Remove from the data plane. 
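+ * Replacing the kick fd while the device is live would race with the
+ * data plane, so the device is stopped here and restarted by the
+ * message handler once the queue becomes ready.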
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring kick idx:%d file:%d\n", file.index, file.fd); + + vq = dev->virtqueue[file.index]; + if (vq->kickfd >= 0) + close(vq->kickfd); + vq->kickfd = file.fd; +} + +static void +free_zmbufs(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *zmbuf, *next; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + rte_pktmbuf_free(zmbuf->mbuf); + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + } + + rte_free(vq->zmbufs); +} + +/* + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. + */ +static int +vhost_user_get_vring_base(struct virtio_net *dev, + VhostUserMsg *msg) +{ + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + + /* We have to stop the queue (virtio) if it is running. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + dev->flags &= ~VIRTIO_DEV_READY; + + /* Here we are safe to get the last used index */ + msg->payload.state.num = vq->last_used_idx; + + RTE_LOG(INFO, VHOST_CONFIG, + "vring base idx:%d file:%d\n", msg->payload.state.index, msg->payload.state.num); + /* + * Based on current qemu vhost-user implementation, this message is + * sent and only sent in vhost_vring_stop. + * TODO: cleanup the vring, it isn't usable since here. + */ + if (vq->kickfd >= 0) + close(vq->kickfd); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + if (dev->dequeue_zero_copy) + free_zmbufs(vq); + rte_free(vq->shadow_used_ring); + vq->shadow_used_ring = NULL; + + return 0; +} + +/* + * when virtio queues are ready to work, qemu will send us to + * enable the virtio queue pair. + */ +static int +vhost_user_set_vring_enable(struct virtio_net *dev, + VhostUserMsg *msg) +{ + int enable = (int)msg->payload.state.num; + + RTE_LOG(INFO, VHOST_CONFIG, + "set queue enable: %d to qp idx: %d\n", + enable, msg->payload.state.index); + + if (dev->notify_ops->vring_state_changed) + dev->notify_ops->vring_state_changed(dev->vid, msg->payload.state.index, enable); + + dev->virtqueue[msg->payload.state.index]->enabled = enable; + + return 0; +} + +static void +vhost_user_set_protocol_features(struct virtio_net *dev, + uint64_t protocol_features) +{ + if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) + return; + + /* Remove from the data plane. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + dev->protocol_features = protocol_features; +} + +static int +vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + int fd = msg->fds[0]; + uint64_t size, off; + void *addr; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); + return -1; + } + + if (msg->size != sizeof(VhostUserLog)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid log base msg size: %"PRId32" != %d\n", + msg->size, (int)sizeof(VhostUserLog)); + return -1; + } + + /* Remove from the data plane. 
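+ * Remapping the dirty log while requests are in flight would race with
+ * the logging helpers, so the device is stopped before the new log
+ * region is mapped.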
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + RTE_LOG(INFO, VHOST_CONFIG, + "log mmap size: %"PRId64", offset: %"PRId64"\n", + size, off); + + /* + * mmap from 0 to workaround a hugepage mmap bug: mmap will + * fail when offset is not page size aligned. + */ + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); + return -1; + } + + /* + * Free previously mapped log memory on occasionally + * multiple VHOST_USER_SET_LOG_BASE. + */ + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + } + dev->log_addr = (uint64_t)(uintptr_t)addr; + dev->log_base = dev->log_addr + off; + dev->log_size = size; + + return 0; +} + +/* + * An rarp packet is constructed and broadcasted to notify switches about + * the new location of the migrated VM, so that packets from outside will + * not be lost after migration. + * + * However, we don't actually "send" a rarp packet here, instead, we set + * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. + */ +static int +vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + uint8_t *mac = (uint8_t *)&msg->payload.u64; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + memcpy(dev->mac.addr_bytes, mac, 6); + + /* + * Set the flag to inject a RARP broadcast packet at + * rte_vhost_dequeue_burst(). + * + * rte_smp_wmb() is for making sure the mac is copied + * before the flag is set. + */ + rte_smp_wmb(); + rte_atomic16_set(&dev->broadcast_rarp, 1); + + return 0; +} + +static int +vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + if (msg->payload.u64 < VIRTIO_MIN_MTU || + msg->payload.u64 > VIRTIO_MAX_MTU) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n", + msg->payload.u64); + + return -1; + } + + dev->mtu = msg->payload.u64; + + return 0; +} + +/* return bytes# of read on success or negative val on failure. 
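+ * The fixed-size header (plus any ancillary fds) is read first; the
+ * variable-length payload is then read separately and rejected if it
+ * exceeds sizeof(msg->payload).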
*/ +static int +read_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS); + if (ret <= 0) + return ret; + + if (msg && msg->size) { + if (msg->size > sizeof(msg->payload)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid msg size: %d\n", msg->size); + return -1; + } + ret = read(sockfd, &msg->payload, msg->size); + if (ret <= 0) + return ret; + if (ret != (int)msg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "read control message failed\n"); + return -1; + } + } + + return ret; +} + +static int +send_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags &= ~VHOST_USER_NEED_REPLY; + msg->flags |= VHOST_USER_VERSION; + msg->flags |= VHOST_USER_REPLY_MASK; + + ret = send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + + return ret; +} + +/* + * Allocate a queue pair if it hasn't been allocated yet + */ +static int +vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg) +{ + uint16_t vring_idx; + + switch (msg->request) { + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + break; + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + vring_idx = msg->payload.state.index; + break; + case VHOST_USER_SET_VRING_ADDR: + vring_idx = msg->payload.addr.index; + break; + default: + return 0; + } + + if (vring_idx >= VHOST_MAX_VRING) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid vring index: %u\n", vring_idx); + return -1; + } + + if (dev->virtqueue[vring_idx]) + return 0; + + return alloc_vring_queue(dev, vring_idx); +} + +static int +vhost_user_nvme_io_request_passthrough(struct virtio_net *dev, + uint16_t qid, uint16_t tail_head, + bool is_submission_queue) +{ + return -1; +} + +static int +vhost_user_nvme_admin_passthrough(struct virtio_net *dev, + void *cmd, void *cqe, void *buf) +{ + if (dev->notify_ops->vhost_nvme_admin_passthrough) { + return dev->notify_ops->vhost_nvme_admin_passthrough(dev->vid, cmd, cqe, buf); + } + + return -1; +} + +static int +vhost_user_nvme_set_cq_call(struct virtio_net *dev, uint16_t qid, int fd) +{ + if (dev->notify_ops->vhost_nvme_set_cq_call) { + return dev->notify_ops->vhost_nvme_set_cq_call(dev->vid, qid, fd); + } + + return -1; +} + +static int +vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap) +{ + if (dev->notify_ops->vhost_nvme_get_cap) { + return dev->notify_ops->vhost_nvme_get_cap(dev->vid, cap); + } + + return -1; +} + +int +vhost_user_msg_handler(int vid, int fd) +{ + struct virtio_net *dev; + struct VhostUserMsg msg; + struct vhost_vring_file file; + int ret; + uint64_t cap; + uint64_t enable; + uint8_t cqe[16]; + uint8_t cmd[64]; + uint8_t buf[4096]; + uint16_t qid, tail_head; + bool is_submission_queue; + + dev = get_device(vid); + if (dev == NULL) + return -1; + + if (!dev->notify_ops) { + dev->notify_ops = vhost_driver_callback_get(dev->ifname); + if (!dev->notify_ops) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get callback ops for driver %s\n", + dev->ifname); + return -1; + } + } + + ret = read_vhost_message(fd, &msg); + if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read message failed\n"); + else if (ret == 0) + RTE_LOG(INFO, VHOST_CONFIG, + "vhost peer closed\n"); + 
else + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read incorrect message\n"); + + return -1; + } + + RTE_LOG(INFO, VHOST_CONFIG, "%s: read message %s\n", + dev->ifname, vhost_message_str[msg.request]); + + ret = vhost_user_check_and_alloc_queue_pair(dev, &msg); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to alloc queue\n"); + return -1; + } + + switch (msg.request) { + case VHOST_USER_GET_CONFIG: + if (dev->notify_ops->get_config(dev->vid, + msg.payload.config.region, + msg.payload.config.size) != 0) { + msg.size = sizeof(uint64_t); + } + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_CONFIG: + if ((dev->notify_ops->set_config(dev->vid, + msg.payload.config.region, + msg.payload.config.offset, + msg.payload.config.size, + msg.payload.config.flags)) != 0) { + ret = 1; + } else { + ret = 0; + } + break; + case VHOST_USER_NVME_ADMIN: + if (!dev->is_nvme) { + dev->is_nvme = 1; + } + memcpy(cmd, msg.payload.nvme.cmd.req, sizeof(cmd)); + ret = vhost_user_nvme_admin_passthrough(dev, cmd, cqe, buf); + memcpy(msg.payload.nvme.cmd.cqe, cqe, sizeof(cqe)); + msg.size = sizeof(cqe); + /* NVMe Identify Command */ + if (cmd[0] == 0x06) { + memcpy(msg.payload.nvme.buf, &buf, 4096); + msg.size += 4096; + } + send_vhost_message(fd, &msg); + break; + case VHOST_USER_NVME_SET_CQ_CALL: + file.index = msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + file.fd = msg.fds[0]; + ret = vhost_user_nvme_set_cq_call(dev, file.index, file.fd); + break; + case VHOST_USER_NVME_GET_CAP: + ret = vhost_user_nvme_get_cap(dev, &cap); + if (!ret) + msg.payload.u64 = cap; + else + msg.payload.u64 = 0; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_NVME_START_STOP: + enable = msg.payload.u64; + /* device must be started before set cq call */ + if (enable) { + if (!(dev->flags & VIRTIO_DEV_RUNNING)) { + if (dev->notify_ops->new_device(dev->vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } + } else { + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } + } + break; + case VHOST_USER_NVME_IO_CMD: + qid = msg.payload.nvme_io.qid; + tail_head = msg.payload.nvme_io.tail_head; + is_submission_queue = (msg.payload.nvme_io.queue_type == VHOST_USER_NVME_SUBMISSION_QUEUE) ? 
true : false; + vhost_user_nvme_io_request_passthrough(dev, qid, tail_head, is_submission_queue); + break; + case VHOST_USER_GET_FEATURES: + msg.payload.u64 = vhost_user_get_features(dev); + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_FEATURES: + vhost_user_set_features(dev, msg.payload.u64); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + vhost_user_set_protocol_features(dev, msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + vhost_user_set_owner(); + break; + case VHOST_USER_RESET_OWNER: + vhost_user_reset_owner(dev); + break; + + case VHOST_USER_SET_MEM_TABLE: + ret = vhost_user_set_mem_table(dev, &msg); + break; + + case VHOST_USER_SET_LOG_BASE: + vhost_user_set_log_base(dev, &msg); + + /* it needs a reply */ + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_LOG_FD: + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + break; + + case VHOST_USER_SET_VRING_NUM: + vhost_user_set_vring_num(dev, &msg); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_user_set_vring_addr(dev, &msg); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_user_set_vring_base(dev, &msg); + break; + + case VHOST_USER_GET_VRING_BASE: + vhost_user_get_vring_base(dev, &msg); + msg.size = sizeof(msg.payload.state); + send_vhost_message(fd, &msg); + break; + + case VHOST_USER_SET_VRING_KICK: + vhost_user_set_vring_kick(dev, &msg); + break; + case VHOST_USER_SET_VRING_CALL: + vhost_user_set_vring_call(dev, &msg); + break; + + case VHOST_USER_SET_VRING_ERR: + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + break; + + case VHOST_USER_GET_QUEUE_NUM: + msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + + case VHOST_USER_SET_VRING_ENABLE: + vhost_user_set_vring_enable(dev, &msg); + break; + case VHOST_USER_SEND_RARP: + vhost_user_send_rarp(dev, &msg); + break; + + case VHOST_USER_NET_SET_MTU: + ret = vhost_user_net_set_mtu(dev, &msg); + break; + + default: + ret = -1; + break; + + } + + if (msg.flags & VHOST_USER_NEED_REPLY) { + msg.payload.u64 = !!ret; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + } + + if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) { + dev->flags |= VIRTIO_DEV_READY; + + if (!(dev->flags & VIRTIO_DEV_RUNNING)) { + if (dev->dequeue_zero_copy) { + RTE_LOG(INFO, VHOST_CONFIG, + "dequeue zero copy is enabled\n"); + } + + if (dev->notify_ops->new_device(dev->vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } + } + + return 0; +} diff --git a/src/spdk/lib/vhost/rte_vhost/vhost_user.h b/src/spdk/lib/vhost/rte_vhost/vhost_user.h new file mode 100644 index 00000000..cb5ff0a6 --- /dev/null +++ b/src/spdk/lib/vhost/rte_vhost/vhost_user.h @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include + +#include "rte_vhost.h" + +/* refer to hw/virtio/vhost-user.c */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ + (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ + (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ + (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ + (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_NVME_ADMIN = 80, + VHOST_USER_NVME_SET_CQ_CALL = 81, + VHOST_USER_NVME_GET_CAP = 82, + VHOST_USER_NVME_START_STOP = 83, + VHOST_USER_NVME_IO_CMD = 84, + VHOST_USER_MAX +} VhostUserRequest; + +typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_MAX +} VhostUserSlaveRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} 
VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + +enum VhostUserNvmeQueueTypes { + VHOST_USER_NVME_SUBMISSION_QUEUE = 1, + VHOST_USER_NVME_COMPLETION_QUEUE = 2, +}; + +typedef struct VhostUserNvmeIO { + enum VhostUserNvmeQueueTypes queue_type; + uint32_t qid; + uint32_t tail_head; +} VhostUserNvmeIO; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) +#define VHOST_USER_NEED_REPLY (0x1 << 3) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + VhostUserConfig config; + struct nvme { + union { + uint8_t req[64]; + uint8_t cqe[16]; + } cmd; + uint8_t buf[4096]; + } nvme; + struct VhostUserNvmeIO nvme_io; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)) VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + + +/* vhost_user.c */ +int vhost_user_msg_handler(int vid, int fd); + +/* socket.c */ +int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); +int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); + +#endif diff --git a/src/spdk/lib/vhost/vhost.c b/src/spdk/lib/vhost/vhost.c new file mode 100644 index 00000000..0cacf613 --- /dev/null +++ b/src/spdk/lib/vhost/vhost.c @@ -0,0 +1,1503 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/likely.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/barrier.h" + +#include "spdk/vhost.h" +#include "vhost_internal.h" + +static uint32_t *g_num_ctrlrs; + +/* Path to folder where character device will be created. Can be set by user. */ +static char dev_dirname[PATH_MAX] = ""; + +struct spdk_vhost_dev_event_ctx { + /** Pointer to the controller obtained before enqueuing the event */ + struct spdk_vhost_dev *vdev; + + /** ID of the vdev to send event to. */ + unsigned vdev_id; + + /** User callback function to be executed on given lcore. */ + spdk_vhost_event_fn cb_fn; + + /** Semaphore used to signal that event is done. */ + sem_t sem; + + /** Response to be written by enqueued event. */ + int response; +}; + +static int new_connection(int vid); +static int start_device(int vid); +static void stop_device(int vid); +static void destroy_connection(int vid); +static int get_config(int vid, uint8_t *config, uint32_t len); +static int set_config(int vid, uint8_t *config, uint32_t offset, + uint32_t size, uint32_t flags); + +const struct vhost_device_ops g_spdk_vhost_ops = { + .new_device = start_device, + .destroy_device = stop_device, + .get_config = get_config, + .set_config = set_config, + .new_connection = new_connection, + .destroy_connection = destroy_connection, + .vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough, + .vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call, + .vhost_nvme_get_cap = spdk_vhost_nvme_get_cap, +}; + +static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER( + g_spdk_vhost_devices); +static pthread_mutex_t g_spdk_vhost_mutex = PTHREAD_MUTEX_INITIALIZER; + +void *spdk_vhost_gpa_to_vva(struct spdk_vhost_dev *vdev, uint64_t addr, uint64_t len) +{ + void *vva; + uint64_t newlen; + + newlen = len; + vva = (void *)rte_vhost_va_from_guest_pa(vdev->mem, addr, &newlen); + if (newlen != len) { + return NULL; + } + + return vva; + +} + +static void +spdk_vhost_log_req_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue, + uint16_t req_id) +{ + struct vring_desc *desc, *desc_table; + uint32_t desc_table_size; + int rc; + + if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) { + return; + } + + rc = spdk_vhost_vq_get_desc(vdev, virtqueue, req_id, &desc, &desc_table, &desc_table_size); + if (spdk_unlikely(rc != 0)) { + SPDK_ERRLOG("Can't log used ring descriptors!\n"); + return; + } + + do { + if (spdk_vhost_vring_desc_is_wr(desc)) { + /* To be honest, only pages realy touched should be logged, but + * doing so would require tracking those changes in each backed. + * Also backend most likely will touch all/most of those pages so + * for lets assume we touched all pages passed to as writeable buffers. 
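+ * The effect is that every writable descriptor is logged in full by the
+ * rte_vhost_log_write() call below, instead of tracking per-page
+ * dirtiness in each backend.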
*/ + rte_vhost_log_write(vdev->vid, desc->addr, desc->len); + } + spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); + } while (desc); +} + +static void +spdk_vhost_log_used_vring_elem(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue, + uint16_t idx) +{ + uint64_t offset, len; + uint16_t vq_idx; + + if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) { + return; + } + + offset = offsetof(struct vring_used, ring[idx]); + len = sizeof(virtqueue->vring.used->ring[idx]); + vq_idx = virtqueue - vdev->virtqueue; + + rte_vhost_log_used_vring(vdev->vid, vq_idx, offset, len); +} + +static void +spdk_vhost_log_used_vring_idx(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue) +{ + uint64_t offset, len; + uint16_t vq_idx; + + if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) { + return; + } + + offset = offsetof(struct vring_used, idx); + len = sizeof(virtqueue->vring.used->idx); + vq_idx = virtqueue - vdev->virtqueue; + + rte_vhost_log_used_vring(vdev->vid, vq_idx, offset, len); +} + +/* + * Get available requests from avail ring. + */ +uint16_t +spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs, + uint16_t reqs_len) +{ + struct rte_vhost_vring *vring = &virtqueue->vring; + struct vring_avail *avail = vring->avail; + uint16_t size_mask = vring->size - 1; + uint16_t last_idx = vring->last_avail_idx, avail_idx = avail->idx; + uint16_t count, i; + + count = avail_idx - last_idx; + if (spdk_likely(count == 0)) { + return 0; + } + + if (spdk_unlikely(count > vring->size)) { + /* TODO: the queue is unrecoverably broken and should be marked so. + * For now we will fail silently and report there are no new avail entries. + */ + return 0; + } + + count = spdk_min(count, reqs_len); + vring->last_avail_idx += count; + for (i = 0; i < count; i++) { + reqs[i] = vring->avail->ring[(last_idx + i) & size_mask]; + } + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, + "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n", + last_idx, avail_idx, count); + + return count; +} + +static bool +spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_INDIRECT); +} + +int +spdk_vhost_vq_get_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue, + uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, + uint32_t *desc_table_size) +{ + if (spdk_unlikely(req_idx >= virtqueue->vring.size)) { + return -1; + } + + *desc = &virtqueue->vring.desc[req_idx]; + + if (spdk_vhost_vring_desc_is_indirect(*desc)) { + assert(spdk_vhost_dev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)); + *desc_table_size = (*desc)->len / sizeof(**desc); + *desc_table = spdk_vhost_gpa_to_vva(vdev, (*desc)->addr, + sizeof(**desc) * *desc_table_size); + *desc = *desc_table; + if (*desc == NULL) { + return -1; + } + + return 0; + } + + *desc_table = virtqueue->vring.desc; + *desc_table_size = virtqueue->vring.size; + + return 0; +} + +int +spdk_vhost_vq_used_signal(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue) +{ + if (virtqueue->used_req_cnt == 0) { + return 0; + } + + virtqueue->req_cnt += virtqueue->used_req_cnt; + virtqueue->used_req_cnt = 0; + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, + "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n", + virtqueue - vdev->virtqueue, virtqueue->vring.last_used_idx); + + eventfd_write(virtqueue->vring.callfd, (eventfd_t)1); + return 1; +} + + +static void +check_dev_io_stats(struct 
spdk_vhost_dev *vdev, uint64_t now) +{ + struct spdk_vhost_virtqueue *virtqueue; + uint32_t irq_delay_base = vdev->coalescing_delay_time_base; + uint32_t io_threshold = vdev->coalescing_io_rate_threshold; + int32_t irq_delay; + uint32_t req_cnt; + uint16_t q_idx; + + if (now < vdev->next_stats_check_time) { + return; + } + + vdev->next_stats_check_time = now + vdev->stats_check_interval; + for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) { + virtqueue = &vdev->virtqueue[q_idx]; + + req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt; + if (req_cnt <= io_threshold) { + continue; + } + + irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold; + virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay); + + virtqueue->req_cnt = 0; + virtqueue->next_event_time = now; + } +} + +void +spdk_vhost_dev_used_signal(struct spdk_vhost_dev *vdev) +{ + struct spdk_vhost_virtqueue *virtqueue; + uint64_t now; + uint16_t q_idx; + + if (vdev->coalescing_delay_time_base == 0) { + for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) { + virtqueue = &vdev->virtqueue[q_idx]; + + if (virtqueue->vring.desc == NULL || + (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + continue; + } + + spdk_vhost_vq_used_signal(vdev, virtqueue); + } + } else { + now = spdk_get_ticks(); + check_dev_io_stats(vdev, now); + + for (q_idx = 0; q_idx < vdev->max_queues; q_idx++) { + virtqueue = &vdev->virtqueue[q_idx]; + + /* No need for event right now */ + if (now < virtqueue->next_event_time || + (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + continue; + } + + if (!spdk_vhost_vq_used_signal(vdev, virtqueue)) { + continue; + } + + /* Syscall is quite long so update time */ + now = spdk_get_ticks(); + virtqueue->next_event_time = now + virtqueue->irq_delay_time; + } + } +} + +int +spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us, + uint32_t iops_threshold) +{ + uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL; + uint32_t io_rate = iops_threshold * SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS / 1000U; + + if (delay_time_base >= UINT32_MAX) { + SPDK_ERRLOG("Delay time of %"PRIu32" is to big\n", delay_base_us); + return -EINVAL; + } else if (io_rate == 0) { + SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate, + 1000U / SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS); + return -EINVAL; + } + + vdev->coalescing_delay_time_base = delay_time_base; + vdev->coalescing_io_rate_threshold = io_rate; + + vdev->coalescing_delay_us = delay_base_us; + vdev->coalescing_iops_threshold = iops_threshold; + return 0; +} + +void +spdk_vhost_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us, + uint32_t *iops_threshold) +{ + if (delay_base_us) { + *delay_base_us = vdev->coalescing_delay_us; + } + + if (iops_threshold) { + *iops_threshold = vdev->coalescing_iops_threshold; + } +} + +/* + * Enqueue id and len to used ring. 
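+ * The element is written at last_used_idx masked by (size - 1), and
+ * used->idx is only advanced after a write barrier so the guest never
+ * observes a partially written entry.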
+ */ +void +spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue, + uint16_t id, uint32_t len) +{ + struct rte_vhost_vring *vring = &virtqueue->vring; + struct vring_used *used = vring->used; + uint16_t last_idx = vring->last_used_idx & (vring->size - 1); + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING, + "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n", + virtqueue - vdev->virtqueue, vring->last_used_idx, id, len); + + spdk_vhost_log_req_desc(vdev, virtqueue, id); + + vring->last_used_idx++; + used->ring[last_idx].id = id; + used->ring[last_idx].len = len; + + /* Ensure the used ring is updated before we log it or increment used->idx. */ + spdk_smp_wmb(); + + spdk_vhost_log_used_vring_elem(vdev, virtqueue, last_idx); + * (volatile uint16_t *) &used->idx = vring->last_used_idx; + spdk_vhost_log_used_vring_idx(vdev, virtqueue); + + /* Ensure all our used ring changes are visible to the guest at the time + * of interrupt. + * TODO: this is currently an sfence on x86. For other architectures we + * will most likely need an smp_mb(), but smp_mb() is an overkill for x86. + */ + spdk_wmb(); + + virtqueue->used_req_cnt++; +} + +int +spdk_vhost_vring_desc_get_next(struct vring_desc **desc, + struct vring_desc *desc_table, uint32_t desc_table_size) +{ + struct vring_desc *old_desc = *desc; + uint16_t next_idx; + + if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) { + *desc = NULL; + return 0; + } + + next_idx = old_desc->next; + if (spdk_unlikely(next_idx >= desc_table_size)) { + *desc = NULL; + return -1; + } + + *desc = &desc_table[next_idx]; + return 0; +} + +bool +spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_WRITE); +} + +#define _2MB_OFFSET(ptr) ((ptr) & (0x200000 - 1)) + +int +spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, + uint16_t *iov_index, const struct vring_desc *desc) +{ + uint32_t remaining = desc->len; + uint32_t to_boundary; + uint32_t len; + uintptr_t payload = desc->addr; + uintptr_t vva; + + while (remaining) { + if (*iov_index >= SPDK_VHOST_IOVS_MAX) { + SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX); + return -1; + } + vva = (uintptr_t)rte_vhost_gpa_to_vva(vdev->mem, payload); + if (vva == 0) { + SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload); + return -1; + } + to_boundary = 0x200000 - _2MB_OFFSET(payload); + if (spdk_likely(remaining <= to_boundary)) { + len = remaining; + } else { + /* + * Descriptor crosses a 2MB hugepage boundary. vhost memory regions are allocated + * from hugepage memory, so this means this descriptor may be described by + * discontiguous vhost memory regions. Do not blindly split on the 2MB boundary, + * only split it if the two sides of the boundary do not map to the same vhost + * memory region. This helps ensure we do not exceed the max number of IOVs + * defined by SPDK_VHOST_IOVS_MAX. 
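+ * For example, a 12 KiB descriptor starting 4 KiB below a 2 MB boundary
+ * is first capped at the boundary, then grown back in up-to-2 MB steps
+ * while the translation stays virtually contiguous, so it still becomes
+ * a single iovec when both sides map to the same region.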
+ */ + len = to_boundary; + while (len < remaining) { + if (vva + len != (uintptr_t)rte_vhost_gpa_to_vva(vdev->mem, payload + len)) { + break; + } + len += spdk_min(remaining - len, 0x200000); + } + } + iov[*iov_index].iov_base = (void *)vva; + iov[*iov_index].iov_len = len; + remaining -= len; + payload += len; + (*iov_index)++; + } + + return 0; +} + +static struct spdk_vhost_dev * +spdk_vhost_dev_find_by_id(unsigned id) +{ + struct spdk_vhost_dev *vdev; + + TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { + if (vdev->id == id) { + return vdev; + } + } + + return NULL; +} + +static struct spdk_vhost_dev * +spdk_vhost_dev_find_by_vid(int vid) +{ + struct spdk_vhost_dev *vdev; + + TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { + if (vdev->vid == vid) { + return vdev; + } + } + + return NULL; +} + +#define SHIFT_2MB 21 +#define SIZE_2MB (1ULL << SHIFT_2MB) +#define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB +#define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB + +static void +spdk_vhost_dev_mem_register(struct spdk_vhost_dev *vdev) +{ + struct rte_vhost_mem_region *region; + uint32_t i; + + for (i = 0; i < vdev->mem->nregions; i++) { + uint64_t start, end, len; + region = &vdev->mem->regions[i]; + start = FLOOR_2MB(region->mmap_addr); + end = CEIL_2MB(region->mmap_addr + region->mmap_size); + len = end - start; + SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", + start, len); + + if (spdk_mem_register((void *)start, len) != 0) { + SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n", + i); + continue; + } + } +} + +static void +spdk_vhost_dev_mem_unregister(struct spdk_vhost_dev *vdev) +{ + struct rte_vhost_mem_region *region; + uint32_t i; + + for (i = 0; i < vdev->mem->nregions; i++) { + uint64_t start, end, len; + region = &vdev->mem->regions[i]; + start = FLOOR_2MB(region->mmap_addr); + end = CEIL_2MB(region->mmap_addr + region->mmap_size); + len = end - start; + + if (spdk_vtophys((void *) start) == SPDK_VTOPHYS_ERROR) { + continue; /* region has not been registered */ + } + + if (spdk_mem_unregister((void *)start, len) != 0) { + assert(false); + } + } + +} + +static void +spdk_vhost_free_reactor(uint32_t lcore) +{ + g_num_ctrlrs[lcore]--; +} + +struct spdk_vhost_dev * +spdk_vhost_dev_find(const char *ctrlr_name) +{ + struct spdk_vhost_dev *vdev; + size_t dev_dirname_len = strlen(dev_dirname); + + if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { + ctrlr_name += dev_dirname_len; + } + + TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { + if (strcmp(vdev->name, ctrlr_name) == 0) { + return vdev; + } + } + + return NULL; +} + +static int +spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask) +{ + int rc; + + if (cpumask == NULL) { + return -1; + } + + if (mask == NULL) { + spdk_cpuset_copy(cpumask, spdk_app_get_core_mask()); + return 0; + } + + rc = spdk_app_parse_core_mask(mask, cpumask); + if (rc < 0) { + SPDK_ERRLOG("invalid cpumask %s\n", mask); + return -1; + } + + if (spdk_cpuset_count(cpumask) == 0) { + SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n", + spdk_cpuset_fmt(spdk_app_get_core_mask())); + return -1; + } + + return 0; +} + +static void * +_start_rte_driver(void *arg) +{ + char *path = arg; + + if (rte_vhost_driver_start(path) != 0) { + return NULL; + } + + return path; +} + +int +spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, + const struct 
spdk_vhost_dev_backend *backend) +{ + static unsigned ctrlr_num; + char path[PATH_MAX]; + struct stat file_stat; + struct spdk_cpuset *cpumask; + int rc; + + assert(vdev); + + /* We expect devices inside g_spdk_vhost_devices to be sorted in ascending + * order in regard of vdev->id. For now we always set vdev->id = ctrlr_num++ + * and append each vdev to the very end of g_spdk_vhost_devices list. + * This is required for foreach vhost events to work. + */ + if (ctrlr_num == UINT_MAX) { + assert(false); + return -EINVAL; + } + + if (name == NULL) { + SPDK_ERRLOG("Can't register controller with no name\n"); + return -EINVAL; + } + + cpumask = spdk_cpuset_alloc(); + if (!cpumask) { + SPDK_ERRLOG("spdk_cpuset_alloc failed\n"); + return -ENOMEM; + } + + if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) { + SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n", + mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask())); + rc = -EINVAL; + goto out; + } + + if (spdk_vhost_dev_find(name)) { + SPDK_ERRLOG("vhost controller %s already exists.\n", name); + rc = -EEXIST; + goto out; + } + + if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) { + SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname, + name); + rc = -EINVAL; + goto out; + } + + /* Register vhost driver to handle vhost messages. */ + if (stat(path, &file_stat) != -1) { + if (!S_ISSOCK(file_stat.st_mode)) { + SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " + "The file already exists and is not a socket.\n", + path); + rc = -EIO; + goto out; + } else if (unlink(path) != 0) { + SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": " + "The socket already exists and failed to unlink.\n", + path); + rc = -EIO; + goto out; + } + } + + if (rte_vhost_driver_register(path, 0) != 0) { + SPDK_ERRLOG("Could not register controller %s with vhost library\n", name); + SPDK_ERRLOG("Check if domain socket %s already exists\n", path); + rc = -EIO; + goto out; + } + if (rte_vhost_driver_set_features(path, backend->virtio_features) || + rte_vhost_driver_disable_features(path, backend->disabled_features)) { + SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name); + + rte_vhost_driver_unregister(path); + rc = -EIO; + goto out; + } + + if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) { + rte_vhost_driver_unregister(path); + SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name); + rc = -EIO; + goto out; + } + + /* The following might start a POSIX thread that polls for incoming + * socket connections and calls backend->start/stop_device. These backend + * callbacks are also protected by the global SPDK vhost mutex, so we're + * safe with not initializing the vdev just yet. 
+ */ + if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) { + SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n", + name, errno, spdk_strerror(errno)); + rte_vhost_driver_unregister(path); + rc = -EIO; + goto out; + } + + vdev->name = strdup(name); + vdev->path = strdup(path); + vdev->id = ctrlr_num++; + vdev->vid = -1; + vdev->lcore = -1; + vdev->cpumask = cpumask; + vdev->registered = true; + vdev->backend = backend; + + spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US, + SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD); + vdev->next_stats_check_time = 0; + vdev->stats_check_interval = SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS * spdk_get_ticks_hz() / + 1000UL; + + TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq); + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name); + return 0; + +out: + spdk_cpuset_free(cpumask); + return rc; +} + +int +spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev) +{ + if (vdev->vid != -1) { + SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name); + return -EBUSY; + } + + if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) { + SPDK_ERRLOG("Could not unregister controller %s with vhost library\n" + "Check if domain socket %s still exists\n", + vdev->name, vdev->path); + return -EIO; + } + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name); + + free(vdev->name); + free(vdev->path); + spdk_cpuset_free(vdev->cpumask); + TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq); + return 0; +} + +static struct spdk_vhost_dev * +spdk_vhost_dev_next(unsigned i) +{ + struct spdk_vhost_dev *vdev; + + TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { + if (vdev->id > i) { + return vdev; + } + } + + return NULL; +} + +const char * +spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev) +{ + assert(vdev != NULL); + return vdev->name; +} + +const struct spdk_cpuset * +spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev) +{ + assert(vdev != NULL); + return vdev->cpumask; +} + +static uint32_t +spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask) +{ + uint32_t i, selected_core; + uint32_t min_ctrlrs; + + min_ctrlrs = INT_MAX; + selected_core = spdk_env_get_first_core(); + + SPDK_ENV_FOREACH_CORE(i) { + if (!spdk_cpuset_get_cpu(cpumask, i)) { + continue; + } + + if (g_num_ctrlrs[i] < min_ctrlrs) { + selected_core = i; + min_ctrlrs = g_num_ctrlrs[i]; + } + } + + g_num_ctrlrs[selected_core]++; + return selected_core; +} + +void +spdk_vhost_dev_backend_event_done(void *event_ctx, int response) +{ + struct spdk_vhost_dev_event_ctx *ctx = event_ctx; + + ctx->response = response; + sem_post(&ctx->sem); +} + +static void +spdk_vhost_event_cb(void *arg1, void *arg2) +{ + struct spdk_vhost_dev_event_ctx *ctx = arg1; + + ctx->cb_fn(ctx->vdev, ctx); +} + +static void +spdk_vhost_event_async_fn(void *arg1, void *arg2) +{ + struct spdk_vhost_dev_event_ctx *ctx = arg1; + struct spdk_vhost_dev *vdev; + struct spdk_event *ev; + + if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) { + ev = spdk_event_allocate(spdk_env_get_current_core(), spdk_vhost_event_async_fn, arg1, arg2); + spdk_event_call(ev); + return; + } + + vdev = spdk_vhost_dev_find_by_id(ctx->vdev_id); + if (vdev != ctx->vdev) { + /* vdev has been changed after enqueuing this event */ + vdev = NULL; + } + + if (vdev != NULL && vdev->lcore >= 0 && + (uint32_t)vdev->lcore != spdk_env_get_current_core()) { + /* if vdev has been relocated to other core, it is no longer thread-safe + * to access its 
contents here. Even though we're running under global vhost + * mutex, the controller itself (and its pollers) are not. We need to chase + * the vdev thread as many times as necessary. + */ + ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_async_fn, arg1, arg2); + spdk_event_call(ev); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + ctx->cb_fn(vdev, arg2); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + + free(ctx); +} + +static void spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev, + spdk_vhost_event_fn fn, void *arg); + +static void +spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2) +{ + struct spdk_vhost_dev_event_ctx *ctx = arg1; + struct spdk_vhost_dev *vdev; + struct spdk_event *ev; + + if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) { + ev = spdk_event_allocate(spdk_env_get_current_core(), + spdk_vhost_event_async_foreach_fn, arg1, arg2); + spdk_event_call(ev); + return; + } + + vdev = spdk_vhost_dev_find_by_id(ctx->vdev_id); + if (vdev != ctx->vdev) { + /* ctx->vdev is probably a dangling pointer at this point. + * It must have been removed in the meantime, so we just skip + * it in our foreach chain. */ + goto out_unlock_continue; + } + + /* the assert is just for static analyzers, vdev cannot be NULL here */ + assert(vdev != NULL); + if (vdev->lcore >= 0 && + (uint32_t)vdev->lcore != spdk_env_get_current_core()) { + /* if vdev has been relocated to other core, it is no longer thread-safe + * to access its contents here. Even though we're running under global vhost + * mutex, the controller itself (and its pollers) are not. We need to chase + * the vdev thread as many times as necessary. + */ + ev = spdk_event_allocate(vdev->lcore, + spdk_vhost_event_async_foreach_fn, arg1, arg2); + spdk_event_call(ev); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + ctx->cb_fn(vdev, arg2); + +out_unlock_continue: + vdev = spdk_vhost_dev_next(ctx->vdev_id); + spdk_vhost_external_event_foreach_continue(vdev, ctx->cb_fn, arg2); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + + free(ctx); +} + +static int +_spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn, + unsigned timeout_sec, const char *errmsg) +{ + struct spdk_vhost_dev_event_ctx ev_ctx = {0}; + struct spdk_event *ev; + struct timespec timeout; + int rc; + + rc = sem_init(&ev_ctx.sem, 0, 0); + if (rc != 0) { + SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n"); + return -errno; + } + + ev_ctx.vdev = vdev; + ev_ctx.cb_fn = cb_fn; + ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_cb, &ev_ctx, NULL); + assert(ev); + spdk_event_call(ev); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_sec += timeout_sec; + + rc = sem_timedwait(&ev_ctx.sem, &timeout); + if (rc != 0) { + SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg); + sem_wait(&ev_ctx.sem); + } + + sem_destroy(&ev_ctx.sem); + pthread_mutex_lock(&g_spdk_vhost_mutex); + return ev_ctx.response; +} + +static int +spdk_vhost_event_async_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn, void *arg, + bool foreach) +{ + struct spdk_vhost_dev_event_ctx *ev_ctx; + struct spdk_event *ev; + spdk_event_fn fn; + + ev_ctx = calloc(1, sizeof(*ev_ctx)); + if (ev_ctx == NULL) { + SPDK_ERRLOG("Failed to alloc vhost event.\n"); + assert(false); + return -ENOMEM; + } + + ev_ctx->vdev = vdev; + ev_ctx->vdev_id = vdev->id; + ev_ctx->cb_fn = cb_fn; + + fn = foreach ? 
spdk_vhost_event_async_foreach_fn : spdk_vhost_event_async_fn; + ev = spdk_event_allocate(ev_ctx->vdev->lcore, fn, ev_ctx, arg); + assert(ev); + spdk_event_call(ev); + + return 0; +} + +static void +stop_device(int vid) +{ + struct spdk_vhost_dev *vdev; + struct rte_vhost_vring *q; + int rc; + uint16_t i; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = spdk_vhost_dev_find_by_vid(vid); + if (vdev == NULL) { + SPDK_ERRLOG("Couldn't find device with vid %d to stop.\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + if (vdev->lcore == -1) { + SPDK_ERRLOG("Controller %s is not loaded.\n", vdev->name); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + rc = _spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device"); + if (rc != 0) { + SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + for (i = 0; i < vdev->max_queues; i++) { + q = &vdev->virtqueue[i].vring; + if (q->desc == NULL) { + continue; + } + rte_vhost_set_vhost_vring_last_idx(vdev->vid, i, q->last_avail_idx, q->last_used_idx); + } + + spdk_vhost_dev_mem_unregister(vdev); + free(vdev->mem); + spdk_vhost_free_reactor(vdev->lcore); + vdev->lcore = -1; + pthread_mutex_unlock(&g_spdk_vhost_mutex); +} + +static int +start_device(int vid) +{ + struct spdk_vhost_dev *vdev; + int rc = -1; + uint16_t i; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + + vdev = spdk_vhost_dev_find_by_vid(vid); + if (vdev == NULL) { + SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid); + goto out; + } + + if (vdev->lcore != -1) { + SPDK_ERRLOG("Controller %s already loaded.\n", vdev->name); + goto out; + } + + vdev->max_queues = 0; + memset(vdev->virtqueue, 0, sizeof(vdev->virtqueue)); + for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) { + if (rte_vhost_get_vhost_vring(vid, i, &vdev->virtqueue[i].vring)) { + continue; + } + + if (vdev->virtqueue[i].vring.desc == NULL || + vdev->virtqueue[i].vring.size == 0) { + continue; + } + + /* Disable notifications. */ + if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) { + SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i); + goto out; + } + + vdev->max_queues = i + 1; + } + + if (rte_vhost_get_negotiated_features(vid, &vdev->negotiated_features) != 0) { + SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid); + goto out; + } + + if (rte_vhost_get_mem_table(vid, &vdev->mem) != 0) { + SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid); + goto out; + } + + /* + * Not sure right now but this look like some kind of QEMU bug and guest IO + * might be frozed without kicking all queues after live-migration. This look like + * the previous vhost instance failed to effectively deliver all interrupts before + * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts + * should be ignored by guest virtio driver. + * + * Tested on QEMU 2.10.91 and 2.11.50. 
+ */ + for (i = 0; i < vdev->max_queues; i++) { + if (vdev->virtqueue[i].vring.callfd != -1) { + eventfd_write(vdev->virtqueue[i].vring.callfd, (eventfd_t)1); + } + } + + vdev->lcore = spdk_vhost_allocate_reactor(vdev->cpumask); + spdk_vhost_dev_mem_register(vdev); + rc = _spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device"); + if (rc != 0) { + spdk_vhost_dev_mem_unregister(vdev); + free(vdev->mem); + spdk_vhost_free_reactor(vdev->lcore); + vdev->lcore = -1; + } + +out: + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return rc; +} + +static int +get_config(int vid, uint8_t *config, uint32_t len) +{ + struct spdk_vhost_dev *vdev; + int rc = -1; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = spdk_vhost_dev_find_by_vid(vid); + if (vdev == NULL) { + SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid); + goto out; + } + + if (vdev->backend->vhost_get_config) { + rc = vdev->backend->vhost_get_config(vdev, config, len); + } + +out: + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return rc; +} + +static int +set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags) +{ + struct spdk_vhost_dev *vdev; + int rc = -1; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = spdk_vhost_dev_find_by_vid(vid); + if (vdev == NULL) { + SPDK_ERRLOG("Controller with vid %d doesn't exist.\n", vid); + goto out; + } + + if (vdev->backend->vhost_set_config) { + rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags); + } + +out: + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return rc; +} + +int +spdk_vhost_set_socket_path(const char *basename) +{ + int ret; + + if (basename && strlen(basename) > 0) { + ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename); + if (ret <= 0) { + return -EINVAL; + } + if ((size_t)ret >= sizeof(dev_dirname) - 2) { + SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret); + return -EINVAL; + } + + if (dev_dirname[ret - 1] != '/') { + dev_dirname[ret] = '/'; + dev_dirname[ret + 1] = '\0'; + } + } + + return 0; +} + +static void * +session_shutdown(void *arg) +{ + struct spdk_vhost_dev *vdev = NULL; + + TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) { + rte_vhost_driver_unregister(vdev->path); + vdev->registered = false; + } + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n"); + spdk_event_call((struct spdk_event *)arg); + return NULL; +} + +void +spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + assert(vdev->backend->dump_info_json != NULL); + vdev->backend->dump_info_json(vdev, w); +} + +int +spdk_vhost_dev_remove(struct spdk_vhost_dev *vdev) +{ + return vdev->backend->remove_device(vdev); +} + +static int +new_connection(int vid) +{ + struct spdk_vhost_dev *vdev; + char ifname[PATH_MAX]; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) { + SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return -1; + } + + vdev = spdk_vhost_dev_find(ifname); + if (vdev == NULL) { + SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return -1; + } + + /* since pollers are not running it safe not to use spdk_event here */ + if (vdev->vid != -1) { + SPDK_ERRLOG("Device with vid %d is already connected.\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return -1; + } + + vdev->vid = vid; + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return 0; +} + +static void 
+destroy_connection(int vid) +{ + struct spdk_vhost_dev *vdev; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = spdk_vhost_dev_find_by_vid(vid); + if (vdev == NULL) { + SPDK_ERRLOG("Couldn't find device with vid %d to destroy connection for.\n", vid); + pthread_mutex_unlock(&g_spdk_vhost_mutex); + return; + } + + /* since pollers are not running it safe not to use spdk_event here */ + vdev->vid = -1; + pthread_mutex_unlock(&g_spdk_vhost_mutex); +} + +void +spdk_vhost_call_external_event(const char *ctrlr_name, spdk_vhost_event_fn fn, void *arg) +{ + struct spdk_vhost_dev *vdev; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = spdk_vhost_dev_find(ctrlr_name); + + if (vdev == NULL) { + pthread_mutex_unlock(&g_spdk_vhost_mutex); + fn(NULL, arg); + return; + } + + if (vdev->lcore == -1) { + fn(vdev, arg); + } else { + spdk_vhost_event_async_send(vdev, fn, arg, false); + } + + pthread_mutex_unlock(&g_spdk_vhost_mutex); +} + +static void +spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev, + spdk_vhost_event_fn fn, void *arg) +{ + if (vdev == NULL) { + fn(NULL, arg); + return; + } + + while (vdev->lcore == -1) { + fn(vdev, arg); + vdev = spdk_vhost_dev_next(vdev->id); + if (vdev == NULL) { + fn(NULL, arg); + return; + } + } + + spdk_vhost_event_async_send(vdev, fn, arg, true); +} + +void +spdk_vhost_call_external_event_foreach(spdk_vhost_event_fn fn, void *arg) +{ + struct spdk_vhost_dev *vdev; + + pthread_mutex_lock(&g_spdk_vhost_mutex); + vdev = TAILQ_FIRST(&g_spdk_vhost_devices); + spdk_vhost_external_event_foreach_continue(vdev, fn, arg); + pthread_mutex_unlock(&g_spdk_vhost_mutex); +} + +void +spdk_vhost_lock(void) +{ + pthread_mutex_lock(&g_spdk_vhost_mutex); +} + +void +spdk_vhost_unlock(void) +{ + pthread_mutex_unlock(&g_spdk_vhost_mutex); +} + +int +spdk_vhost_init(void) +{ + uint32_t last_core; + size_t len; + int ret; + + if (dev_dirname[0] == '\0') { + if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) { + SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno)); + return -1; + } + + len = strlen(dev_dirname); + if (dev_dirname[len - 1] != '/') { + dev_dirname[len] = '/'; + dev_dirname[len + 1] = '\0'; + } + } + + last_core = spdk_env_get_last_core(); + g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t)); + if (!g_num_ctrlrs) { + SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n", + last_core + 1); + return -1; + } + + ret = spdk_vhost_scsi_controller_construct(); + if (ret != 0) { + SPDK_ERRLOG("Cannot construct vhost controllers\n"); + return -1; + } + + ret = spdk_vhost_blk_controller_construct(); + if (ret != 0) { + SPDK_ERRLOG("Cannot construct vhost block controllers\n"); + return -1; + } + + ret = spdk_vhost_nvme_controller_construct(); + if (ret != 0) { + SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n"); + return -1; + } + + return 0; +} + +static int +_spdk_vhost_fini_remove_vdev_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + spdk_vhost_fini_cb fini_cb = arg; + + if (vdev != NULL) { + spdk_vhost_dev_remove(vdev); + return 0; + } + + /* All devices are removed now. 
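+	 * spdk_vhost_call_external_event_foreach() calls this callback once per
+	 * registered device and then one final time with vdev == NULL; that final
+	 * call is the point at which global state can be torn down and the
+	 * caller-supplied fini_cb invoked.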
*/ + free(g_num_ctrlrs); + fini_cb(); + return 0; +} + +static void +_spdk_vhost_fini(void *arg1, void *arg2) +{ + spdk_vhost_fini_cb fini_cb = arg1; + + spdk_vhost_call_external_event_foreach(_spdk_vhost_fini_remove_vdev_cb, fini_cb); +} + +void +spdk_vhost_fini(spdk_vhost_fini_cb fini_cb) +{ + pthread_t tid; + int rc; + struct spdk_event *fini_ev; + + fini_ev = spdk_event_allocate(spdk_env_get_current_core(), _spdk_vhost_fini, fini_cb, NULL); + + /* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK + * ops for stopping a device or removing a connection, we need to call it from + * a separate thread to avoid deadlock. + */ + rc = pthread_create(&tid, NULL, &session_shutdown, fini_ev); + if (rc < 0) { + SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc)); + abort(); + } + pthread_detach(tid); +} + +struct spdk_vhost_write_config_json_ctx { + struct spdk_json_write_ctx *w; + struct spdk_event *done_ev; +}; + +static int +spdk_vhost_config_json_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct spdk_vhost_write_config_json_ctx *ctx = arg; + uint32_t delay_base_us; + uint32_t iops_threshold; + + if (vdev == NULL) { + spdk_json_write_array_end(ctx->w); + spdk_event_call(ctx->done_ev); + free(ctx); + return 0; + } + + vdev->backend->write_config_json(vdev, ctx->w); + + spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold); + if (delay_base_us) { + spdk_json_write_object_begin(ctx->w); + spdk_json_write_named_string(ctx->w, "method", "set_vhost_controller_coalescing"); + + spdk_json_write_named_object_begin(ctx->w, "params"); + spdk_json_write_named_string(ctx->w, "ctrlr", vdev->name); + spdk_json_write_named_uint32(ctx->w, "delay_base_us", delay_base_us); + spdk_json_write_named_uint32(ctx->w, "iops_threshold", iops_threshold); + spdk_json_write_object_end(ctx->w); + + spdk_json_write_object_end(ctx->w); + } + + return 0; +} + +void +spdk_vhost_config_json(struct spdk_json_write_ctx *w, struct spdk_event *done_ev) +{ + struct spdk_vhost_write_config_json_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_event_call(done_ev); + return; + } + + ctx->w = w; + ctx->done_ev = done_ev; + + spdk_json_write_array_begin(w); + + spdk_vhost_call_external_event_foreach(spdk_vhost_config_json_cb, ctx); +} + +SPDK_LOG_REGISTER_COMPONENT("vhost", SPDK_LOG_VHOST) +SPDK_LOG_REGISTER_COMPONENT("vhost_ring", SPDK_LOG_VHOST_RING) diff --git a/src/spdk/lib/vhost/vhost_blk.c b/src/spdk/lib/vhost/vhost_blk.c new file mode 100644 index 00000000..6a9a1896 --- /dev/null +++ b/src/spdk/lib/vhost/vhost_blk.c @@ -0,0 +1,901 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/conf.h" +#include "spdk/thread.h" +#include "spdk/likely.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/vhost.h" + +#include "vhost_internal.h" + +struct spdk_vhost_blk_task { + struct spdk_bdev_io *bdev_io; + struct spdk_vhost_blk_dev *bvdev; + struct spdk_vhost_virtqueue *vq; + + volatile uint8_t *status; + + uint16_t req_idx; + + /* for io wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; + + /* If set, the task is currently used for I/O processing. */ + bool used; + + /** Number of bytes that were written. */ + uint32_t used_len; + uint16_t iovcnt; + struct iovec iovs[SPDK_VHOST_IOVS_MAX]; +}; + +struct spdk_vhost_blk_dev { + struct spdk_vhost_dev vdev; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *bdev_io_channel; + struct spdk_poller *requestq_poller; + struct spdk_vhost_dev_destroy_ctx destroy_ctx; + bool readonly; +}; + +/* forward declaration */ +static const struct spdk_vhost_dev_backend vhost_blk_device_backend; + +static int +process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, + struct spdk_vhost_virtqueue *vq); + +static void +blk_task_finish(struct spdk_vhost_blk_task *task) +{ + assert(task->bvdev->vdev.task_cnt > 0); + task->bvdev->vdev.task_cnt--; + task->used = false; +} + +static void +invalid_blk_request(struct spdk_vhost_blk_task *task, uint8_t status) +{ + if (task->status) { + *task->status = status; + } + + spdk_vhost_vq_used_ring_enqueue(&task->bvdev->vdev, task->vq, task->req_idx, + task->used_len); + blk_task_finish(task); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Invalid request (status=%" PRIu8")\n", status); +} + +/* + * Process task's descriptor chain and setup data related fields. + * Return + * total size of suplied buffers + * + * FIXME: Make this function return to rd_cnt and wr_cnt + */ +static int +blk_iovs_setup(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq, uint16_t req_idx, + struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length) +{ + struct vring_desc *desc, *desc_table; + uint16_t out_cnt = 0, cnt = 0; + uint32_t desc_table_size, len = 0; + int rc; + + rc = spdk_vhost_vq_get_desc(vdev, vq, req_idx, &desc, &desc_table, &desc_table_size); + if (rc != 0) { + SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx); + return -1; + } + + while (1) { + /* + * Maximum cnt reached? + * Should not happen if request is well formatted, otherwise this is a BUG. 
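+ * (*iovs_cnt is in/out: on entry it is the capacity of the caller-supplied
+ * iovec array, on successful return it is the number of entries actually
+ * filled in, so hitting cnt == *iovs_cnt here means that array is exhausted.)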
+ */ + if (spdk_unlikely(cnt == *iovs_cnt)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Max IOVs in request reached (req_idx = %"PRIu16").\n", + req_idx); + return -1; + } + + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &cnt, desc))) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n", + req_idx, cnt); + return -1; + } + + len += desc->len; + + out_cnt += spdk_vhost_vring_desc_is_wr(desc); + + rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); + if (rc != 0) { + SPDK_ERRLOG("%s: Descriptor chain at index %"PRIu16" terminated unexpectedly.\n", + vdev->name, req_idx); + return -1; + } else if (desc == NULL) { + break; + } + } + + /* + * There must be least two descriptors. + * First contain request so it must be readable. + * Last descriptor contain buffer for response so it must be writable. + */ + if (spdk_unlikely(out_cnt == 0 || cnt < 2)) { + return -1; + } + + *length = len; + *iovs_cnt = cnt; + return 0; +} + +static void +blk_request_finish(bool success, struct spdk_vhost_blk_task *task) +{ + *task->status = success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR; + spdk_vhost_vq_used_ring_enqueue(&task->bvdev->vdev, task->vq, task->req_idx, + task->used_len); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Finished task (%p) req_idx=%d\n status: %s\n", task, + task->req_idx, success ? "OK" : "FAIL"); + blk_task_finish(task); +} + +static void +blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_vhost_blk_task *task = cb_arg; + + spdk_bdev_free_io(bdev_io); + blk_request_finish(success, task); +} + +static void +blk_request_resubmit(void *arg) +{ + struct spdk_vhost_blk_task *task = (struct spdk_vhost_blk_task *)arg; + int rc = 0; + + rc = process_blk_request(task, task->bvdev, task->vq); + if (rc == 0) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p resubmitted ======\n", task); + } else { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p failed ======\n", task); + } +} + +static inline void +blk_request_queue_io(struct spdk_vhost_blk_task *task) +{ + int rc; + struct spdk_vhost_blk_dev *bvdev = task->bvdev; + struct spdk_bdev *bdev = bvdev->bdev; + + task->bdev_io_wait.bdev = bdev; + task->bdev_io_wait.cb_fn = blk_request_resubmit; + task->bdev_io_wait.cb_arg = task; + + rc = spdk_bdev_queue_io_wait(bdev, bvdev->bdev_io_channel, &task->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vhost_blk, rc=%d\n", rc); + invalid_blk_request(task, VIRTIO_BLK_S_IOERR); + } +} + +static int +process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev *bvdev, + struct spdk_vhost_virtqueue *vq) +{ + const struct virtio_blk_outhdr *req; + struct iovec *iov; + uint32_t type; + uint32_t payload_len; + int rc; + + if (blk_iovs_setup(&bvdev->vdev, vq, task->req_idx, task->iovs, &task->iovcnt, &payload_len)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx); + /* Only READ and WRITE are supported for now. 
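+ * A well-formed virtio-blk request is a descriptor chain of at least two
+ * elements:
+ *   iovs[0]            - 16-byte struct virtio_blk_outhdr (type, ioprio, sector)
+ *   iovs[1..iovcnt-2]  - optional data payload
+ *   iovs[iovcnt-1]     - 1-byte status the device writes back.
+ * The checks below verify this layout and then strip the header and status
+ * from payload_len and drop the two bookkeeping entries from iovcnt.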
*/ + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + + iov = &task->iovs[0]; + if (spdk_unlikely(iov->iov_len != sizeof(*req))) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, + "First descriptor size is %zu but expected %zu (req_idx = %"PRIu16").\n", + iov->iov_len, sizeof(*req), task->req_idx); + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + + req = iov->iov_base; + + iov = &task->iovs[task->iovcnt - 1]; + if (spdk_unlikely(iov->iov_len != 1)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, + "Last descriptor size is %zu but expected %d (req_idx = %"PRIu16").\n", + iov->iov_len, 1, task->req_idx); + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + + task->status = iov->iov_base; + payload_len -= sizeof(*req) + sizeof(*task->status); + task->iovcnt -= 2; + + type = req->type; +#ifdef VIRTIO_BLK_T_BARRIER + /* Don't care about barier for now (as QEMU's virtio-blk do). */ + type &= ~VIRTIO_BLK_T_BARRIER; +#endif + + switch (type) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: + if (spdk_unlikely((payload_len & (512 - 1)) != 0)) { + SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (req_idx = %"PRIu16").\n", + type ? "WRITE" : "READ", task->req_idx); + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + + if (type == VIRTIO_BLK_T_IN) { + task->used_len = payload_len + sizeof(*task->status); + rc = spdk_bdev_readv(bvdev->bdev_desc, bvdev->bdev_io_channel, + &task->iovs[1], task->iovcnt, req->sector * 512, + payload_len, blk_request_complete_cb, task); + } else if (!bvdev->readonly) { + task->used_len = sizeof(*task->status); + rc = spdk_bdev_writev(bvdev->bdev_desc, bvdev->bdev_io_channel, + &task->iovs[1], task->iovcnt, req->sector * 512, + payload_len, blk_request_complete_cb, task); + } else { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Device is in read-only mode!\n"); + rc = -1; + } + + if (rc) { + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n"); + blk_request_queue_io(task); + } else { + invalid_blk_request(task, VIRTIO_BLK_S_IOERR); + return -1; + } + } + break; + case VIRTIO_BLK_T_GET_ID: + if (!task->iovcnt || !payload_len) { + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len); + spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_product_name(bvdev->bdev), + task->used_len, ' '); + blk_request_finish(true, task); + break; + default: + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "Not supported request type '%"PRIu32"'.\n", type); + invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP); + return -1; + } + + return 0; +} + +static void +process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) +{ + struct spdk_vhost_blk_task *task; + int rc; + uint16_t reqs[32]; + uint16_t reqs_cnt, i; + + reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); + if (!reqs_cnt) { + return; + } + + for (i = 0; i < reqs_cnt; i++) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Starting processing request idx %"PRIu16"======\n", + reqs[i]); + + if (spdk_unlikely(reqs[i] >= vq->vring.size)) { + SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", + bvdev->vdev.name, reqs[i], vq->vring.size); + spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, reqs[i], 0); + continue; + } + + task = &((struct spdk_vhost_blk_task *)vq->tasks)[reqs[i]]; + if (spdk_unlikely(task->used)) { + SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", + bvdev->vdev.name, 
reqs[i]); + spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, reqs[i], 0); + continue; + } + + bvdev->vdev.task_cnt++; + + task->used = true; + task->iovcnt = SPDK_COUNTOF(task->iovs); + task->status = NULL; + task->used_len = 0; + + rc = process_blk_request(task, bvdev, vq); + if (rc == 0) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d submitted ======\n", task, + reqs[i]); + } else { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "====== Task %p req_idx %d failed ======\n", task, reqs[i]); + } + } +} + +static int +vdev_worker(void *arg) +{ + struct spdk_vhost_blk_dev *bvdev = arg; + uint16_t q_idx; + + for (q_idx = 0; q_idx < bvdev->vdev.max_queues; q_idx++) { + process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]); + } + + spdk_vhost_dev_used_signal(&bvdev->vdev); + + return -1; +} + +static void +no_bdev_process_vq(struct spdk_vhost_blk_dev *bvdev, struct spdk_vhost_virtqueue *vq) +{ + struct iovec iovs[SPDK_VHOST_IOVS_MAX]; + uint32_t length; + uint16_t iovcnt, req_idx; + + if (spdk_vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) { + return; + } + + iovcnt = SPDK_COUNTOF(iovs); + if (blk_iovs_setup(&bvdev->vdev, vq, req_idx, iovs, &iovcnt, &length) == 0) { + *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR; + SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK_DATA, "Aborting request %" PRIu16"\n", req_idx); + } + + spdk_vhost_vq_used_ring_enqueue(&bvdev->vdev, vq, req_idx, 0); +} + +static int +no_bdev_vdev_worker(void *arg) +{ + struct spdk_vhost_blk_dev *bvdev = arg; + uint16_t q_idx; + + for (q_idx = 0; q_idx < bvdev->vdev.max_queues; q_idx++) { + no_bdev_process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]); + } + + spdk_vhost_dev_used_signal(&bvdev->vdev); + + if (bvdev->vdev.task_cnt == 0 && bvdev->bdev_io_channel) { + spdk_put_io_channel(bvdev->bdev_io_channel); + bvdev->bdev_io_channel = NULL; + } + + return -1; +} + +static struct spdk_vhost_blk_dev * +to_blk_dev(struct spdk_vhost_dev *vdev) +{ + if (vdev == NULL) { + return NULL; + } + + if (vdev->backend != &vhost_blk_device_backend) { + SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name); + return NULL; + } + + return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev); +} + +struct spdk_bdev * +spdk_vhost_blk_get_dev(struct spdk_vhost_dev *vdev) +{ + struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); + + assert(bvdev != NULL); + return bvdev->bdev; +} + +static int +_bdev_remove_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct spdk_vhost_blk_dev *bvdev = arg; + + SPDK_WARNLOG("Controller %s: Hot-removing bdev - all further requests will fail.\n", + bvdev->vdev.name); + if (bvdev->requestq_poller) { + spdk_poller_unregister(&bvdev->requestq_poller); + bvdev->requestq_poller = spdk_poller_register(no_bdev_vdev_worker, bvdev, 0); + } + + spdk_bdev_close(bvdev->bdev_desc); + bvdev->bdev_desc = NULL; + bvdev->bdev = NULL; + return 0; +} + +static void +bdev_remove_cb(void *remove_ctx) +{ + struct spdk_vhost_blk_dev *bvdev = remove_ctx; + + spdk_vhost_call_external_event(bvdev->vdev.name, _bdev_remove_cb, bvdev); +} + +static void +free_task_pool(struct spdk_vhost_blk_dev *bvdev) +{ + struct spdk_vhost_virtqueue *vq; + uint16_t i; + + for (i = 0; i < bvdev->vdev.max_queues; i++) { + vq = &bvdev->vdev.virtqueue[i]; + if (vq->tasks == NULL) { + continue; + } + + spdk_dma_free(vq->tasks); + vq->tasks = NULL; + } +} + +static int +alloc_task_pool(struct spdk_vhost_blk_dev *bvdev) +{ + struct spdk_vhost_virtqueue *vq; + struct spdk_vhost_blk_task *task; + uint32_t task_cnt; + uint16_t i; + uint32_t j; + + for (i = 0; i 
< bvdev->vdev.max_queues; i++) { + vq = &bvdev->vdev.virtqueue[i]; + if (vq->vring.desc == NULL) { + continue; + } + + task_cnt = vq->vring.size; + if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { + /* sanity check */ + SPDK_ERRLOG("Controller %s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", + bvdev->vdev.name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); + free_task_pool(bvdev); + return -1; + } + vq->tasks = spdk_dma_zmalloc(sizeof(struct spdk_vhost_blk_task) * task_cnt, + SPDK_CACHE_LINE_SIZE, NULL); + if (vq->tasks == NULL) { + SPDK_ERRLOG("Controller %s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", + bvdev->vdev.name, task_cnt, i); + free_task_pool(bvdev); + return -1; + } + + for (j = 0; j < task_cnt; j++) { + task = &((struct spdk_vhost_blk_task *)vq->tasks)[j]; + task->bvdev = bvdev; + task->req_idx = j; + task->vq = vq; + } + } + + return 0; +} + +/* + * A new device is added to a data core. First the device is added to the main linked list + * and then allocated to a specific data core. + * + */ +static int +spdk_vhost_blk_start(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_blk_dev *bvdev; + int i, rc = 0; + + bvdev = to_blk_dev(vdev); + if (bvdev == NULL) { + SPDK_ERRLOG("Trying to start non-blk controller as a blk one.\n"); + rc = -1; + goto out; + } + + /* validate all I/O queues are in a contiguous index range */ + for (i = 0; i < vdev->max_queues; i++) { + if (vdev->virtqueue[i].vring.desc == NULL) { + SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vdev->name, i); + rc = -1; + goto out; + } + } + + rc = alloc_task_pool(bvdev); + if (rc != 0) { + SPDK_ERRLOG("%s: failed to alloc task pool.\n", bvdev->vdev.name); + goto out; + } + + if (bvdev->bdev) { + bvdev->bdev_io_channel = spdk_bdev_get_io_channel(bvdev->bdev_desc); + if (!bvdev->bdev_io_channel) { + free_task_pool(bvdev); + SPDK_ERRLOG("Controller %s: IO channel allocation failed\n", vdev->name); + rc = -1; + goto out; + } + } + + bvdev->requestq_poller = spdk_poller_register(bvdev->bdev ? 
vdev_worker : no_bdev_vdev_worker, + bvdev, 0); + SPDK_INFOLOG(SPDK_LOG_VHOST, "Started poller for vhost controller %s on lcore %d\n", + vdev->name, vdev->lcore); +out: + spdk_vhost_dev_backend_event_done(event_ctx, rc); + return rc; +} + +static int +destroy_device_poller_cb(void *arg) +{ + struct spdk_vhost_blk_dev *bvdev = arg; + int i; + + if (bvdev->vdev.task_cnt > 0) { + return -1; + } + + for (i = 0; i < bvdev->vdev.max_queues; i++) { + bvdev->vdev.virtqueue[i].next_event_time = 0; + spdk_vhost_vq_used_signal(&bvdev->vdev, &bvdev->vdev.virtqueue[i]); + } + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Stopping poller for vhost controller %s\n", bvdev->vdev.name); + + if (bvdev->bdev_io_channel) { + spdk_put_io_channel(bvdev->bdev_io_channel); + bvdev->bdev_io_channel = NULL; + } + + free_task_pool(bvdev); + spdk_poller_unregister(&bvdev->destroy_ctx.poller); + spdk_vhost_dev_backend_event_done(bvdev->destroy_ctx.event_ctx, 0); + + return -1; +} + +static int +spdk_vhost_blk_stop(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_blk_dev *bvdev; + + bvdev = to_blk_dev(vdev); + if (bvdev == NULL) { + SPDK_ERRLOG("Trying to stop non-blk controller as a blk one.\n"); + goto err; + } + + bvdev->destroy_ctx.event_ctx = event_ctx; + spdk_poller_unregister(&bvdev->requestq_poller); + bvdev->destroy_ctx.poller = spdk_poller_register(destroy_device_poller_cb, + bvdev, 1000); + return 0; + +err: + spdk_vhost_dev_backend_event_done(event_ctx, -1); + return -1; +} + +static void +spdk_vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_bdev *bdev = spdk_vhost_blk_get_dev(vdev); + struct spdk_vhost_blk_dev *bvdev; + + bvdev = to_blk_dev(vdev); + if (bvdev == NULL) { + return; + } + + assert(bvdev != NULL); + spdk_json_write_name(w, "block"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "readonly"); + spdk_json_write_bool(w, bvdev->readonly); + + spdk_json_write_name(w, "bdev"); + if (bdev) { + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + } else { + spdk_json_write_null(w); + } + + spdk_json_write_object_end(w); +} + +static void +spdk_vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_vhost_blk_dev *bvdev; + + bvdev = to_blk_dev(vdev); + if (bvdev == NULL) { + return; + } + + if (!bvdev->bdev) { + return; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "construct_vhost_blk_controller"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "ctrlr", vdev->name); + spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev)); + spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(vdev->cpumask)); + spdk_json_write_named_bool(w, "readonly", bvdev->readonly); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static int spdk_vhost_blk_destroy(struct spdk_vhost_dev *dev); + +static int +spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config, + uint32_t len) +{ + struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config; + struct spdk_vhost_blk_dev *bvdev; + struct spdk_bdev *bdev; + uint32_t blk_size; + uint64_t blkcnt; + + bvdev = to_blk_dev(vdev); + if (bvdev == NULL) { + SPDK_ERRLOG("Trying to get virito_blk configuration failed\n"); + return -1; + } + + if (len < sizeof(*blkcfg)) { + return -1; + } + + bdev = bvdev->bdev; + if (bdev == NULL) { + /* We can't just return -1 here as this GET_CONFIG message might + * be caused 
by a QEMU VM reboot. Returning -1 will indicate an + * error to QEMU, who might then decide to terminate itself. + * We don't want that. A simple reboot shouldn't break the system. + * + * Presenting a block device with block size 0 and block count 0 + * doesn't cause any problems on QEMU side and the virtio-pci + * device is even still available inside the VM, but there will + * be no block device created for it - the kernel drivers will + * silently reject it. + */ + blk_size = 0; + blkcnt = 0; + } else { + blk_size = spdk_bdev_get_block_size(bdev); + blkcnt = spdk_bdev_get_num_blocks(bdev); + } + + memset(blkcfg, 0, sizeof(*blkcfg)); + blkcfg->blk_size = blk_size; + /* minimum I/O size in blocks */ + blkcfg->min_io_size = 1; + /* expressed in 512 Bytes sectors */ + blkcfg->capacity = (blkcnt * blk_size) / 512; + blkcfg->size_max = 131072; + /* -2 for REQ and RESP and -1 for region boundary splitting */ + blkcfg->seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1; + /* QEMU can overwrite this value when started */ + blkcfg->num_queues = SPDK_VHOST_MAX_VQUEUES; + + return 0; +} + +static const struct spdk_vhost_dev_backend vhost_blk_device_backend = { + .virtio_features = SPDK_VHOST_FEATURES | + (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | + (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_RO) | + (1ULL << VIRTIO_BLK_F_BLK_SIZE) | (1ULL << VIRTIO_BLK_F_TOPOLOGY) | + (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | + (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | + (1ULL << VIRTIO_BLK_F_MQ), + .disabled_features = SPDK_VHOST_DISABLED_FEATURES | (1ULL << VIRTIO_BLK_F_GEOMETRY) | + (1ULL << VIRTIO_BLK_F_RO) | (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | + (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI), + .start_device = spdk_vhost_blk_start, + .stop_device = spdk_vhost_blk_stop, + .vhost_get_config = spdk_vhost_blk_get_config, + .dump_info_json = spdk_vhost_blk_dump_info_json, + .write_config_json = spdk_vhost_blk_write_config_json, + .remove_device = spdk_vhost_blk_destroy, +}; + +int +spdk_vhost_blk_controller_construct(void) +{ + struct spdk_conf_section *sp; + unsigned ctrlr_num; + char *bdev_name; + char *cpumask; + char *name; + bool readonly; + + for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { + if (!spdk_conf_section_match_prefix(sp, "VhostBlk")) { + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VhostBlk%u", &ctrlr_num) != 1) { + SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", + spdk_conf_section_get_name(sp)); + return -1; + } + + name = spdk_conf_section_get_val(sp, "Name"); + if (name == NULL) { + SPDK_ERRLOG("VhostBlk%u: missing Name\n", ctrlr_num); + return -1; + } + + cpumask = spdk_conf_section_get_val(sp, "Cpumask"); + readonly = spdk_conf_section_get_boolval(sp, "ReadOnly", false); + + bdev_name = spdk_conf_section_get_val(sp, "Dev"); + if (bdev_name == NULL) { + continue; + } + + if (spdk_vhost_blk_construct(name, cpumask, bdev_name, readonly) < 0) { + return -1; + } + } + + return 0; +} + +int +spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name, bool readonly) +{ + struct spdk_vhost_blk_dev *bvdev = NULL; + struct spdk_bdev *bdev; + int ret = 0; + + spdk_vhost_lock(); + bdev = spdk_bdev_get_by_name(dev_name); + if (bdev == NULL) { + SPDK_ERRLOG("Controller %s: bdev '%s' not found\n", + name, dev_name); + ret = -ENODEV; + goto out; + } + + bvdev = spdk_dma_zmalloc(sizeof(*bvdev), 
SPDK_CACHE_LINE_SIZE, NULL); + if (bvdev == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = spdk_bdev_open(bdev, true, bdev_remove_cb, bvdev, &bvdev->bdev_desc); + if (ret != 0) { + SPDK_ERRLOG("Controller %s: could not open bdev '%s', error=%d\n", + name, dev_name, ret); + goto out; + } + + bvdev->bdev = bdev; + bvdev->readonly = readonly; + ret = spdk_vhost_dev_register(&bvdev->vdev, name, cpumask, &vhost_blk_device_backend); + if (ret != 0) { + spdk_bdev_close(bvdev->bdev_desc); + goto out; + } + + if (readonly && rte_vhost_driver_enable_features(bvdev->vdev.path, (1ULL << VIRTIO_BLK_F_RO))) { + SPDK_ERRLOG("Controller %s: failed to set as a readonly\n", name); + spdk_bdev_close(bvdev->bdev_desc); + + if (spdk_vhost_dev_unregister(&bvdev->vdev) != 0) { + SPDK_ERRLOG("Controller %s: failed to remove controller\n", name); + } + + ret = -1; + goto out; + } + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: using bdev '%s'\n", name, dev_name); +out: + if (ret != 0 && bvdev) { + spdk_dma_free(bvdev); + } + spdk_vhost_unlock(); + return ret; +} + +static int +spdk_vhost_blk_destroy(struct spdk_vhost_dev *vdev) +{ + struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev); + int rc; + + if (!bvdev) { + return -EINVAL; + } + + rc = spdk_vhost_dev_unregister(&bvdev->vdev); + if (rc != 0) { + return rc; + } + + if (bvdev->bdev_desc) { + spdk_bdev_close(bvdev->bdev_desc); + bvdev->bdev_desc = NULL; + } + bvdev->bdev = NULL; + + spdk_dma_free(bvdev); + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("vhost_blk", SPDK_LOG_VHOST_BLK) +SPDK_LOG_REGISTER_COMPONENT("vhost_blk_data", SPDK_LOG_VHOST_BLK_DATA) diff --git a/src/spdk/lib/vhost/vhost_internal.h b/src/spdk/lib/vhost/vhost_internal.h new file mode 100644 index 00000000..9c0ad211 --- /dev/null +++ b/src/spdk/lib/vhost/vhost_internal.h @@ -0,0 +1,277 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_VHOST_INTERNAL_H +#define SPDK_VHOST_INTERNAL_H + +#include "spdk/stdinc.h" + +#include + +#include "spdk_internal/log.h" +#include "spdk/event.h" +#include "spdk/rpc.h" + +#define SPDK_CACHE_LINE_SIZE RTE_CACHE_LINE_SIZE + +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + +#ifndef VIRTIO_F_VERSION_1 +#define VIRTIO_F_VERSION_1 32 +#endif + +#ifndef VIRTIO_BLK_F_MQ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#endif + +#ifndef VIRTIO_BLK_F_CONFIG_WCE +#define VIRTIO_BLK_F_CONFIG_WCE 11 +#endif + +#define SPDK_VHOST_MAX_VQUEUES 256 +#define SPDK_VHOST_MAX_VQ_SIZE 1024 + +#define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8 + +#define SPDK_VHOST_IOVS_MAX 129 + +/* + * Rate at which stats are checked for interrupt coalescing. + */ +#define SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS 10 +/* + * Default threshold at which interrupts start to be coalesced. + */ +#define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000 + +/* + * Currently coalescing is not used by default. + * Setting this to value > 0 here or by RPC will enable coalescing. + */ +#define SPDK_VHOST_COALESCING_DELAY_BASE_US 0 + + +#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ + (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) + +#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \ + (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY)) + +struct spdk_vhost_virtqueue { + struct rte_vhost_vring vring; + void *tasks; + + /* Request count from last stats check */ + uint32_t req_cnt; + + /* Request count from last event */ + uint16_t used_req_cnt; + + /* How long interrupt is delayed */ + uint32_t irq_delay_time; + + /* Next time when we need to send event */ + uint64_t next_event_time; + +} __attribute((aligned(SPDK_CACHE_LINE_SIZE))); + +struct spdk_vhost_dev_backend { + uint64_t virtio_features; + uint64_t disabled_features; + + /** + * Callbacks for starting and pausing the device. + * The first param is struct spdk_vhost_dev *. + * The second one is event context that has to be + * passed to spdk_vhost_dev_backend_event_done(). + */ + spdk_vhost_event_fn start_device; + spdk_vhost_event_fn stop_device; + + int (*vhost_get_config)(struct spdk_vhost_dev *vdev, uint8_t *config, uint32_t len); + int (*vhost_set_config)(struct spdk_vhost_dev *vdev, uint8_t *config, + uint32_t offset, uint32_t size, uint32_t flags); + + void (*dump_info_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); + void (*write_config_json)(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); + int (*remove_device)(struct spdk_vhost_dev *vdev); +}; + +struct spdk_vhost_dev { + struct rte_vhost_memory *mem; + char *name; + char *path; + + /* Unique device ID. */ + unsigned id; + + /* rte_vhost device ID. */ + int vid; + int task_cnt; + int32_t lcore; + struct spdk_cpuset *cpumask; + bool registered; + + const struct spdk_vhost_dev_backend *backend; + + /* Saved orginal values used to setup coalescing to avoid integer + * rounding issues during save/load config. + */ + uint32_t coalescing_delay_us; + uint32_t coalescing_iops_threshold; + + uint32_t coalescing_delay_time_base; + + /* Threshold when event coalescing for virtqueue will be turned on. */ + uint32_t coalescing_io_rate_threshold; + + /* Next time when stats for event coalescing will be checked. 
*/ + uint64_t next_stats_check_time; + + /* Interval used for event coalescing checking. */ + uint64_t stats_check_interval; + + uint16_t max_queues; + + uint64_t negotiated_features; + + struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES]; + + TAILQ_ENTRY(spdk_vhost_dev) tailq; +}; + +struct spdk_vhost_dev_destroy_ctx { + struct spdk_poller *poller; + void *event_ctx; +}; + +struct spdk_vhost_dev *spdk_vhost_dev_find(const char *ctrlr_name); + +void *spdk_vhost_gpa_to_vva(struct spdk_vhost_dev *vdev, uint64_t addr, uint64_t len); + +uint16_t spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq, uint16_t *reqs, + uint16_t reqs_len); + +/** + * Get a virtio descriptor at given index in given virtqueue. + * The descriptor will provide access to the entire descriptor + * chain. The subsequent descriptors are accesible via + * \c spdk_vhost_vring_desc_get_next. + * \param vdev vhost device + * \param vq virtqueue + * \param req_idx descriptor index + * \param desc pointer to be set to the descriptor + * \param desc_table descriptor table to be used with + * \c spdk_vhost_vring_desc_get_next. This might be either + * default virtqueue descriptor table or per-chain indirect + * table. + * \param desc_table_size size of the *desc_table* + * \return 0 on success, -1 if given index is invalid. + * If -1 is returned, the content of params is undefined. + */ +int spdk_vhost_vq_get_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq, + uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table, + uint32_t *desc_table_size); + +/** + * Send IRQ/call client (if pending) for \c vq. + * \param vdev vhost device + * \param vq virtqueue + * \return + * 0 - if no interrupt was signalled + * 1 - if interrupt was signalled + */ +int spdk_vhost_vq_used_signal(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq); + + +/** + * Send IRQs for all queues that need to be signaled. + * \param vdev vhost device + * \param vq virtqueue + */ +void spdk_vhost_dev_used_signal(struct spdk_vhost_dev *vdev); + +void spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq, + uint16_t id, uint32_t len); + +/** + * Get subsequent descriptor from given table. + * \param desc current descriptor, will be set to the + * next descriptor (NULL in case this is the last + * descriptor in the chain or the next desc is invalid) + * \param desc_table descriptor table + * \param desc_table_size size of the *desc_table* + * \return 0 on success, -1 if given index is invalid + * The *desc* param will be set regardless of the + * return value. 
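+ *
+ * A minimal sketch of walking an entire chain with this API (error handling
+ * trimmed; blk_iovs_setup() in vhost_blk.c is a complete in-tree user):
+ *
+ *   struct vring_desc *desc, *desc_table;
+ *   uint32_t desc_table_size;
+ *
+ *   if (spdk_vhost_vq_get_desc(vdev, vq, req_idx, &desc, &desc_table,
+ *                              &desc_table_size) != 0) {
+ *           return -1;
+ *   }
+ *   while (desc != NULL) {
+ *           // consume desc->addr and desc->len here
+ *           if (spdk_vhost_vring_desc_get_next(&desc, desc_table,
+ *                                              desc_table_size) != 0) {
+ *                   return -1;   // malformed chain
+ *           }
+ *   }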
+ */ +int spdk_vhost_vring_desc_get_next(struct vring_desc **desc, + struct vring_desc *desc_table, uint32_t desc_table_size); +bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc); + +int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov, + uint16_t *iov_index, const struct vring_desc *desc); + +static inline bool __attribute__((always_inline)) +spdk_vhost_dev_has_feature(struct spdk_vhost_dev *vdev, unsigned feature_id) +{ + return vdev->negotiated_features & (1ULL << feature_id); +} + +int spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str, + const struct spdk_vhost_dev_backend *backend); +int spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev); + +int spdk_vhost_scsi_controller_construct(void); +int spdk_vhost_blk_controller_construct(void); +void spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w); +void spdk_vhost_dev_backend_event_done(void *event_ctx, int response); +void spdk_vhost_lock(void); +void spdk_vhost_unlock(void); +int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev); +int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf); +int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd); +int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap); +int spdk_vhost_nvme_controller_construct(void); +int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues); +int spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev); +int spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, + const char *bdev_name); + +#endif /* SPDK_VHOST_INTERNAL_H */ diff --git a/src/spdk/lib/vhost/vhost_nvme.c b/src/spdk/lib/vhost/vhost_nvme.c new file mode 100644 index 00000000..35015d93 --- /dev/null +++ b/src/spdk/lib/vhost/vhost_nvme.c @@ -0,0 +1,1465 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/conf.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/barrier.h" +#include "spdk/vhost.h" +#include "spdk/bdev.h" +#include "spdk/version.h" +#include "spdk/nvme_spec.h" +#include "spdk/likely.h" + +#include "vhost_internal.h" + +#define MAX_IO_QUEUES 31 +#define MAX_IOVS 64 +#define MAX_NAMESPACE 8 +#define MAX_QUEUE_ENTRIES_SUPPORTED 256 +#define MAX_BATCH_IO 8 + +struct spdk_vhost_nvme_sq { + uint16_t sqid; + uint16_t size; + uint16_t cqid; + bool valid; + struct spdk_nvme_cmd *sq_cmd; + uint16_t sq_head; + uint16_t sq_tail; +}; + +struct spdk_vhost_nvme_cq { + uint8_t phase; + uint16_t size; + uint16_t cqid; + bool valid; + volatile struct spdk_nvme_cpl *cq_cqe; + uint16_t cq_head; + uint16_t guest_signaled_cq_head; + uint32_t need_signaled_cnt; + STAILQ_HEAD(, spdk_vhost_nvme_task) cq_full_waited_tasks; + bool irq_enabled; + int virq; +}; + +struct spdk_vhost_nvme_ns { + struct spdk_bdev *bdev; + uint32_t block_size; + uint64_t capacity; + uint32_t nsid; + uint32_t active_ns; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *bdev_io_channel; + struct spdk_nvme_ns_data nsdata; +}; + +struct spdk_vhost_nvme_task { + struct spdk_nvme_cmd cmd; + struct spdk_vhost_nvme_dev *nvme; + uint16_t sqid; + uint16_t cqid; + + /** array of iovecs to transfer. */ + struct iovec iovs[MAX_IOVS]; + + /** Number of iovecs in iovs array. */ + int iovcnt; + + /** Current iovec position. */ + int iovpos; + + /** Offset in current iovec. */ + uint32_t iov_offset; + + /* for bdev_io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; + struct spdk_vhost_nvme_sq *sq; + struct spdk_vhost_nvme_ns *ns; + + /* parent pointer. */ + struct spdk_vhost_nvme_task *parent; + uint8_t dnr; + uint8_t sct; + uint8_t sc; + uint32_t num_children; + STAILQ_ENTRY(spdk_vhost_nvme_task) stailq; +}; + +struct spdk_vhost_nvme_dev { + struct spdk_vhost_dev vdev; + + uint32_t num_io_queues; + union spdk_nvme_cap_register cap; + union spdk_nvme_cc_register cc; + union spdk_nvme_csts_register csts; + struct spdk_nvme_ctrlr_data cdata; + + uint32_t num_sqs; + uint32_t num_cqs; + + uint32_t num_ns; + struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE]; + + volatile uint32_t *dbbuf_dbs; + volatile uint32_t *dbbuf_eis; + struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1]; + struct spdk_vhost_nvme_cq cq_queue[MAX_IO_QUEUES + 1]; + + TAILQ_ENTRY(spdk_vhost_nvme_dev) tailq; + STAILQ_HEAD(, spdk_vhost_nvme_task) free_tasks; + struct spdk_poller *requestq_poller; + struct spdk_vhost_dev_destroy_ctx destroy_ctx; +}; + +static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend; + +/* + * Report the SPDK version as the firmware revision. + * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts. 
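+ * FW_VERSION below is therefore just the major, minor and patch version
+ * strings concatenated (the Identify Controller FR field is 8 space-padded
+ * ASCII characters).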
+ */ +#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING + +static int +spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq, + struct spdk_vhost_nvme_task *task); + +static struct spdk_vhost_nvme_dev * +to_nvme_dev(struct spdk_vhost_dev *vdev) +{ + if (vdev->backend != &spdk_vhost_nvme_device_backend) { + SPDK_ERRLOG("%s: not a vhost-nvme device\n", vdev->name); + return NULL; + } + + return SPDK_CONTAINEROF(vdev, struct spdk_vhost_nvme_dev, vdev); +} + +static TAILQ_HEAD(, spdk_vhost_nvme_dev) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs); + +static inline unsigned int sq_offset(unsigned int qid, uint32_t db_stride) +{ + return qid * 2 * db_stride; +} + +static inline unsigned int cq_offset(unsigned int qid, uint32_t db_stride) +{ + return (qid * 2 + 1) * db_stride; +} + +static void +nvme_inc_cq_head(struct spdk_vhost_nvme_cq *cq) +{ + cq->cq_head++; + if (cq->cq_head >= cq->size) { + cq->cq_head = 0; + cq->phase = !cq->phase; + } +} + +static bool +nvme_cq_is_full(struct spdk_vhost_nvme_cq *cq) +{ + return ((cq->cq_head + 1) % cq->size == cq->guest_signaled_cq_head); +} + +static void +nvme_inc_sq_head(struct spdk_vhost_nvme_sq *sq) +{ + sq->sq_head = (sq->sq_head + 1) % sq->size; +} + +static struct spdk_vhost_nvme_sq * +spdk_vhost_nvme_get_sq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid) +{ + if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) { + return NULL; + } + + return &dev->sq_queue[qid]; +} + +static struct spdk_vhost_nvme_cq * +spdk_vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid) +{ + if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) { + return NULL; + } + + return &dev->cq_queue[qid]; +} + +static int +spdk_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd, + struct spdk_vhost_nvme_task *task, uint32_t len) +{ + uint64_t prp1, prp2; + void *vva; + uint32_t i; + uint32_t residue_len, nents, mps = 4096; + uint64_t *prp_list; + + prp1 = cmd->dptr.prp.prp1; + prp2 = cmd->dptr.prp.prp2; + + /* PRP1 may started with unaligned page address */ + residue_len = mps - (prp1 % mps); + residue_len = spdk_min(len, residue_len); + + vva = spdk_vhost_gpa_to_vva(&nvme->vdev, prp1, residue_len); + if (spdk_unlikely(vva == NULL)) { + SPDK_ERRLOG("GPA to VVA failed\n"); + return -1; + } + task->iovs[0].iov_base = vva; + task->iovs[0].iov_len = residue_len; + len -= residue_len; + + if (len) { + if (spdk_unlikely(prp2 == 0)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PRP2=0 in command\n"); + return -1; + } + + if (len <= mps) { + /* 2 PRP used */ + task->iovcnt = 2; + vva = spdk_vhost_gpa_to_vva(&nvme->vdev, prp2, len); + if (spdk_unlikely(vva == NULL)) { + return -1; + } + task->iovs[1].iov_base = vva; + task->iovs[1].iov_len = len; + } else { + /* PRP list used */ + nents = (len + mps - 1) / mps; + vva = spdk_vhost_gpa_to_vva(&nvme->vdev, prp2, nents * sizeof(*prp_list)); + if (spdk_unlikely(vva == NULL)) { + return -1; + } + prp_list = vva; + i = 0; + while (len != 0) { + residue_len = spdk_min(len, mps); + vva = spdk_vhost_gpa_to_vva(&nvme->vdev, prp_list[i], residue_len); + if (spdk_unlikely(vva == NULL)) { + return -1; + } + task->iovs[i + 1].iov_base = vva; + task->iovs[i + 1].iov_len = residue_len; + len -= residue_len; + i++; + } + task->iovcnt = i + 1; + } + } else { + /* 1 PRP used */ + task->iovcnt = 1; + } + + return 0; +} + +static void +spdk_nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme) +{ + struct spdk_vhost_nvme_cq *cq; + 
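+	/* The guest shares queue positions through the shadow doorbell pages set
+	 * up by the DOORBELL_BUFFER_CONFIG admin command: dbbuf_dbs is a 4 KiB
+	 * page of 32-bit slots where, with doorbell stride 1, slot 2*qid holds
+	 * the SQ tail and slot 2*qid+1 holds the CQ head (see sq_offset() and
+	 * cq_offset() above); dbbuf_eis is the matching event-index page that
+	 * the device writes back.
+	 */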
uint32_t qid, cq_head; + + assert(nvme != NULL); + + for (qid = 1; qid <= MAX_IO_QUEUES; qid++) { + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid); + if (!cq || !cq->valid) { + continue; + } + + cq_head = nvme->dbbuf_dbs[cq_offset(qid, 1)]; + if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) { + eventfd_write(cq->virq, (eventfd_t)1); + cq->need_signaled_cnt = 0; + } + } +} + +static void +spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task) +{ + struct spdk_vhost_nvme_dev *nvme = task->nvme; + struct spdk_nvme_cpl cqe = {0}; + struct spdk_vhost_nvme_cq *cq; + struct spdk_vhost_nvme_sq *sq; + struct spdk_nvme_cmd *cmd = &task->cmd; + uint16_t cqid = task->cqid; + uint16_t sqid = task->sqid; + + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid); + sq = spdk_vhost_nvme_get_sq_from_qid(nvme, sqid); + if (spdk_unlikely(!cq || !sq)) { + return; + } + + cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(cqid, 1)]; + if (spdk_unlikely(nvme_cq_is_full(cq))) { + STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq); + return; + } + + cqe.sqid = sqid; + cqe.sqhd = sq->sq_head; + cqe.cid = cmd->cid; + cqe.status.dnr = task->dnr; + cqe.status.sct = task->sct; + cqe.status.sc = task->sc; + cqe.status.p = !cq->phase; + cq->cq_cqe[cq->cq_head] = cqe; + spdk_smp_wmb(); + cq->cq_cqe[cq->cq_head].status.p = cq->phase; + + nvme_inc_cq_head(cq); + cq->need_signaled_cnt++; + + /* MMIO Controll */ + nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1); + + STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq); +} + +static void +blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_vhost_nvme_task *task = cb_arg; + struct spdk_nvme_cmd *cmd = &task->cmd; + int sc, sct; + + assert(bdev_io != NULL); + + spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc); + spdk_bdev_free_io(bdev_io); + + task->dnr = !success; + task->sct = sct; + task->sc = sc; + + if (spdk_unlikely(!success)) { + SPDK_ERRLOG("I/O error, sector %u\n", cmd->cdw10); + } + + spdk_vhost_nvme_task_complete(task); +} + +static void +blk_unmap_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_vhost_nvme_task *child = cb_arg; + struct spdk_vhost_nvme_task *task = child->parent; + struct spdk_vhost_nvme_dev *nvme = task->nvme; + int sct, sc; + + assert(bdev_io != NULL); + + task->num_children--; + if (!success) { + task->dnr = 1; + spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc); + task->sct = sct; + task->sc = sc; + } + + spdk_bdev_free_io(bdev_io); + + if (!task->num_children) { + spdk_vhost_nvme_task_complete(task); + } + + STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq); +} + +static struct spdk_vhost_nvme_ns * +spdk_vhost_nvme_get_ns_from_nsid(struct spdk_vhost_nvme_dev *dev, uint32_t nsid) +{ + if (spdk_unlikely(!nsid || nsid > dev->num_ns)) { + return NULL; + } + + return &dev->ns[nsid - 1]; +} + +static void +vhost_nvme_resubmit_task(void *arg) +{ + struct spdk_vhost_nvme_task *task = (struct spdk_vhost_nvme_task *)arg; + int rc; + + rc = spdk_nvme_process_sq(task->nvme, task->sq, task); + if (rc) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "vhost_nvme: task resubmit failed, rc = %d.\n", rc); + } +} + +static int +vhost_nvme_queue_task(struct spdk_vhost_nvme_task *task) +{ + int rc; + + task->bdev_io_wait.bdev = task->ns->bdev; + task->bdev_io_wait.cb_fn = vhost_nvme_resubmit_task; + task->bdev_io_wait.cb_arg = task; + + rc = spdk_bdev_queue_io_wait(task->ns->bdev, task->ns->bdev_io_channel, 
&task->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vhost_nvme_queue_task, rc=%d.\n", rc); + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_vhost_nvme_task_complete(task); + } + + return rc; +} + +static int +spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq, + struct spdk_vhost_nvme_task *task) +{ + struct spdk_vhost_nvme_task *child; + struct spdk_nvme_cmd *cmd = &task->cmd; + struct spdk_vhost_nvme_ns *ns; + int ret = -1; + uint32_t len, nlba, block_size; + uint64_t slba; + struct spdk_nvme_dsm_range *range; + uint16_t i, num_ranges = 0; + + task->nvme = nvme; + task->dnr = 0; + task->sct = 0; + task->sc = 0; + + ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, cmd->nsid); + if (spdk_unlikely(!ns)) { + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + spdk_vhost_nvme_task_complete(task); + return -1; + } + + block_size = ns->block_size; + task->num_children = 0; + task->cqid = sq->cqid; + task->sqid = sq->sqid; + + task->ns = ns; + + if (spdk_unlikely(!ns->active_ns)) { + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + spdk_vhost_nvme_task_complete(task); + return -1; + } + + /* valid only for Read/Write commands */ + nlba = (cmd->cdw12 & 0xffff) + 1; + slba = cmd->cdw11; + slba = (slba << 32) | cmd->cdw10; + + if (cmd->opc == SPDK_NVME_OPC_READ || cmd->opc == SPDK_NVME_OPC_WRITE || + cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { + if (cmd->psdt != SPDK_NVME_PSDT_PRP) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PSDT %u%ub in command\n", + cmd->psdt >> 1, cmd->psdt & 1u); + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INVALID_FIELD; + spdk_vhost_nvme_task_complete(task); + return -1; + } + + if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { + num_ranges = (cmd->cdw10 & 0xff) + 1; + len = num_ranges * sizeof(struct spdk_nvme_dsm_range); + } else { + len = nlba * block_size; + } + + ret = spdk_nvme_map_prps(nvme, cmd, task, len); + if (spdk_unlikely(ret != 0)) { + SPDK_ERRLOG("nvme command map prps failed\n"); + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INVALID_FIELD; + spdk_vhost_nvme_task_complete(task); + return -1; + } + } + + switch (cmd->opc) { + case SPDK_NVME_OPC_READ: + ret = spdk_bdev_readv(ns->bdev_desc, ns->bdev_io_channel, + task->iovs, task->iovcnt, slba * block_size, + nlba * block_size, blk_request_complete_cb, task); + break; + case SPDK_NVME_OPC_WRITE: + ret = spdk_bdev_writev(ns->bdev_desc, ns->bdev_io_channel, + task->iovs, task->iovcnt, slba * block_size, + nlba * block_size, blk_request_complete_cb, task); + break; + case SPDK_NVME_OPC_FLUSH: + ret = spdk_bdev_flush(ns->bdev_desc, ns->bdev_io_channel, + 0, ns->capacity, + blk_request_complete_cb, task); + break; + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + range = (struct spdk_nvme_dsm_range *)task->iovs[0].iov_base; + for (i = 0; i < num_ranges; i++) { + if (!STAILQ_EMPTY(&nvme->free_tasks)) { + child = STAILQ_FIRST(&nvme->free_tasks); + STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); + } else { + SPDK_ERRLOG("No free task now\n"); + ret = -1; + break; + } + task->num_children++; + child->parent = task; + ret = spdk_bdev_unmap(ns->bdev_desc, ns->bdev_io_channel, + range[i].starting_lba * block_size, + range[i].length * block_size, + blk_unmap_complete_cb, child); + if (ret) { + STAILQ_INSERT_TAIL(&nvme->free_tasks, 
child, stailq); + break; + } + } + break; + default: + ret = -1; + break; + } + + if (spdk_unlikely(ret)) { + if (ret == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "No memory, start to queue io.\n"); + task->sq = sq; + ret = vhost_nvme_queue_task(task); + } else { + /* post error status to cqe */ + SPDK_ERRLOG("Error Submission For Command %u, ret %d\n", cmd->opc, ret); + task->dnr = 1; + task->sct = SPDK_NVME_SCT_GENERIC; + task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + spdk_vhost_nvme_task_complete(task); + } + } + + return ret; +} + +static int +nvme_worker(void *arg) +{ + struct spdk_vhost_nvme_dev *nvme = (struct spdk_vhost_nvme_dev *)arg; + struct spdk_vhost_nvme_sq *sq; + struct spdk_vhost_nvme_cq *cq; + struct spdk_vhost_nvme_task *task; + uint32_t qid, dbbuf_sq; + int ret; + int count = -1; + + if (spdk_unlikely(!nvme->num_sqs)) { + return -1; + } + + /* worker thread can't start before the admin doorbell + * buffer config command + */ + if (spdk_unlikely(!nvme->dbbuf_dbs)) { + return -1; + } + + for (qid = 1; qid <= MAX_IO_QUEUES; qid++) { + + sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid); + if (!sq->valid) { + continue; + } + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, sq->cqid); + if (spdk_unlikely(!cq)) { + return -1; + } + cq->guest_signaled_cq_head = nvme->dbbuf_dbs[cq_offset(sq->cqid, 1)]; + if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) && + !nvme_cq_is_full(cq))) { + task = STAILQ_FIRST(&cq->cq_full_waited_tasks); + STAILQ_REMOVE_HEAD(&cq->cq_full_waited_tasks, stailq); + spdk_vhost_nvme_task_complete(task); + } + + dbbuf_sq = nvme->dbbuf_dbs[sq_offset(qid, 1)]; + sq->sq_tail = (uint16_t)dbbuf_sq; + count = 0; + + while (sq->sq_head != sq->sq_tail) { + if (spdk_unlikely(!sq->sq_cmd)) { + break; + } + if (spdk_likely(!STAILQ_EMPTY(&nvme->free_tasks))) { + task = STAILQ_FIRST(&nvme->free_tasks); + STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); + } else { + return -1; + } + + task->cmd = sq->sq_cmd[sq->sq_head]; + nvme_inc_sq_head(sq); + + /* processing IO */ + ret = spdk_nvme_process_sq(nvme, sq, task); + if (spdk_unlikely(ret)) { + SPDK_ERRLOG("QID %u CID %u, SQ HEAD %u, DBBUF SQ TAIL %u\n", qid, task->cmd.cid, sq->sq_head, + sq->sq_tail); + } + + /* MMIO Control */ + nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1); + + /* Maximum batch I/Os to pick up at once */ + if (count++ == MAX_BATCH_IO) { + break; + } + } + } + + /* Completion Queue */ + spdk_nvme_cq_signal_fd(nvme); + + return count; +} + +static int +vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme, + struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) +{ + uint64_t dbs_dma_addr, eis_dma_addr; + + dbs_dma_addr = cmd->dptr.prp.prp1; + eis_dma_addr = cmd->dptr.prp.prp2; + + if ((dbs_dma_addr % 4096) || (eis_dma_addr % 4096)) { + return -1; + } + /* Guest Physical Address to Host Virtual Address */ + nvme->dbbuf_dbs = spdk_vhost_gpa_to_vva(&nvme->vdev, dbs_dma_addr, 4096); + nvme->dbbuf_eis = spdk_vhost_gpa_to_vva(&nvme->vdev, eis_dma_addr, 4096); + if (!nvme->dbbuf_dbs || !nvme->dbbuf_eis) { + return -1; + } + /* zeroed the doorbell buffer memory */ + memset((void *)nvme->dbbuf_dbs, 0, 4096); + memset((void *)nvme->dbbuf_eis, 0, 4096); + + cpl->status.sc = 0; + cpl->status.sct = 0; + return 0; +} + +static int +vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme, + struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) +{ + uint16_t qid, qsize, cqid; + uint64_t dma_addr; + uint64_t requested_len; + struct spdk_vhost_nvme_cq *cq; + struct spdk_vhost_nvme_sq 
*sq; + + /* physically contiguous */ + if (!(cmd->cdw11 & 0x1)) { + return -1; + } + + cqid = (cmd->cdw11 >> 16) & 0xffff; + qid = cmd->cdw10 & 0xffff; + qsize = (cmd->cdw10 >> 16) & 0xffff; + dma_addr = cmd->dptr.prp.prp1; + if (!dma_addr || dma_addr % 4096) { + return -1; + } + + sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid); + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid); + if (!sq || !cq) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u or CQID %u\n", + qid, cqid); + cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return -1; + } + + sq->sqid = qid; + sq->cqid = cqid; + sq->size = qsize + 1; + sq->sq_head = sq->sq_tail = 0; + requested_len = sizeof(struct spdk_nvme_cmd) * sq->size; + sq->sq_cmd = spdk_vhost_gpa_to_vva(&nvme->vdev, dma_addr, requested_len); + if (!sq->sq_cmd) { + return -1; + } + nvme->num_sqs++; + sq->valid = true; + + cpl->status.sc = 0; + cpl->status.sct = 0; + return 0; +} + +static int +vhost_nvme_delete_io_sq(struct spdk_vhost_nvme_dev *nvme, + struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) +{ + uint16_t qid; + struct spdk_vhost_nvme_sq *sq; + + qid = cmd->cdw10 & 0xffff; + sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid); + if (!sq) { + return -1; + } + + /* We have not yet seen a case where a submission queue + * is deleted while I/O is still running against it; if + * that ever becomes possible, we must ensure the poller + * no longer touches this submission queue. + */ + nvme->num_sqs--; + sq->valid = false; + + memset(sq, 0, sizeof(*sq)); + sq->sq_cmd = NULL; + + cpl->status.sc = 0; + cpl->status.sct = 0; + + return 0; +} + +static int +vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme, + struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) +{ + uint16_t qsize, qid; + uint64_t dma_addr; + struct spdk_vhost_nvme_cq *cq; + uint64_t requested_len; + + /* physically contiguous */ + if (!(cmd->cdw11 & 0x1)) { + return -1; + } + + qid = cmd->cdw10 & 0xffff; + qsize = (cmd->cdw10 >> 16) & 0xffff; + dma_addr = cmd->dptr.prp.prp1; + if (!dma_addr || dma_addr % 4096) { + return -1; + } + + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid); + if (!cq) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u\n", qid); + cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; + return -1; + } + cq->cqid = qid; + cq->size = qsize + 1; + cq->phase = 1; + cq->irq_enabled = (cmd->cdw11 >> 1) & 0x1; + /* Setup virq through vhost messages */ + cq->virq = -1; + cq->cq_head = 0; + cq->guest_signaled_cq_head = 0; + cq->need_signaled_cnt = 0; + requested_len = sizeof(struct spdk_nvme_cpl) * cq->size; + cq->cq_cqe = spdk_vhost_gpa_to_vva(&nvme->vdev, dma_addr, requested_len); + if (!cq->cq_cqe) { + return -1; + } + nvme->num_cqs++; + cq->valid = true; + STAILQ_INIT(&cq->cq_full_waited_tasks); + + cpl->status.sc = 0; + cpl->status.sct = 0; + return 0; +} + +static int +vhost_nvme_delete_io_cq(struct spdk_vhost_nvme_dev *nvme, + struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) +{ + uint16_t qid; + struct spdk_vhost_nvme_cq *cq; + + qid = cmd->cdw10 & 0xffff; + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid); + if (!cq) { + return -1; + } + nvme->num_cqs--; + cq->valid = false; + + memset(cq, 0, sizeof(*cq)); + cq->cq_cqe = NULL; + + cpl->status.sc = 0; + cpl->status.sct = 0; + return 0; +} + +static struct spdk_vhost_nvme_dev * +spdk_vhost_nvme_get_by_name(int vid) +{ + struct spdk_vhost_nvme_dev *nvme; + + TAILQ_FOREACH(nvme,
&g_nvme_ctrlrs, tailq) { + if (nvme->vdev.vid == vid) { + return nvme; + } + } + + return NULL; +} + +int +spdk_vhost_nvme_get_cap(int vid, uint64_t *cap) +{ + struct spdk_vhost_nvme_dev *nvme; + + nvme = spdk_vhost_nvme_get_by_name(vid); + if (!nvme) { + return -1; + } + + *cap = nvme->cap.raw; + return 0; +} + +int +spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf) +{ + struct spdk_nvme_cmd *req = (struct spdk_nvme_cmd *)cmd; + struct spdk_nvme_cpl *cpl = (struct spdk_nvme_cpl *)cqe; + struct spdk_vhost_nvme_ns *ns; + int ret = 0; + struct spdk_vhost_nvme_dev *nvme; + uint32_t cq_head, sq_tail; + + nvme = spdk_vhost_nvme_get_by_name(vid); + if (!nvme) { + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Admin Command Opcode %u\n", req->opc); + switch (req->opc) { + case SPDK_NVME_OPC_IDENTIFY: + if (req->cdw10 == SPDK_NVME_IDENTIFY_CTRLR) { + memcpy(buf, &nvme->cdata, sizeof(struct spdk_nvme_ctrlr_data)); + + } else if (req->cdw10 == SPDK_NVME_IDENTIFY_NS) { + ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, req->nsid); + if (!ns) { + cpl->status.sc = SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE; + cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; + break; + } + memcpy(buf, &ns->nsdata, sizeof(struct spdk_nvme_ns_data)); + } + /* success */ + cpl->status.sc = 0; + cpl->status.sct = 0; + break; + case SPDK_NVME_OPC_CREATE_IO_CQ: + ret = vhost_nvme_create_io_cq(nvme, req, cpl); + break; + case SPDK_NVME_OPC_DELETE_IO_CQ: + ret = vhost_nvme_delete_io_cq(nvme, req, cpl); + break; + case SPDK_NVME_OPC_CREATE_IO_SQ: + ret = vhost_nvme_create_io_sq(nvme, req, cpl); + break; + case SPDK_NVME_OPC_DELETE_IO_SQ: + ret = vhost_nvme_delete_io_sq(nvme, req, cpl); + break; + case SPDK_NVME_OPC_GET_FEATURES: + case SPDK_NVME_OPC_SET_FEATURES: + if (req->cdw10 == SPDK_NVME_FEAT_NUMBER_OF_QUEUES) { + cpl->status.sc = 0; + cpl->status.sct = 0; + cpl->cdw0 = (nvme->num_io_queues - 1) | ((nvme->num_io_queues - 1) << 16); + } else { + cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD; + cpl->status.sct = SPDK_NVME_SCT_GENERIC; + } + break; + case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG: + ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl); + break; + case SPDK_NVME_OPC_ABORT: + sq_tail = nvme->dbbuf_dbs[sq_offset(1, 1)] & 0xffffu; + cq_head = nvme->dbbuf_dbs[cq_offset(1, 1)] & 0xffffu; + SPDK_NOTICELOG("ABORT: CID %u, SQ_TAIL %u, CQ_HEAD %u\n", + (req->cdw10 >> 16) & 0xffffu, sq_tail, cq_head); + /* TODO: ABORT always fails for now */ + cpl->cdw0 = 1; + cpl->status.sc = 0; + cpl->status.sct = 0; + break; + } + + if (ret) { + SPDK_ERRLOG("Admin Passthrough Failed with opcode %u\n", req->opc); + } + + return 0; +} + +int +spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd) +{ + struct spdk_vhost_nvme_dev *nvme; + struct spdk_vhost_nvme_cq *cq; + + nvme = spdk_vhost_nvme_get_by_name(vid); + if (!nvme) { + return -1; + } + + cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid); + if (!cq) { + return -1; + } + if (cq->irq_enabled) { + cq->virq = fd; + } else { + SPDK_ERRLOG("IRQ disabled for NVMe QID %d\n", qid); + } + + return 0; +} + +static void +free_task_pool(struct spdk_vhost_nvme_dev *nvme) +{ + struct spdk_vhost_nvme_task *task; + + while (!STAILQ_EMPTY(&nvme->free_tasks)) { + task = STAILQ_FIRST(&nvme->free_tasks); + STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); + spdk_dma_free(task); + } +} + +static int +alloc_task_pool(struct spdk_vhost_nvme_dev *nvme) +{ + uint32_t entries, i; + struct spdk_vhost_nvme_task *task; + + entries = nvme->num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED; + + for (i = 0; i
< entries; i++) { + task = spdk_dma_zmalloc(sizeof(struct spdk_vhost_nvme_task), + SPDK_CACHE_LINE_SIZE, NULL); + if (task == NULL) { + SPDK_ERRLOG("Controller %s alloc task pool failed\n", + nvme->vdev.name); + free_task_pool(nvme); + return -1; + } + STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq); + } + + return 0; +} + +/* new device means enable the + * virtual NVMe controller + */ +static int +spdk_vhost_nvme_start_device(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + struct spdk_vhost_nvme_ns *ns_dev; + uint32_t i; + + if (nvme == NULL) { + return -1; + } + + if (alloc_task_pool(nvme)) { + return -1; + } + + SPDK_NOTICELOG("Start Device %u, Path %s, lcore %d\n", vdev->vid, + vdev->path, vdev->lcore); + + for (i = 0; i < nvme->num_ns; i++) { + ns_dev = &nvme->ns[i]; + ns_dev->bdev_io_channel = spdk_bdev_get_io_channel(ns_dev->bdev_desc); + if (!ns_dev->bdev_io_channel) { + return -1; + } + } + + /* Start the NVMe Poller */ + nvme->requestq_poller = spdk_poller_register(nvme_worker, nvme, 0); + + spdk_vhost_dev_backend_event_done(event_ctx, 0); + return 0; +} + +static void +spdk_vhost_nvme_deactive_ns(struct spdk_vhost_nvme_ns *ns) +{ + ns->active_ns = 0; + spdk_bdev_close(ns->bdev_desc); + ns->bdev_desc = NULL; + ns->bdev = NULL; +} + +static void +bdev_remove_cb(void *remove_ctx) +{ + struct spdk_vhost_nvme_ns *ns = remove_ctx; + + SPDK_NOTICELOG("Removing NS %u, Block Device %s\n", + ns->nsid, spdk_bdev_get_name(ns->bdev)); + + spdk_vhost_nvme_deactive_ns(ns); +} + +static int +destroy_device_poller_cb(void *arg) +{ + struct spdk_vhost_nvme_dev *nvme = arg; + struct spdk_vhost_nvme_dev *dev, *tmp; + struct spdk_vhost_nvme_ns *ns_dev; + uint32_t i; + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Destroy device poller callback\n"); + + TAILQ_FOREACH_SAFE(dev, &g_nvme_ctrlrs, tailq, tmp) { + if (dev == nvme) { + for (i = 0; i < nvme->num_ns; i++) { + ns_dev = &nvme->ns[i]; + if (ns_dev->bdev_io_channel) { + spdk_put_io_channel(ns_dev->bdev_io_channel); + ns_dev->bdev_io_channel = NULL; + } + } + nvme->num_sqs = 0; + nvme->num_cqs = 0; + nvme->dbbuf_dbs = NULL; + nvme->dbbuf_eis = NULL; + } + } + + spdk_poller_unregister(&nvme->destroy_ctx.poller); + spdk_vhost_dev_backend_event_done(nvme->destroy_ctx.event_ctx, 0); + + return -1; +} + +/* Disable NVMe controller + */ +static int +spdk_vhost_nvme_stop_device(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + + if (nvme == NULL) { + return -1; + } + + free_task_pool(nvme); + SPDK_NOTICELOG("Stopping Device %u, Path %s\n", vdev->vid, vdev->path); + + nvme->destroy_ctx.event_ctx = event_ctx; + spdk_poller_unregister(&nvme->requestq_poller); + nvme->destroy_ctx.poller = spdk_poller_register(destroy_device_poller_cb, nvme, 1000); + + return 0; +} + +static void +spdk_vhost_nvme_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + struct spdk_vhost_nvme_ns *ns_dev; + uint32_t i; + + if (nvme == NULL) { + return; + } + + spdk_json_write_named_array_begin(w, "namespaces"); + + for (i = 0; i < nvme->num_ns; i++) { + ns_dev = &nvme->ns[i]; + if (!ns_dev->active_ns) { + continue; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_uint32(w, "nsid", ns_dev->nsid); + spdk_json_write_named_string(w, "bdev", spdk_bdev_get_name(ns_dev->bdev)); + spdk_json_write_object_end(w); + } + + spdk_json_write_array_end(w); +} + +static void 
+spdk_vhost_nvme_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + struct spdk_vhost_nvme_ns *ns_dev; + uint32_t i; + + if (nvme == NULL) { + return; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "construct_vhost_nvme_controller"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name); + spdk_json_write_named_uint32(w, "io_queues", nvme->num_io_queues); + spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(nvme->vdev.cpumask)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + for (i = 0; i < nvme->num_ns; i++) { + ns_dev = &nvme->ns[i]; + if (!ns_dev->active_ns) { + continue; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "add_vhost_nvme_ns"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name); + spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(ns_dev->bdev)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } +} + +static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend = { + .start_device = spdk_vhost_nvme_start_device, + .stop_device = spdk_vhost_nvme_stop_device, + .dump_info_json = spdk_vhost_nvme_dump_info_json, + .write_config_json = spdk_vhost_nvme_write_config_json, + .remove_device = spdk_vhost_nvme_dev_remove, +}; + +static int +spdk_vhost_nvme_ns_identify_update(struct spdk_vhost_nvme_dev *dev) +{ + struct spdk_nvme_ctrlr_data *cdata = &dev->cdata; + struct spdk_nvme_ns_data *nsdata; + uint64_t num_blocks; + uint32_t i; + + /* Identify Namespace */ + cdata->nn = dev->num_ns; + for (i = 0; i < dev->num_ns; i++) { + nsdata = &dev->ns[i].nsdata; + if (dev->ns[i].active_ns) { + num_blocks = spdk_bdev_get_num_blocks(dev->ns[i].bdev); + nsdata->nsze = num_blocks; + /* ncap must be non-zero for active Namespace */ + nsdata->ncap = num_blocks; + nsdata->nuse = num_blocks; + nsdata->nlbaf = 0; + nsdata->flbas.format = 0; + nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(dev->ns[i].bdev)); + nsdata->noiob = spdk_bdev_get_optimal_io_boundary(dev->ns[i].bdev); + dev->ns[i].block_size = spdk_bdev_get_block_size(dev->ns[i].bdev); + dev->ns[i].capacity = num_blocks * dev->ns[i].block_size; + } else { + memset(nsdata, 0, sizeof(*nsdata)); + } + } + return 0; +} + +static int +spdk_vhost_nvme_ctrlr_identify_update(struct spdk_vhost_nvme_dev *dev) +{ + struct spdk_nvme_ctrlr_data *cdata = &dev->cdata; + char sn[20]; + + /* Controller Capabilities */ + dev->cap.bits.cqr = 1; + dev->cap.bits.to = 1; + dev->cap.bits.dstrd = 0; + dev->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; + dev->cap.bits.mpsmin = 0; + dev->cap.bits.mpsmax = 0; + /* MQES is 0 based value */ + dev->cap.bits.mqes = MAX_QUEUE_ENTRIES_SUPPORTED - 1; + + /* Controller Configuration */ + dev->cc.bits.en = 0; + + /* Controller Status */ + dev->csts.bits.rdy = 0; + + /* Identify Controller */ + spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' '); + cdata->vid = 0x8086; + cdata->ssvid = 0x8086; + spdk_strcpy_pad(cdata->mn, "SPDK Virtual NVMe Controller", sizeof(cdata->mn), ' '); + snprintf(sn, sizeof(sn), "NVMe_%s", dev->vdev.name); + spdk_strcpy_pad(cdata->sn, sn, sizeof(cdata->sn), ' '); + cdata->ieee[0] = 0xe4; + cdata->ieee[1] = 0xd2; + cdata->ieee[2] = 0x5c; + cdata->ver.bits.mjr = 1; + cdata->ver.bits.mnr = 0; + cdata->mdts = 5; /* 128 KiB */ + 
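+ /* Note (added comment): MDTS is reported as a power of two in units of the minimum memory page size (4 KiB here, since CAP.MPSMIN is set to 0 above), so 2^5 * 4 KiB = 128 KiB per command. */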
cdata->rab = 6; + cdata->sqes.min = 6; + cdata->sqes.max = 6; + cdata->cqes.min = 4; + cdata->cqes.max = 4; + cdata->oncs.dsm = 1; + /* Emulated NVMe controller */ + cdata->oacs.doorbell_buffer_config = 1; + + spdk_vhost_nvme_ns_identify_update(dev); + + return 0; +} + +int +spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t num_io_queues) +{ + struct spdk_vhost_nvme_dev *dev = spdk_dma_zmalloc(sizeof(struct spdk_vhost_nvme_dev), + SPDK_CACHE_LINE_SIZE, NULL); + int rc; + + if (dev == NULL) { + return -ENOMEM; + } + + if (num_io_queues < 1 || num_io_queues > MAX_IO_QUEUES) { + spdk_dma_free(dev); + return -EINVAL; + } + + spdk_vhost_lock(); + rc = spdk_vhost_dev_register(&dev->vdev, name, cpumask, + &spdk_vhost_nvme_device_backend); + + if (rc) { + spdk_dma_free(dev); + spdk_vhost_unlock(); + return rc; + } + + dev->num_io_queues = num_io_queues; + STAILQ_INIT(&dev->free_tasks); + TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, dev, tailq); + + spdk_vhost_nvme_ctrlr_identify_update(dev); + + SPDK_NOTICELOG("Controller %s: Constructed\n", name); + spdk_vhost_unlock(); + return rc; +} + +int +spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + struct spdk_vhost_nvme_dev *dev, *tmp; + struct spdk_vhost_nvme_ns *ns; + int rc; + uint32_t i; + + if (nvme == NULL) { + return -EINVAL; + } + + TAILQ_FOREACH_SAFE(dev, &g_nvme_ctrlrs, tailq, tmp) { + if (dev == nvme) { + TAILQ_REMOVE(&g_nvme_ctrlrs, dev, tailq); + for (i = 0; i < nvme->num_ns; i++) { + ns = &nvme->ns[i]; + if (ns->active_ns) { + spdk_vhost_nvme_deactive_ns(ns); + } + } + } + } + + rc = spdk_vhost_dev_unregister(vdev); + if (rc != 0) { + return rc; + } + + spdk_dma_free(nvme); + return 0; +} + +int +spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, const char *bdev_name) +{ + struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); + struct spdk_vhost_nvme_ns *ns; + struct spdk_bdev *bdev; + int rc = -1; + + if (nvme == NULL) { + return -ENODEV; + } + + if (nvme->num_ns == MAX_NAMESPACE) { + SPDK_ERRLOG("Can't support %d Namespaces\n", nvme->num_ns); + return -ENOSPC; + } + + bdev = spdk_bdev_get_by_name(bdev_name); + if (!bdev) { + SPDK_ERRLOG("could not find bdev %s\n", bdev_name); + return -ENODEV; + } + + ns = &nvme->ns[nvme->num_ns]; + rc = spdk_bdev_open(bdev, true, bdev_remove_cb, ns, &nvme->ns[nvme->num_ns].bdev_desc); + if (rc != 0) { + SPDK_ERRLOG("Could not open bdev '%s', error=%d\n", + bdev_name, rc); + return rc; + } + + nvme->ns[nvme->num_ns].bdev = bdev; + nvme->ns[nvme->num_ns].active_ns = 1; + nvme->ns[nvme->num_ns].nsid = nvme->num_ns + 1; + nvme->num_ns++; + + spdk_vhost_nvme_ns_identify_update(nvme); + + return rc; +} + +int +spdk_vhost_nvme_controller_construct(void) +{ + struct spdk_conf_section *sp; + const char *name; + const char *bdev_name; + const char *cpumask; + int rc, i = 0; + struct spdk_vhost_dev *vdev; + uint32_t ctrlr_num, io_queues; + + for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { + if (!spdk_conf_section_match_prefix(sp, "VhostNvme")) { + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VhostNvme%u", &ctrlr_num) != 1) { + SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", + spdk_conf_section_get_name(sp)); + return -1; + } + + name = spdk_conf_section_get_val(sp, "Name"); + if (name == NULL) { + SPDK_ERRLOG("VhostNvme%u: missing Name\n", ctrlr_num); + return -1; + } + + cpumask = spdk_conf_section_get_val(sp, "Cpumask"); + rc = spdk_conf_section_get_intval(sp, 
"NumberOfQueues"); + if (rc > 0) { + io_queues = rc; + } else { + io_queues = 1; + } + + rc = spdk_vhost_nvme_dev_construct(name, cpumask, io_queues); + if (rc < 0) { + SPDK_ERRLOG("VhostNvme%u: Construct failed\n", ctrlr_num); + return -1; + } + + vdev = spdk_vhost_dev_find(name); + if (!vdev) { + return -1; + } + + for (i = 0; spdk_conf_section_get_nval(sp, "Namespace", i) != NULL; i++) { + bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0); + if (!bdev_name) { + SPDK_ERRLOG("namespace configuration missing bdev name\n"); + break; + } + rc = spdk_vhost_nvme_dev_add_ns(vdev, bdev_name); + if (rc < 0) { + SPDK_WARNLOG("VhostNvme%u: Construct Namespace with %s failed\n", + ctrlr_num, bdev_name); + break; + } + } + } + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("vhost_nvme", SPDK_LOG_VHOST_NVME) diff --git a/src/spdk/lib/vhost/vhost_rpc.c b/src/spdk/lib/vhost/vhost_rpc.c new file mode 100644 index 00000000..0e546c36 --- /dev/null +++ b/src/spdk/lib/vhost/vhost_rpc.c @@ -0,0 +1,814 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk_internal/log.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/env.h" + +#include "spdk/scsi.h" +#include "spdk/vhost.h" +#include "vhost_internal.h" +#include "spdk/bdev.h" + +struct rpc_vhost_scsi_ctrlr { + char *ctrlr; + char *cpumask; +}; + +static void +free_rpc_vhost_scsi_ctrlr(struct rpc_vhost_scsi_ctrlr *req) +{ + free(req->ctrlr); + free(req->cpumask); +} + +static const struct spdk_json_object_decoder rpc_construct_vhost_ctrlr[] = { + {"ctrlr", offsetof(struct rpc_vhost_scsi_ctrlr, ctrlr), spdk_json_decode_string }, + {"cpumask", offsetof(struct rpc_vhost_scsi_ctrlr, cpumask), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_construct_vhost_scsi_controller(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_vhost_scsi_ctrlr req = {0}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_vhost_ctrlr, + SPDK_COUNTOF(rpc_construct_vhost_ctrlr), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vhost_scsi_dev_construct(req.ctrlr, req.cpumask); + if (rc < 0) { + goto invalid; + } + + free_rpc_vhost_scsi_ctrlr(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + free_rpc_vhost_scsi_ctrlr(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("construct_vhost_scsi_controller", spdk_rpc_construct_vhost_scsi_controller, + SPDK_RPC_RUNTIME) + +struct rpc_add_vhost_scsi_ctrlr_lun { + char *ctrlr; + uint32_t scsi_target_num; + char *bdev_name; + + struct spdk_jsonrpc_request *request; +}; + +static void +free_rpc_add_vhost_scsi_ctrlr_lun(struct rpc_add_vhost_scsi_ctrlr_lun *req) +{ + free(req->ctrlr); + free(req->bdev_name); + free(req); +} + +static const struct spdk_json_object_decoder rpc_vhost_add_lun[] = { + {"ctrlr", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, ctrlr), spdk_json_decode_string }, + {"scsi_target_num", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, scsi_target_num), spdk_json_decode_uint32}, + {"bdev_name", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, bdev_name), spdk_json_decode_string }, +}; + +static int +spdk_rpc_add_vhost_scsi_lun_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_add_vhost_scsi_ctrlr_lun *rpc = arg; + struct spdk_jsonrpc_request *request = rpc->request; + struct spdk_json_write_ctx *w; + int rc; + + if (vdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + rc = spdk_vhost_scsi_dev_add_tgt(vdev, rpc->scsi_target_num, rpc->bdev_name); + if (rc < 0) { + goto invalid; + } + + free_rpc_add_vhost_scsi_ctrlr_lun(rpc); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return -1; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return 0; + +invalid: + free_rpc_add_vhost_scsi_ctrlr_lun(rpc); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + return rc; +} + +static void +spdk_rpc_add_vhost_scsi_lun(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_add_vhost_scsi_ctrlr_lun *req; + int rc; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + rc = -ENOMEM; + goto invalid; + } + + req->request = request; + if 
(spdk_json_decode_object(params, rpc_vhost_add_lun, + SPDK_COUNTOF(rpc_vhost_add_lun), + req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + if (req->ctrlr == NULL) { + SPDK_ERRLOG("No controller name\n"); + rc = -EINVAL; + goto invalid; + } + + spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_add_vhost_scsi_lun_cb, req); + + return; + +invalid: + if (req) { + free_rpc_add_vhost_scsi_ctrlr_lun(req); + } + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("add_vhost_scsi_lun", spdk_rpc_add_vhost_scsi_lun, SPDK_RPC_RUNTIME) + +struct rpc_remove_vhost_scsi_ctrlr_target { + char *ctrlr; + uint32_t scsi_target_num; + + struct spdk_jsonrpc_request *request; +}; + +static void +free_rpc_remove_vhost_scsi_ctrlr_target(struct rpc_remove_vhost_scsi_ctrlr_target *req) +{ + free(req->ctrlr); + free(req); +} + +static const struct spdk_json_object_decoder rpc_vhost_remove_target[] = { + {"ctrlr", offsetof(struct rpc_remove_vhost_scsi_ctrlr_target, ctrlr), spdk_json_decode_string }, + {"scsi_target_num", offsetof(struct rpc_remove_vhost_scsi_ctrlr_target, scsi_target_num), spdk_json_decode_uint32}, +}; + +static int +spdk_rpc_remove_vhost_scsi_target_finish_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_remove_vhost_scsi_ctrlr_target *rpc = arg; + struct spdk_jsonrpc_request *request = rpc->request; + struct spdk_json_write_ctx *w; + + free_rpc_remove_vhost_scsi_ctrlr_target(rpc); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return -1; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return 0; +} + +static int +spdk_rpc_remove_vhost_scsi_target_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_remove_vhost_scsi_ctrlr_target *rpc = arg; + struct spdk_jsonrpc_request *request = rpc->request; + int rc; + + if (vdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + rc = spdk_vhost_scsi_dev_remove_tgt(vdev, rpc->scsi_target_num, + spdk_rpc_remove_vhost_scsi_target_finish_cb, rpc); + if (rc < 0) { + goto invalid; + } + + return 0; + +invalid: + free_rpc_remove_vhost_scsi_ctrlr_target(rpc); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); + return rc; +} + +static void +spdk_rpc_remove_vhost_scsi_target(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_remove_vhost_scsi_ctrlr_target *req; + int rc; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + rc = -ENOMEM; + goto invalid; + } + + req->request = request; + if (spdk_json_decode_object(params, rpc_vhost_remove_target, + SPDK_COUNTOF(rpc_vhost_remove_target), + req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_remove_vhost_scsi_target_cb, req); + + return; + +invalid: + if (req) { + free_rpc_remove_vhost_scsi_ctrlr_target(req); + } + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} + +SPDK_RPC_REGISTER("remove_vhost_scsi_target", spdk_rpc_remove_vhost_scsi_target, SPDK_RPC_RUNTIME) + +struct rpc_vhost_blk_ctrlr { + char *ctrlr; + char *dev_name; + char *cpumask; + bool readonly; +}; + +static const struct spdk_json_object_decoder rpc_construct_vhost_blk_ctrlr[] = { + {"ctrlr", offsetof(struct rpc_vhost_blk_ctrlr, ctrlr), spdk_json_decode_string }, + {"dev_name", 
offsetof(struct rpc_vhost_blk_ctrlr, dev_name), spdk_json_decode_string }, + {"cpumask", offsetof(struct rpc_vhost_blk_ctrlr, cpumask), spdk_json_decode_string, true}, + {"readonly", offsetof(struct rpc_vhost_blk_ctrlr, readonly), spdk_json_decode_bool, true}, +}; + +static void +free_rpc_vhost_blk_ctrlr(struct rpc_vhost_blk_ctrlr *req) +{ + free(req->ctrlr); + free(req->dev_name); + free(req->cpumask); +} + +static void +spdk_rpc_construct_vhost_blk_controller(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_vhost_blk_ctrlr req = {0}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_vhost_blk_ctrlr, + SPDK_COUNTOF(rpc_construct_vhost_blk_ctrlr), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vhost_blk_construct(req.ctrlr, req.cpumask, req.dev_name, req.readonly); + if (rc < 0) { + goto invalid; + } + + free_rpc_vhost_blk_ctrlr(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + free_rpc_vhost_blk_ctrlr(&req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + +} +SPDK_RPC_REGISTER("construct_vhost_blk_controller", spdk_rpc_construct_vhost_blk_controller, + SPDK_RPC_RUNTIME) + +struct rpc_remove_vhost_ctrlr { + char *ctrlr; + + struct spdk_jsonrpc_request *request; +}; + +static const struct spdk_json_object_decoder rpc_remove_vhost_ctrlr[] = { + {"ctrlr", offsetof(struct rpc_remove_vhost_ctrlr, ctrlr), spdk_json_decode_string }, +}; + +static void +free_rpc_remove_vhost_ctrlr(struct rpc_remove_vhost_ctrlr *req) +{ + free(req->ctrlr); + free(req); +} + +static int +spdk_rpc_remove_vhost_controller_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_remove_vhost_ctrlr *ctx = arg; + struct spdk_jsonrpc_request *request = ctx->request; + struct spdk_json_write_ctx *w; + int rc; + + if (vdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + rc = spdk_vhost_dev_remove(vdev); + if (rc < 0) { + goto invalid; + } + + free_rpc_remove_vhost_ctrlr(ctx); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return 0; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return 0; + +invalid: + free_rpc_remove_vhost_ctrlr(ctx); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + return -1; +} + +static void +spdk_rpc_remove_vhost_controller(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_remove_vhost_ctrlr *req; + int rc; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + rc = -ENOMEM; + goto invalid; + } + + req->request = request; + if (spdk_json_decode_object(params, rpc_remove_vhost_ctrlr, + SPDK_COUNTOF(rpc_remove_vhost_ctrlr), req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_remove_vhost_controller_cb, req); + return; + +invalid: + if (req) { + free_rpc_remove_vhost_ctrlr(req); + } + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + +} +SPDK_RPC_REGISTER("remove_vhost_controller", spdk_rpc_remove_vhost_controller, SPDK_RPC_RUNTIME) + +struct rpc_get_vhost_ctrlrs { + char *name; + struct spdk_json_write_ctx *w; + struct 
spdk_jsonrpc_request *request; +}; + +static void +_spdk_rpc_get_vhost_controller(struct spdk_json_write_ctx *w, struct spdk_vhost_dev *vdev) +{ + uint32_t delay_base_us, iops_threshold; + + spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold); + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "ctrlr", spdk_vhost_dev_get_name(vdev)); + spdk_json_write_named_string_fmt(w, "cpumask", "0x%s", spdk_cpuset_fmt(vdev->cpumask)); + spdk_json_write_named_uint32(w, "delay_base_us", delay_base_us); + spdk_json_write_named_uint32(w, "iops_threshold", iops_threshold); + spdk_json_write_named_string(w, "socket", vdev->path); + + spdk_json_write_named_object_begin(w, "backend_specific"); + spdk_vhost_dump_info_json(vdev, w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static int +spdk_rpc_get_vhost_controllers_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_get_vhost_ctrlrs *ctx = arg; + + assert(ctx->name == NULL); + + if (vdev == NULL) { + spdk_json_write_array_end(ctx->w); + spdk_jsonrpc_end_result(ctx->request, ctx->w); + free(ctx); + return 0; + } + + _spdk_rpc_get_vhost_controller(ctx->w, vdev); + return 0; +} + +static int +spdk_rpc_get_vhost_controller_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_get_vhost_ctrlrs *ctx = arg; + + assert(ctx->name != NULL); + + if (vdev == NULL) { + spdk_jsonrpc_send_error_response(ctx->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(ENODEV)); + goto free_name_ctx; + } + + ctx->w = spdk_jsonrpc_begin_result(ctx->request); + if (ctx->w == NULL) { + goto free_name_ctx; + } + + spdk_json_write_array_begin(ctx->w); + _spdk_rpc_get_vhost_controller(ctx->w, vdev); + spdk_json_write_array_end(ctx->w); + + spdk_jsonrpc_end_result(ctx->request, ctx->w); + +free_name_ctx: + free(ctx->name); + free(ctx); + return 0; +} + +static const struct spdk_json_object_decoder rpc_get_vhost_ctrlrs_decoders[] = { + {"name", offsetof(struct rpc_get_vhost_ctrlrs, name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_vhost_controllers(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_vhost_ctrlrs *ctx; + struct spdk_json_write_ctx *w; + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + spdk_strerror(ENOMEM)); + return; + } + + if (params && spdk_json_decode_object(params, rpc_get_vhost_ctrlrs_decoders, + SPDK_COUNTOF(rpc_get_vhost_ctrlrs_decoders), ctx)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + free(ctx); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "Invalid parameters"); + return; + } + + if (ctx->name) { + ctx->request = request; + spdk_vhost_call_external_event(ctx->name, spdk_rpc_get_vhost_controller_cb, ctx); + return; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free(ctx); + return; + } + + spdk_json_write_array_begin(w); + + ctx->w = w; + ctx->request = request; + spdk_vhost_call_external_event_foreach(spdk_rpc_get_vhost_controllers_cb, ctx); +} +SPDK_RPC_REGISTER("get_vhost_controllers", spdk_rpc_get_vhost_controllers, SPDK_RPC_RUNTIME) + + +struct rpc_vhost_ctrlr_coalescing { + char *ctrlr; + uint32_t delay_base_us; + uint32_t iops_threshold; + struct spdk_jsonrpc_request *request; +}; + +static const struct spdk_json_object_decoder rpc_set_vhost_ctrlr_coalescing[] = { + {"ctrlr", offsetof(struct rpc_vhost_ctrlr_coalescing, ctrlr), spdk_json_decode_string }, + 
{"delay_base_us", offsetof(struct rpc_vhost_ctrlr_coalescing, delay_base_us), spdk_json_decode_uint32}, + {"iops_threshold", offsetof(struct rpc_vhost_ctrlr_coalescing, iops_threshold), spdk_json_decode_uint32}, +}; + +static void +free_rpc_set_vhost_controllers_event_coalescing(struct rpc_vhost_ctrlr_coalescing *req) +{ + if (!req) { + return; + } + + free(req->ctrlr); + free(req); +} + +static int +spdk_rpc_set_vhost_controller_coalescing_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_vhost_ctrlr_coalescing *req = arg; + struct spdk_json_write_ctx *w; + int rc; + + if (vdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + rc = spdk_vhost_set_coalescing(vdev, req->delay_base_us, req->iops_threshold); + if (rc) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(req->request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(req->request, w); + } + + free_rpc_set_vhost_controllers_event_coalescing(req); + return 0; + +invalid: + spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_set_vhost_controllers_event_coalescing(req); + return 0; +} + +static void +spdk_rpc_set_vhost_controller_coalescing(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_vhost_ctrlr_coalescing *req; + int rc; + + req = calloc(1, sizeof(struct rpc_vhost_ctrlr_coalescing)); + if (!req) { + rc = -ENOMEM; + goto invalid; + } + + if (spdk_json_decode_object(params, rpc_set_vhost_ctrlr_coalescing, + SPDK_COUNTOF(rpc_set_vhost_ctrlr_coalescing), req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + req->request = request; + spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_set_vhost_controller_coalescing_cb, req); + return; + +invalid: + free_rpc_set_vhost_controllers_event_coalescing(req); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("set_vhost_controller_coalescing", spdk_rpc_set_vhost_controller_coalescing, + SPDK_RPC_RUNTIME) + +struct rpc_vhost_nvme_ctrlr { + char *ctrlr; + uint32_t io_queues; + char *cpumask; +}; + +static const struct spdk_json_object_decoder rpc_construct_vhost_nvme_ctrlr[] = { + {"ctrlr", offsetof(struct rpc_vhost_nvme_ctrlr, ctrlr), spdk_json_decode_string }, + {"io_queues", offsetof(struct rpc_vhost_nvme_ctrlr, io_queues), spdk_json_decode_uint32}, + {"cpumask", offsetof(struct rpc_vhost_nvme_ctrlr, cpumask), spdk_json_decode_string, true}, +}; + +static void +free_rpc_vhost_nvme_ctrlr(struct rpc_vhost_nvme_ctrlr *req) +{ + free(req->ctrlr); + free(req->cpumask); +} + +static void +spdk_rpc_construct_vhost_nvme_controller(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_vhost_nvme_ctrlr req = {0}; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_construct_vhost_nvme_ctrlr, + SPDK_COUNTOF(rpc_construct_vhost_nvme_ctrlr), + &req)) { + rc = -EINVAL; + goto invalid; + } + + rc = spdk_vhost_nvme_dev_construct(req.ctrlr, req.cpumask, req.io_queues); + if (rc < 0) { + free_rpc_vhost_nvme_ctrlr(&req); + goto invalid; + } + + free_rpc_vhost_nvme_ctrlr(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + 
+} +SPDK_RPC_REGISTER("construct_vhost_nvme_controller", spdk_rpc_construct_vhost_nvme_controller, + SPDK_RPC_RUNTIME) + +struct rpc_add_vhost_nvme_ctrlr_ns { + char *ctrlr; + char *bdev_name; + struct spdk_jsonrpc_request *request; +}; + +static void +free_rpc_add_vhost_nvme_ctrlr_ns(struct rpc_add_vhost_nvme_ctrlr_ns *req) +{ + free(req->ctrlr); + free(req->bdev_name); + free(req); +} + +static const struct spdk_json_object_decoder rpc_vhost_nvme_add_ns[] = { + {"ctrlr", offsetof(struct rpc_add_vhost_nvme_ctrlr_ns, ctrlr), spdk_json_decode_string }, + {"bdev_name", offsetof(struct rpc_add_vhost_nvme_ctrlr_ns, bdev_name), spdk_json_decode_string }, +}; + +static int +spdk_rpc_add_vhost_nvme_ns_cb(struct spdk_vhost_dev *vdev, void *arg) +{ + struct rpc_add_vhost_nvme_ctrlr_ns *rpc = arg; + struct spdk_jsonrpc_request *request = rpc->request; + struct spdk_json_write_ctx *w; + int rc; + + if (vdev == NULL) { + rc = -ENODEV; + goto invalid; + } + + rc = spdk_vhost_nvme_dev_add_ns(vdev, rpc->bdev_name); + if (rc < 0) { + goto invalid; + } + free_rpc_add_vhost_nvme_ctrlr_ns(rpc); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return -1; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return 0; + +invalid: + free_rpc_add_vhost_nvme_ctrlr_ns(rpc); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + return rc; +} + +static void +spdk_rpc_add_vhost_nvme_ns(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_add_vhost_nvme_ctrlr_ns *req; + int rc; + + req = calloc(1, sizeof(*req)); + if (req == NULL) { + rc = -ENOMEM; + goto invalid; + } + + req->request = request; + if (spdk_json_decode_object(params, rpc_vhost_nvme_add_ns, + SPDK_COUNTOF(rpc_vhost_nvme_add_ns), + req)) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_RPC, "spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_add_vhost_nvme_ns_cb, req); + return; + +invalid: + if (req) { + free_rpc_add_vhost_nvme_ctrlr_ns(req); + } + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("add_vhost_nvme_ns", spdk_rpc_add_vhost_nvme_ns, SPDK_RPC_RUNTIME) + + +SPDK_LOG_REGISTER_COMPONENT("vhost_rpc", SPDK_LOG_VHOST_RPC) diff --git a/src/spdk/lib/vhost/vhost_scsi.c b/src/spdk/lib/vhost/vhost_scsi.c new file mode 100644 index 00000000..aefa4c45 --- /dev/null +++ b/src/spdk/lib/vhost/vhost_scsi.c @@ -0,0 +1,1271 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include + +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/scsi.h" +#include "spdk/scsi_spec.h" +#include "spdk/conf.h" +#include "spdk/event.h" +#include "spdk/util.h" +#include "spdk/likely.h" + +#include "spdk/vhost.h" +#include "vhost_internal.h" + +/* Features supported by SPDK VHOST lib. */ +#define SPDK_VHOST_SCSI_FEATURES (SPDK_VHOST_FEATURES | \ + (1ULL << VIRTIO_SCSI_F_INOUT) | \ + (1ULL << VIRTIO_SCSI_F_HOTPLUG) | \ + (1ULL << VIRTIO_SCSI_F_CHANGE ) | \ + (1ULL << VIRTIO_SCSI_F_T10_PI )) + +/* Features that are specified in VIRTIO SCSI but currently not supported: + * - Live migration not supported yet + * - T10 PI + */ +#define SPDK_VHOST_SCSI_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \ + (1ULL << VIRTIO_SCSI_F_T10_PI )) + +#define MGMT_POLL_PERIOD_US (1000 * 5) + +#define VIRTIO_SCSI_CONTROLQ 0 +#define VIRTIO_SCSI_EVENTQ 1 +#define VIRTIO_SCSI_REQUESTQ 2 + +struct spdk_scsi_dev_vhost_state { + bool removed; + spdk_vhost_event_fn remove_cb; + void *remove_ctx; +}; + +struct spdk_vhost_scsi_dev { + struct spdk_vhost_dev vdev; + struct spdk_scsi_dev *scsi_dev[SPDK_VHOST_SCSI_CTRLR_MAX_DEVS]; + struct spdk_scsi_dev_vhost_state scsi_dev_state[SPDK_VHOST_SCSI_CTRLR_MAX_DEVS]; + + struct spdk_poller *requestq_poller; + struct spdk_poller *mgmt_poller; + struct spdk_vhost_dev_destroy_ctx destroy_ctx; +} __rte_cache_aligned; + +struct spdk_vhost_scsi_task { + struct spdk_scsi_task scsi; + struct iovec iovs[SPDK_VHOST_IOVS_MAX]; + + union { + struct virtio_scsi_cmd_resp *resp; + struct virtio_scsi_ctrl_tmf_resp *tmf_resp; + }; + + struct spdk_vhost_scsi_dev *svdev; + struct spdk_scsi_dev *scsi_dev; + + /** Number of bytes that were written. */ + uint32_t used_len; + + int req_idx; + + /* If set, the task is currently used for I/O processing. 
*/ + bool used; + + struct spdk_vhost_virtqueue *vq; +}; + +static int spdk_vhost_scsi_start(struct spdk_vhost_dev *, void *); +static int spdk_vhost_scsi_stop(struct spdk_vhost_dev *, void *); +static void spdk_vhost_scsi_dump_info_json(struct spdk_vhost_dev *vdev, + struct spdk_json_write_ctx *w); +static void spdk_vhost_scsi_write_config_json(struct spdk_vhost_dev *vdev, + struct spdk_json_write_ctx *w); +static int spdk_vhost_scsi_dev_remove(struct spdk_vhost_dev *vdev); + +const struct spdk_vhost_dev_backend spdk_vhost_scsi_device_backend = { + .virtio_features = SPDK_VHOST_SCSI_FEATURES, + .disabled_features = SPDK_VHOST_SCSI_DISABLED_FEATURES, + .start_device = spdk_vhost_scsi_start, + .stop_device = spdk_vhost_scsi_stop, + .dump_info_json = spdk_vhost_scsi_dump_info_json, + .write_config_json = spdk_vhost_scsi_write_config_json, + .remove_device = spdk_vhost_scsi_dev_remove, +}; + +static void +spdk_vhost_scsi_task_put(struct spdk_vhost_scsi_task *task) +{ + spdk_scsi_task_put(&task->scsi); +} + +static void +spdk_vhost_scsi_task_free_cb(struct spdk_scsi_task *scsi_task) +{ + struct spdk_vhost_scsi_task *task = SPDK_CONTAINEROF(scsi_task, struct spdk_vhost_scsi_task, scsi); + + assert(task->svdev->vdev.task_cnt > 0); + task->svdev->vdev.task_cnt--; + task->used = false; +} + +static void +process_removed_devs(struct spdk_vhost_scsi_dev *svdev) +{ + struct spdk_scsi_dev *dev; + struct spdk_scsi_dev_vhost_state *state; + int i; + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; ++i) { + dev = svdev->scsi_dev[i]; + state = &svdev->scsi_dev_state[i]; + + if (dev && state->removed && !spdk_scsi_dev_has_pending_tasks(dev)) { + spdk_scsi_dev_free_io_channels(dev); + svdev->scsi_dev[i] = NULL; + spdk_scsi_dev_destruct(dev); + if (state->remove_cb) { + state->remove_cb(&svdev->vdev, state->remove_ctx); + state->remove_cb = NULL; + } + SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: hot-detached device 'Dev %u'.\n", + svdev->vdev.name, i); + } + } +} + +static void +eventq_enqueue(struct spdk_vhost_scsi_dev *svdev, unsigned scsi_dev_num, uint32_t event, + uint32_t reason) +{ + struct spdk_vhost_virtqueue *vq; + struct vring_desc *desc, *desc_table; + struct virtio_scsi_event *desc_ev; + uint32_t desc_table_size, req_size = 0; + uint16_t req; + int rc; + + assert(scsi_dev_num < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); + vq = &svdev->vdev.virtqueue[VIRTIO_SCSI_EVENTQ]; + + if (spdk_vhost_vq_avail_ring_get(vq, &req, 1) != 1) { + SPDK_ERRLOG("Controller %s: Failed to send virtio event (no avail ring entries?).\n", + svdev->vdev.name); + return; + } + + rc = spdk_vhost_vq_get_desc(&svdev->vdev, vq, req, &desc, &desc_table, &desc_table_size); + if (rc != 0 || desc->len < sizeof(*desc_ev)) { + SPDK_ERRLOG("Controller %s: Invalid eventq descriptor at index %"PRIu16".\n", + svdev->vdev.name, req); + goto out; + } + + desc_ev = spdk_vhost_gpa_to_vva(&svdev->vdev, desc->addr, sizeof(*desc_ev)); + if (desc_ev == NULL) { + SPDK_ERRLOG("Controller %s: Eventq descriptor at index %"PRIu16" points to unmapped guest memory address %p.\n", + svdev->vdev.name, req, (void *)(uintptr_t)desc->addr); + goto out; + } + + desc_ev->event = event; + desc_ev->lun[0] = 1; + desc_ev->lun[1] = scsi_dev_num; + /* virtio LUN id 0 can refer either to the entire device + * or actual LUN 0 (the only supported by vhost for now) + */ + desc_ev->lun[2] = 0 >> 8; + desc_ev->lun[3] = 0 & 0xFF; + /* virtio doesn't specify any strict format for LUN id (bytes 2 and 3) + * current implementation relies on linux kernel sources + */ + memset(&desc_ev->lun[4], 
0, 4); + desc_ev->reason = reason; + req_size = sizeof(*desc_ev); + +out: + spdk_vhost_vq_used_ring_enqueue(&svdev->vdev, vq, req, req_size); +} + +static void +submit_completion(struct spdk_vhost_scsi_task *task) +{ + spdk_vhost_vq_used_ring_enqueue(&task->svdev->vdev, task->vq, task->req_idx, + task->used_len); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "Finished task (%p) req_idx=%d\n", task, task->req_idx); + + spdk_vhost_scsi_task_put(task); +} + +static void +spdk_vhost_scsi_task_mgmt_cpl(struct spdk_scsi_task *scsi_task) +{ + struct spdk_vhost_scsi_task *task = SPDK_CONTAINEROF(scsi_task, struct spdk_vhost_scsi_task, scsi); + + submit_completion(task); +} + +static void +spdk_vhost_scsi_task_cpl(struct spdk_scsi_task *scsi_task) +{ + struct spdk_vhost_scsi_task *task = SPDK_CONTAINEROF(scsi_task, struct spdk_vhost_scsi_task, scsi); + + /* The SCSI task has completed. Do final processing and then post + notification to the virtqueue's "used" ring. + */ + task->resp->status = task->scsi.status; + + if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) { + memcpy(task->resp->sense, task->scsi.sense_data, task->scsi.sense_data_len); + task->resp->sense_len = task->scsi.sense_data_len; + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "Task (%p) req_idx=%d failed - status=%u\n", task, task->req_idx, + task->scsi.status); + } + assert(task->scsi.transfer_len == task->scsi.length); + task->resp->resid = task->scsi.length - task->scsi.data_transferred; + + submit_completion(task); +} + +static void +task_submit(struct spdk_vhost_scsi_task *task) +{ + task->resp->response = VIRTIO_SCSI_S_OK; + spdk_scsi_dev_queue_task(task->scsi_dev, &task->scsi); +} + +static void +mgmt_task_submit(struct spdk_vhost_scsi_task *task, enum spdk_scsi_task_func func) +{ + task->tmf_resp->response = VIRTIO_SCSI_S_OK; + spdk_scsi_dev_queue_mgmt_task(task->scsi_dev, &task->scsi, func); +} + +static void +invalid_request(struct spdk_vhost_scsi_task *task) +{ + spdk_vhost_vq_used_ring_enqueue(&task->svdev->vdev, task->vq, task->req_idx, + task->used_len); + spdk_vhost_scsi_task_put(task); + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "Invalid request (status=%" PRIu8")\n", + task->resp ? task->resp->response : -1); +} + +static int +spdk_vhost_scsi_task_init_target(struct spdk_vhost_scsi_task *task, const __u8 *lun) +{ + struct spdk_scsi_dev *dev; + uint16_t lun_id = (((uint16_t)lun[2] << 8) | lun[3]) & 0x3FFF; + + SPDK_TRACEDUMP(SPDK_LOG_VHOST_SCSI_QUEUE, "LUN", lun, 8); + + /* First byte must be 1 and second is target */ + if (lun[0] != 1 || lun[1] >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { + return -1; + } + + dev = task->svdev->scsi_dev[lun[1]]; + task->scsi_dev = dev; + if (dev == NULL || task->svdev->scsi_dev_state[lun[1]].removed) { + /* If dev has been hotdetached, return 0 to allow sending + * additional hotremove event via sense codes. + */ + return task->svdev->scsi_dev_state[lun[1]].removed ? 
0 : -1; + } + + task->scsi.target_port = spdk_scsi_dev_find_port_by_id(task->scsi_dev, 0); + task->scsi.lun = spdk_scsi_dev_get_lun(dev, lun_id); + return 0; +} + +static void +process_ctrl_request(struct spdk_vhost_scsi_task *task) +{ + struct spdk_vhost_dev *vdev = &task->svdev->vdev; + struct vring_desc *desc, *desc_table; + struct virtio_scsi_ctrl_tmf_req *ctrl_req; + struct virtio_scsi_ctrl_an_resp *an_resp; + uint32_t desc_table_size, used_len = 0; + int rc; + + spdk_scsi_task_construct(&task->scsi, spdk_vhost_scsi_task_mgmt_cpl, spdk_vhost_scsi_task_free_cb); + rc = spdk_vhost_vq_get_desc(vdev, task->vq, task->req_idx, &desc, &desc_table, &desc_table_size); + if (spdk_unlikely(rc != 0)) { + SPDK_ERRLOG("%s: Invalid controlq descriptor at index %d.\n", + vdev->name, task->req_idx); + goto out; + } + + ctrl_req = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(*ctrl_req)); + if (ctrl_req == NULL) { + SPDK_ERRLOG("%s: Invalid task management request at index %d.\n", + vdev->name, task->req_idx); + goto out; + } + + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_QUEUE, + "Processing controlq descriptor: desc %d/%p, desc_addr %p, len %d, flags %d, last_used_idx %d; kickfd %d; size %d\n", + task->req_idx, desc, (void *)desc->addr, desc->len, desc->flags, task->vq->vring.last_used_idx, + task->vq->vring.kickfd, task->vq->vring.size); + SPDK_TRACEDUMP(SPDK_LOG_VHOST_SCSI_QUEUE, "Request descriptor", (uint8_t *)ctrl_req, + desc->len); + + spdk_vhost_scsi_task_init_target(task, ctrl_req->lun); + + spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size); + if (spdk_unlikely(desc == NULL)) { + SPDK_ERRLOG("%s: No response descriptor for controlq request %d.\n", + vdev->name, task->req_idx); + goto out; + } + + /* Process the TMF request */ + switch (ctrl_req->type) { + case VIRTIO_SCSI_T_TMF: + task->tmf_resp = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(*task->tmf_resp)); + if (spdk_unlikely(desc->len < sizeof(struct virtio_scsi_ctrl_tmf_resp) || task->tmf_resp == NULL)) { + SPDK_ERRLOG("%s: TMF response descriptor at index %d points to invalid guest memory region\n", + vdev->name, task->req_idx); + goto out; + } + + /* Check if we are processing a valid request */ + if (task->scsi_dev == NULL) { + task->tmf_resp->response = VIRTIO_SCSI_S_BAD_TARGET; + break; + } + + switch (ctrl_req->subtype) { + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: + /* Handle LUN reset */ + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_QUEUE, "LUN reset\n"); + + mgmt_task_submit(task, SPDK_SCSI_TASK_FUNC_LUN_RESET); + return; + default: + task->tmf_resp->response = VIRTIO_SCSI_S_ABORTED; + /* Unsupported command */ + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_QUEUE, "Unsupported TMF command %x\n", ctrl_req->subtype); + break; + } + break; + case VIRTIO_SCSI_T_AN_QUERY: + case VIRTIO_SCSI_T_AN_SUBSCRIBE: { + an_resp = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(*an_resp)); + if (spdk_unlikely(desc->len < sizeof(struct virtio_scsi_ctrl_an_resp) || an_resp == NULL)) { + SPDK_WARNLOG("%s: Asynchronous response descriptor points to invalid guest memory region\n", + vdev->name); + goto out; + } + + an_resp->response = VIRTIO_SCSI_S_ABORTED; + break; + } + default: + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_QUEUE, "Unsupported control command %x\n", ctrl_req->type); + break; + } + + used_len = sizeof(struct virtio_scsi_ctrl_tmf_resp); +out: + spdk_vhost_vq_used_ring_enqueue(vdev, task->vq, task->req_idx, used_len); + spdk_vhost_scsi_task_put(task); +} + +/* + * Process task's descriptor chain and setup data related fields. 
+ * Return + * -1 if request is invalid and must be aborted, + * 0 if all data are set. + */ +static int +task_data_setup(struct spdk_vhost_scsi_task *task, + struct virtio_scsi_cmd_req **req) +{ + struct spdk_vhost_dev *vdev = &task->svdev->vdev; + struct vring_desc *desc, *desc_table; + struct iovec *iovs = task->iovs; + uint16_t iovcnt = 0; + uint32_t desc_table_len, len = 0; + int rc; + + spdk_scsi_task_construct(&task->scsi, spdk_vhost_scsi_task_cpl, spdk_vhost_scsi_task_free_cb); + + rc = spdk_vhost_vq_get_desc(vdev, task->vq, task->req_idx, &desc, &desc_table, &desc_table_len); + /* First descriptor must be readable */ + if (spdk_unlikely(rc != 0 || spdk_vhost_vring_desc_is_wr(desc) || + desc->len < sizeof(struct virtio_scsi_cmd_req))) { + SPDK_WARNLOG("%s: invalid first (request) descriptor at index %"PRIu16".\n", + vdev->name, task->req_idx); + goto invalid_task; + } + + *req = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(**req)); + if (spdk_unlikely(*req == NULL)) { + SPDK_WARNLOG("%s: Request descriptor at index %d points to invalid guest memory region\n", + vdev->name, task->req_idx); + goto invalid_task; + } + + /* Each request must have at least 2 descriptors (e.g. request and response) */ + spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_len); + if (desc == NULL) { + SPDK_WARNLOG("%s: Descriptor chain at index %d contains neither payload nor response buffer.\n", + vdev->name, task->req_idx); + goto invalid_task; + } + task->scsi.dxfer_dir = spdk_vhost_vring_desc_is_wr(desc) ? SPDK_SCSI_DIR_FROM_DEV : + SPDK_SCSI_DIR_TO_DEV; + task->scsi.iovs = iovs; + + if (task->scsi.dxfer_dir == SPDK_SCSI_DIR_FROM_DEV) { + /* + * FROM_DEV (READ): [RD_req][WR_resp][WR_buf0]...[WR_bufN] + */ + task->resp = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(*task->resp)); + if (spdk_unlikely(desc->len < sizeof(struct virtio_scsi_cmd_resp) || task->resp == NULL)) { + SPDK_WARNLOG("%s: Response descriptor at index %d points to invalid guest memory region\n", + vdev->name, task->req_idx); + goto invalid_task; + } + rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_len); + if (spdk_unlikely(rc != 0)) { + SPDK_WARNLOG("%s: invalid descriptor chain at request index %d (descriptor id overflow?).\n", + vdev->name, task->req_idx); + goto invalid_task; + } + + if (desc == NULL) { + /* + * TEST UNIT READY command and some others might not contain any payload and this is not an error. + */ + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_DATA, + "No payload descriptors for FROM DEV command req_idx=%"PRIu16".\n", task->req_idx); + SPDK_TRACEDUMP(SPDK_LOG_VHOST_SCSI_DATA, "CDB=", (*req)->cdb, VIRTIO_SCSI_CDB_SIZE); + task->used_len = sizeof(struct virtio_scsi_cmd_resp); + task->scsi.iovcnt = 1; + task->scsi.iovs[0].iov_len = 0; + task->scsi.length = 0; + task->scsi.transfer_len = 0; + return 0; + } + + /* All remaining descriptors are data. 
*/ + while (desc) { + if (spdk_unlikely(!spdk_vhost_vring_desc_is_wr(desc))) { + SPDK_WARNLOG("FROM DEV cmd: descriptor nr %" PRIu16" in payload chain is read only.\n", iovcnt); + goto invalid_task; + } + + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) { + goto invalid_task; + } + len += desc->len; + + rc = spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_len); + if (spdk_unlikely(rc != 0)) { + SPDK_WARNLOG("%s: invalid payload in descriptor chain starting at index %d.\n", + vdev->name, task->req_idx); + goto invalid_task; + } + } + + task->used_len = sizeof(struct virtio_scsi_cmd_resp) + len; + } else { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_DATA, "TO DEV"); + /* + * TO_DEV (WRITE):[RD_req][RD_buf0]...[RD_bufN][WR_resp] + * No need to check descriptor WR flag as this is done while setting scsi.dxfer_dir. + */ + + /* Process descriptors up to response. */ + while (!spdk_vhost_vring_desc_is_wr(desc)) { + if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) { + goto invalid_task; + } + len += desc->len; + + spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_len); + if (spdk_unlikely(desc == NULL)) { + SPDK_WARNLOG("TO_DEV cmd: no response descriptor.\n"); + goto invalid_task; + } + } + + task->resp = spdk_vhost_gpa_to_vva(vdev, desc->addr, sizeof(*task->resp)); + if (spdk_unlikely(desc->len < sizeof(struct virtio_scsi_cmd_resp) || task->resp == NULL)) { + SPDK_WARNLOG("%s: Response descriptor at index %d points to invalid guest memory region\n", + vdev->name, task->req_idx); + goto invalid_task; + } + + task->used_len = sizeof(struct virtio_scsi_cmd_resp); + } + + task->scsi.iovcnt = iovcnt; + task->scsi.length = len; + task->scsi.transfer_len = len; + return 0; + +invalid_task: + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI_DATA, "%s: Invalid task at index %"PRIu16".\n", + vdev->name, task->req_idx); + return -1; +} + +static int +process_request(struct spdk_vhost_scsi_task *task) +{ + struct virtio_scsi_cmd_req *req; + int result; + + result = task_data_setup(task, &req); + if (result) { + return result; + } + + result = spdk_vhost_scsi_task_init_target(task, req->lun); + if (spdk_unlikely(result != 0)) { + task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; + return -1; + } + + task->scsi.cdb = req->cdb; + SPDK_TRACEDUMP(SPDK_LOG_VHOST_SCSI_DATA, "request CDB", req->cdb, VIRTIO_SCSI_CDB_SIZE); + + if (spdk_unlikely(task->scsi.lun == NULL)) { + spdk_scsi_task_process_null_lun(&task->scsi); + task->resp->response = VIRTIO_SCSI_S_OK; + return 1; + } + + return 0; +} + +static void +process_controlq(struct spdk_vhost_scsi_dev *svdev, struct spdk_vhost_virtqueue *vq) +{ + struct spdk_vhost_scsi_task *task; + uint16_t reqs[32]; + uint16_t reqs_cnt, i; + + reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); + for (i = 0; i < reqs_cnt; i++) { + if (spdk_unlikely(reqs[i] >= vq->vring.size)) { + SPDK_ERRLOG("%s: invalid entry in avail ring. Buffer '%"PRIu16"' exceeds virtqueue size (%"PRIu16")\n", + svdev->vdev.name, reqs[i], vq->vring.size); + spdk_vhost_vq_used_ring_enqueue(&svdev->vdev, vq, reqs[i], 0); + continue; + } + + task = &((struct spdk_vhost_scsi_task *)vq->tasks)[reqs[i]]; + if (spdk_unlikely(task->used)) { + SPDK_ERRLOG("%s: invalid entry in avail ring. 
Buffer '%"PRIu16"' is still in use!\n", + svdev->vdev.name, reqs[i]); + spdk_vhost_vq_used_ring_enqueue(&svdev->vdev, vq, reqs[i], 0); + continue; + } + + svdev->vdev.task_cnt++; + memset(&task->scsi, 0, sizeof(task->scsi)); + task->tmf_resp = NULL; + task->used = true; + process_ctrl_request(task); + } +} + +static void +process_requestq(struct spdk_vhost_scsi_dev *svdev, struct spdk_vhost_virtqueue *vq) +{ + struct spdk_vhost_scsi_task *task; + uint16_t reqs[32]; + uint16_t reqs_cnt, i; + int result; + + reqs_cnt = spdk_vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs)); + assert(reqs_cnt <= 32); + + for (i = 0; i < reqs_cnt; i++) { + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "====== Starting processing request idx %"PRIu16"======\n", + reqs[i]); + + if (spdk_unlikely(reqs[i] >= vq->vring.size)) { + SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n", + svdev->vdev.name, reqs[i], vq->vring.size); + spdk_vhost_vq_used_ring_enqueue(&svdev->vdev, vq, reqs[i], 0); + continue; + } + + task = &((struct spdk_vhost_scsi_task *)vq->tasks)[reqs[i]]; + if (spdk_unlikely(task->used)) { + SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n", + svdev->vdev.name, reqs[i]); + spdk_vhost_vq_used_ring_enqueue(&svdev->vdev, vq, reqs[i], 0); + continue; + } + + svdev->vdev.task_cnt++; + memset(&task->scsi, 0, sizeof(task->scsi)); + task->resp = NULL; + task->used = true; + task->used_len = 0; + result = process_request(task); + if (likely(result == 0)) { + task_submit(task); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "====== Task %p req_idx %d submitted ======\n", task, + task->req_idx); + } else if (result > 0) { + spdk_vhost_scsi_task_cpl(&task->scsi); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "====== Task %p req_idx %d finished early ======\n", task, + task->req_idx); + } else { + invalid_request(task); + SPDK_DEBUGLOG(SPDK_LOG_VHOST_SCSI, "====== Task %p req_idx %d failed ======\n", task, + task->req_idx); + } + } +} + +static int +vdev_mgmt_worker(void *arg) +{ + struct spdk_vhost_scsi_dev *svdev = arg; + + process_removed_devs(svdev); + spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_EVENTQ]); + + process_controlq(svdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_CONTROLQ]); + spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_CONTROLQ]); + + return -1; +} + +static int +vdev_worker(void *arg) +{ + struct spdk_vhost_scsi_dev *svdev = arg; + uint32_t q_idx; + + for (q_idx = VIRTIO_SCSI_REQUESTQ; q_idx < svdev->vdev.max_queues; q_idx++) { + process_requestq(svdev, &svdev->vdev.virtqueue[q_idx]); + } + + spdk_vhost_dev_used_signal(&svdev->vdev); + + return -1; +} + +static struct spdk_vhost_scsi_dev * +to_scsi_dev(struct spdk_vhost_dev *ctrlr) +{ + if (ctrlr == NULL) { + return NULL; + } + + if (ctrlr->backend != &spdk_vhost_scsi_device_backend) { + SPDK_ERRLOG("%s: not a vhost-scsi device.\n", ctrlr->name); + return NULL; + } + + return SPDK_CONTAINEROF(ctrlr, struct spdk_vhost_scsi_dev, vdev); +} + +int +spdk_vhost_scsi_dev_construct(const char *name, const char *cpumask) +{ + struct spdk_vhost_scsi_dev *svdev = spdk_dma_zmalloc(sizeof(struct spdk_vhost_scsi_dev), + SPDK_CACHE_LINE_SIZE, NULL); + int rc; + + if (svdev == NULL) { + return -ENOMEM; + } + + spdk_vhost_lock(); + rc = spdk_vhost_dev_register(&svdev->vdev, name, cpumask, + &spdk_vhost_scsi_device_backend); + + if (rc) { + spdk_dma_free(svdev); + } + + spdk_vhost_unlock(); + return rc; +} + +static int +spdk_vhost_scsi_dev_remove(struct spdk_vhost_dev *vdev) +{ + struct 
spdk_vhost_scsi_dev *svdev = to_scsi_dev(vdev); + int rc, i; + + if (svdev == NULL) { + return -EINVAL; + } + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; ++i) { + if (svdev->scsi_dev[i]) { + if (vdev->registered) { + SPDK_ERRLOG("Trying to remove non-empty controller: %s.\n", vdev->name); + return -EBUSY; + } + + rc = spdk_vhost_scsi_dev_remove_tgt(vdev, i, NULL, NULL); + if (rc != 0) { + SPDK_ERRLOG("%s: failed to force-remove target %d\n", vdev->name, i); + return rc; + } + } + } + + rc = spdk_vhost_dev_unregister(vdev); + if (rc != 0) { + return rc; + } + + spdk_dma_free(svdev); + return 0; +} + +struct spdk_scsi_dev * +spdk_vhost_scsi_dev_get_tgt(struct spdk_vhost_dev *vdev, uint8_t num) +{ + struct spdk_vhost_scsi_dev *svdev; + + assert(num < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); + svdev = to_scsi_dev(vdev); + + return svdev ? svdev->scsi_dev[num] : NULL; +} + +static void +spdk_vhost_scsi_lun_hotremove(const struct spdk_scsi_lun *lun, void *arg) +{ + struct spdk_vhost_scsi_dev *svdev = arg; + const struct spdk_scsi_dev *scsi_dev; + unsigned scsi_dev_num; + + assert(lun != NULL); + assert(svdev != NULL); + if (svdev->vdev.lcore != -1 && + !spdk_vhost_dev_has_feature(&svdev->vdev, VIRTIO_SCSI_F_HOTPLUG)) { + SPDK_WARNLOG("%s: hotremove is not enabled for this controller.\n", svdev->vdev.name); + return; + } + + scsi_dev = spdk_scsi_lun_get_dev(lun); + for (scsi_dev_num = 0; scsi_dev_num < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; scsi_dev_num++) { + if (svdev->scsi_dev[scsi_dev_num] == scsi_dev) { + break; + } + } + + if (scsi_dev_num == SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { + /* The entire device has been already removed. */ + return; + } + + /* remove entire device */ + spdk_vhost_scsi_dev_remove_tgt(&svdev->vdev, scsi_dev_num, NULL, NULL); +} + +int +spdk_vhost_scsi_dev_add_tgt(struct spdk_vhost_dev *vdev, unsigned scsi_tgt_num, + const char *bdev_name) +{ + struct spdk_vhost_scsi_dev *svdev; + char target_name[SPDK_SCSI_DEV_MAX_NAME]; + int lun_id_list[1]; + const char *bdev_names_list[1]; + + svdev = to_scsi_dev(vdev); + if (svdev == NULL) { + return -EINVAL; + } + + if (scsi_tgt_num >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { + SPDK_ERRLOG("Controller %d target number too big (max %d)\n", scsi_tgt_num, + SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); + return -EINVAL; + } + + if (bdev_name == NULL) { + SPDK_ERRLOG("No lun name specified\n"); + return -EINVAL; + } + + if (svdev->scsi_dev[scsi_tgt_num] != NULL) { + SPDK_ERRLOG("Controller %s target %u already occupied\n", vdev->name, scsi_tgt_num); + return -EEXIST; + } + + /* + * At this stage only one LUN per target + */ + snprintf(target_name, sizeof(target_name), "Target %u", scsi_tgt_num); + lun_id_list[0] = 0; + bdev_names_list[0] = (char *)bdev_name; + + svdev->scsi_dev_state[scsi_tgt_num].removed = false; + svdev->scsi_dev[scsi_tgt_num] = spdk_scsi_dev_construct(target_name, bdev_names_list, lun_id_list, + 1, + SPDK_SPC_PROTOCOL_IDENTIFIER_SAS, spdk_vhost_scsi_lun_hotremove, svdev); + + if (svdev->scsi_dev[scsi_tgt_num] == NULL) { + SPDK_ERRLOG("Couldn't create spdk SCSI target '%s' using bdev '%s' in controller: %s\n", + target_name, bdev_name, vdev->name); + return -EINVAL; + } + spdk_scsi_dev_add_port(svdev->scsi_dev[scsi_tgt_num], 0, "vhost"); + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: defined target '%s' using bdev '%s'\n", + vdev->name, target_name, bdev_name); + + if (vdev->lcore == -1) { + /* All done. 
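+ * The controller is not being polled by any core yet, so there are no I/O channels
+ * to allocate and no hotplug event to enqueue.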
*/ + return 0; + } + + spdk_scsi_dev_allocate_io_channels(svdev->scsi_dev[scsi_tgt_num]); + + if (spdk_vhost_dev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + eventq_enqueue(svdev, scsi_tgt_num, VIRTIO_SCSI_T_TRANSPORT_RESET, + VIRTIO_SCSI_EVT_RESET_RESCAN); + } else { + SPDK_NOTICELOG("Device %s does not support hotplug. " + "Please restart the driver or perform a rescan.\n", + vdev->name); + } + + return 0; +} + +int +spdk_vhost_scsi_dev_remove_tgt(struct spdk_vhost_dev *vdev, unsigned scsi_tgt_num, + spdk_vhost_event_fn cb_fn, void *cb_arg) +{ + struct spdk_vhost_scsi_dev *svdev; + struct spdk_scsi_dev *scsi_dev; + struct spdk_scsi_dev_vhost_state *scsi_dev_state; + int rc = 0; + + if (scsi_tgt_num >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { + SPDK_ERRLOG("%s: invalid target number %d\n", vdev->name, scsi_tgt_num); + return -EINVAL; + } + + svdev = to_scsi_dev(vdev); + if (svdev == NULL) { + return -ENODEV; + } + + scsi_dev = svdev->scsi_dev[scsi_tgt_num]; + if (scsi_dev == NULL) { + SPDK_ERRLOG("Controller %s target %u is not occupied\n", vdev->name, scsi_tgt_num); + return -ENODEV; + } + + if (svdev->vdev.lcore == -1) { + /* controller is not in use, remove dev and exit */ + svdev->scsi_dev[scsi_tgt_num] = NULL; + spdk_scsi_dev_destruct(scsi_dev); + if (cb_fn) { + rc = cb_fn(vdev, cb_arg); + } + SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: removed target 'Target %u'\n", + vdev->name, scsi_tgt_num); + return rc; + } + + if (!spdk_vhost_dev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + SPDK_WARNLOG("%s: 'Target %u' is in use and hot-detach is not enabled for this controller.\n", + svdev->vdev.name, scsi_tgt_num); + return -ENOTSUP; + } + + scsi_dev_state = &svdev->scsi_dev_state[scsi_tgt_num]; + if (scsi_dev_state->removed) { + SPDK_WARNLOG("%s: 'Target %u' has been already marked to hotremove.\n", svdev->vdev.name, + scsi_tgt_num); + return -EBUSY; + } + + scsi_dev_state->remove_cb = cb_fn; + scsi_dev_state->remove_ctx = cb_arg; + scsi_dev_state->removed = true; + eventq_enqueue(svdev, scsi_tgt_num, VIRTIO_SCSI_T_TRANSPORT_RESET, VIRTIO_SCSI_EVT_RESET_REMOVED); + + SPDK_INFOLOG(SPDK_LOG_VHOST, "%s: queued 'Target %u' for hot-detach.\n", vdev->name, scsi_tgt_num); + return 0; +} + +int +spdk_vhost_scsi_controller_construct(void) +{ + struct spdk_conf_section *sp = spdk_conf_first_section(NULL); + struct spdk_vhost_dev *vdev; + int i, dev_num; + unsigned ctrlr_num = 0; + char *bdev_name, *tgt_num_str; + char *cpumask; + char *name; + char *tgt = NULL; + + while (sp != NULL) { + if (!spdk_conf_section_match_prefix(sp, "VhostScsi")) { + sp = spdk_conf_next_section(sp); + continue; + } + + if (sscanf(spdk_conf_section_get_name(sp), "VhostScsi%u", &ctrlr_num) != 1) { + SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", + spdk_conf_section_get_name(sp)); + return -1; + } + + name = spdk_conf_section_get_val(sp, "Name"); + cpumask = spdk_conf_section_get_val(sp, "Cpumask"); + + if (spdk_vhost_scsi_dev_construct(name, cpumask) < 0) { + return -1; + } + + vdev = spdk_vhost_dev_find(name); + assert(vdev); + + for (i = 0; ; i++) { + + tgt = spdk_conf_section_get_nval(sp, "Target", i); + if (tgt == NULL) { + break; + } + + tgt_num_str = spdk_conf_section_get_nmval(sp, "Target", i, 0); + if (tgt_num_str == NULL) { + SPDK_ERRLOG("%s: Invalid or missing target number\n", name); + return -1; + } + + dev_num = (int)strtol(tgt_num_str, NULL, 10); + bdev_name = spdk_conf_section_get_nmval(sp, "Target", i, 1); + if (bdev_name == NULL) { + SPDK_ERRLOG("%s: Invalid or missing bdev name for target %d\n", name, dev_num); + 
return -1; + } else if (spdk_conf_section_get_nmval(sp, "Target", i, 2)) { + SPDK_ERRLOG("%s: Only one LUN per vhost SCSI device supported\n", name); + return -1; + } + + if (spdk_vhost_scsi_dev_add_tgt(vdev, dev_num, bdev_name) < 0) { + return -1; + } + } + + sp = spdk_conf_next_section(sp); + } + + return 0; +} + +static void +free_task_pool(struct spdk_vhost_scsi_dev *svdev) +{ + struct spdk_vhost_virtqueue *vq; + uint16_t i; + + for (i = 0; i < svdev->vdev.max_queues; i++) { + vq = &svdev->vdev.virtqueue[i]; + if (vq->tasks == NULL) { + continue; + } + + spdk_dma_free(vq->tasks); + vq->tasks = NULL; + } +} + +static int +alloc_task_pool(struct spdk_vhost_scsi_dev *svdev) +{ + struct spdk_vhost_virtqueue *vq; + struct spdk_vhost_scsi_task *task; + uint32_t task_cnt; + uint16_t i; + uint32_t j; + + for (i = 0; i < svdev->vdev.max_queues; i++) { + vq = &svdev->vdev.virtqueue[i]; + if (vq->vring.desc == NULL) { + continue; + } + + task_cnt = vq->vring.size; + if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) { + /* sanity check */ + SPDK_ERRLOG("Controller %s: virtuque %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n", + svdev->vdev.name, i, task_cnt, SPDK_VHOST_MAX_VQ_SIZE); + free_task_pool(svdev); + return -1; + } + vq->tasks = spdk_dma_zmalloc(sizeof(struct spdk_vhost_scsi_task) * task_cnt, + SPDK_CACHE_LINE_SIZE, NULL); + if (vq->tasks == NULL) { + SPDK_ERRLOG("Controller %s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n", + svdev->vdev.name, task_cnt, i); + free_task_pool(svdev); + return -1; + } + + for (j = 0; j < task_cnt; j++) { + task = &((struct spdk_vhost_scsi_task *)vq->tasks)[j]; + task->svdev = svdev; + task->vq = vq; + task->req_idx = j; + } + } + + return 0; +} + +/* + * A new device is added to a data core. First the device is added to the main linked list + * and then allocated to a specific data core. 
+ */ +static int +spdk_vhost_scsi_start(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_scsi_dev *svdev; + uint32_t i; + int rc; + + svdev = to_scsi_dev(vdev); + if (svdev == NULL) { + SPDK_ERRLOG("Trying to start non-scsi controller as a scsi one.\n"); + rc = -1; + goto out; + } + + /* validate all I/O queues are in a contiguous index range */ + for (i = VIRTIO_SCSI_REQUESTQ; i < vdev->max_queues; i++) { + if (vdev->virtqueue[i].vring.desc == NULL) { + SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vdev->name, i); + rc = -1; + goto out; + } + } + + rc = alloc_task_pool(svdev); + if (rc != 0) { + SPDK_ERRLOG("%s: failed to alloc task pool.\n", vdev->name); + goto out; + } + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + if (svdev->scsi_dev[i] == NULL) { + continue; + } + spdk_scsi_dev_allocate_io_channels(svdev->scsi_dev[i]); + } + SPDK_INFOLOG(SPDK_LOG_VHOST, "Started poller for vhost controller %s on lcore %d\n", + vdev->name, vdev->lcore); + + svdev->requestq_poller = spdk_poller_register(vdev_worker, svdev, 0); + if (vdev->virtqueue[VIRTIO_SCSI_CONTROLQ].vring.desc && + vdev->virtqueue[VIRTIO_SCSI_EVENTQ].vring.desc) { + svdev->mgmt_poller = spdk_poller_register(vdev_mgmt_worker, svdev, + MGMT_POLL_PERIOD_US); + } +out: + spdk_vhost_dev_backend_event_done(event_ctx, rc); + return rc; +} + +static int +destroy_device_poller_cb(void *arg) +{ + struct spdk_vhost_scsi_dev *svdev = arg; + uint32_t i; + + if (svdev->vdev.task_cnt > 0) { + return -1; + } + + + for (i = 0; i < svdev->vdev.max_queues; i++) { + spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[i]); + } + + for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { + if (svdev->scsi_dev[i] == NULL) { + continue; + } + spdk_scsi_dev_free_io_channels(svdev->scsi_dev[i]); + } + + SPDK_INFOLOG(SPDK_LOG_VHOST, "Stopping poller for vhost controller %s\n", svdev->vdev.name); + + free_task_pool(svdev); + + spdk_poller_unregister(&svdev->destroy_ctx.poller); + spdk_vhost_dev_backend_event_done(svdev->destroy_ctx.event_ctx, 0); + + return -1; +} + +static int +spdk_vhost_scsi_stop(struct spdk_vhost_dev *vdev, void *event_ctx) +{ + struct spdk_vhost_scsi_dev *svdev; + + svdev = to_scsi_dev(vdev); + if (svdev == NULL) { + SPDK_ERRLOG("Trying to stop non-scsi controller as a scsi one.\n"); + goto err; + } + + svdev->destroy_ctx.event_ctx = event_ctx; + spdk_poller_unregister(&svdev->requestq_poller); + spdk_poller_unregister(&svdev->mgmt_poller); + svdev->destroy_ctx.poller = spdk_poller_register(destroy_device_poller_cb, svdev, + 1000); + + return 0; + +err: + spdk_vhost_dev_backend_event_done(event_ctx, -1); + return -1; +} + +static void +spdk_vhost_scsi_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_scsi_dev *sdev; + struct spdk_scsi_lun *lun; + uint32_t dev_idx; + uint32_t lun_idx; + + assert(vdev != NULL); + spdk_json_write_name(w, "scsi"); + spdk_json_write_array_begin(w); + for (dev_idx = 0; dev_idx < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; dev_idx++) { + sdev = spdk_vhost_scsi_dev_get_tgt(vdev, dev_idx); + if (!sdev) { + continue; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "scsi_dev_num"); + spdk_json_write_uint32(w, dev_idx); + + spdk_json_write_name(w, "id"); + spdk_json_write_int32(w, spdk_scsi_dev_get_id(sdev)); + + spdk_json_write_name(w, "target_name"); + spdk_json_write_string(w, spdk_scsi_dev_get_name(sdev)); + + spdk_json_write_name(w, "luns"); + spdk_json_write_array_begin(w); + + for (lun_idx = 0; lun_idx < 
SPDK_SCSI_DEV_MAX_LUN; lun_idx++) { + lun = spdk_scsi_dev_get_lun(sdev, lun_idx); + if (!lun) { + continue; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "id"); + spdk_json_write_int32(w, spdk_scsi_lun_get_id(lun)); + + spdk_json_write_name(w, "bdev_name"); + spdk_json_write_string(w, spdk_scsi_lun_get_bdev_name(lun)); + + spdk_json_write_object_end(w); + } + + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + } + + spdk_json_write_array_end(w); +} + +static void +spdk_vhost_scsi_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct spdk_vhost_scsi_dev *svdev; + struct spdk_scsi_lun *lun; + uint32_t i; + + svdev = to_scsi_dev(vdev); + if (!svdev) { + return; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "construct_vhost_scsi_controller"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "ctrlr", vdev->name); + spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(vdev->cpumask)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + for (i = 0; i < SPDK_COUNTOF(svdev->scsi_dev); i++) { + if (svdev->scsi_dev[i] == NULL || svdev->scsi_dev_state[i].removed) { + continue; + } + + lun = spdk_scsi_dev_get_lun(svdev->scsi_dev[i], 0); + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "add_vhost_scsi_lun"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "ctrlr", vdev->name); + spdk_json_write_named_uint32(w, "scsi_target_num", i); + + spdk_json_write_named_string(w, "bdev_name", spdk_scsi_lun_get_bdev_name(lun)); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } +} + +SPDK_LOG_REGISTER_COMPONENT("vhost_scsi", SPDK_LOG_VHOST_SCSI) +SPDK_LOG_REGISTER_COMPONENT("vhost_scsi_queue", SPDK_LOG_VHOST_SCSI_QUEUE) +SPDK_LOG_REGISTER_COMPONENT("vhost_scsi_data", SPDK_LOG_VHOST_SCSI_DATA) diff --git a/src/spdk/lib/virtio/Makefile b/src/spdk/lib/virtio/Makefile new file mode 100644 index 00000000..db61c1f2 --- /dev/null +++ b/src/spdk/lib/virtio/Makefile @@ -0,0 +1,42 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(ENV_CFLAGS) +C_SRCS = virtio.c virtio_user.c virtio_pci.c +C_SRCS += virtio_user/vhost_user.c +LIBNAME = virtio + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/virtio/virtio.c b/src/spdk/lib/virtio/virtio.c new file mode 100644 index 00000000..b03034cf --- /dev/null +++ b/src/spdk/lib/virtio/virtio.c @@ -0,0 +1,738 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "spdk/env.h" +#include "spdk/barrier.h" + +#include "spdk_internal/virtio.h" + +/* We use SMP memory barrier variants as all virtio_pci devices + * are purely virtual. All MMIO is executed on a CPU core, so + * there's no need to do full MMIO synchronization. 
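+ * (SMP barriers only order accesses as seen by other CPU cores, which is sufficient
+ * here and cheaper than full barriers that would also order real I/O accesses.)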
+ */ +#define virtio_mb() spdk_smp_mb() +#define virtio_rmb() spdk_smp_rmb() +#define virtio_wmb() spdk_smp_wmb() + +/* Chain all the descriptors in the ring with an END */ +static inline void +vring_desc_init(struct vring_desc *dp, uint16_t n) +{ + uint16_t i; + + for (i = 0; i < n - 1; i++) { + dp[i].next = (uint16_t)(i + 1); + } + dp[i].next = VQ_RING_DESC_CHAIN_END; +} + +static void +virtio_init_vring(struct virtqueue *vq) +{ + int size = vq->vq_nentries; + struct vring *vr = &vq->vq_ring; + uint8_t *ring_mem = vq->vq_ring_virt_mem; + + /* + * Reinitialise since virtio port might have been stopped and restarted + */ + memset(ring_mem, 0, vq->vq_ring_size); + vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_avail_idx = 0; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); + vq->vq_free_cnt = vq->vq_nentries; + vq->req_start = VQ_RING_DESC_CHAIN_END; + vq->req_end = VQ_RING_DESC_CHAIN_END; + vq->reqs_finished = 0; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + vring_desc_init(vr->desc, size); + + /* Tell the backend not to interrupt us. + * If F_EVENT_IDX is negotiated, we will always set incredibly high + * used event idx, so that we will practically never receive an + * interrupt. See virtqueue_req_flush() + */ + if (vq->vdev->negotiated_features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { + vring_used_event(&vq->vq_ring) = UINT16_MAX; + } else { + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; + } +} + +static int +virtio_init_queue(struct virtio_dev *dev, uint16_t vtpci_queue_idx) +{ + unsigned int vq_size, size; + struct virtqueue *vq; + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "setting up queue: %"PRIu16"\n", vtpci_queue_idx); + + /* + * Read the virtqueue size from the Queue Size field + * Always power of 2 and if 0 virtqueue does not exist + */ + vq_size = virtio_dev_backend_ops(dev)->get_queue_size(dev, vtpci_queue_idx); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "vq_size: %u\n", vq_size); + if (vq_size == 0) { + SPDK_ERRLOG("virtqueue %"PRIu16" does not exist\n", vtpci_queue_idx); + return -EINVAL; + } + + if (!rte_is_power_of_2(vq_size)) { + SPDK_ERRLOG("virtqueue %"PRIu16" size (%u) is not powerof 2\n", + vtpci_queue_idx, vq_size); + return -EINVAL; + } + + size = RTE_ALIGN_CEIL(sizeof(*vq) + + vq_size * sizeof(struct vq_desc_extra), + RTE_CACHE_LINE_SIZE); + + vq = spdk_dma_zmalloc(size, RTE_CACHE_LINE_SIZE, NULL); + if (vq == NULL) { + SPDK_ERRLOG("can not allocate vq\n"); + return -ENOMEM; + } + dev->vqs[vtpci_queue_idx] = vq; + + vq->vdev = dev; + vq->vq_queue_index = vtpci_queue_idx; + vq->vq_nentries = vq_size; + + /* + * Reserve a memzone for vring elements + */ + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "vring_size: %u, rounded_vring_size: %u\n", + size, vq->vq_ring_size); + + vq->owner_thread = NULL; + + rc = virtio_dev_backend_ops(dev)->setup_queue(dev, vq); + if (rc < 0) { + SPDK_ERRLOG("setup_queue failed\n"); + spdk_dma_free(vq); + dev->vqs[vtpci_queue_idx] = NULL; + return rc; + } + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "vq->vq_ring_mem: 0x%" PRIx64 "\n", + vq->vq_ring_mem); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "vq->vq_ring_virt_mem: 0x%" PRIx64 "\n", + (uint64_t)(uintptr_t)vq->vq_ring_virt_mem); + + virtio_init_vring(vq); + return 0; +} + +static void +virtio_free_queues(struct virtio_dev *dev) +{ + uint16_t nr_vq = dev->max_queues; + 
struct virtqueue *vq; + uint16_t i; + + if (dev->vqs == NULL) { + return; + } + + for (i = 0; i < nr_vq; i++) { + vq = dev->vqs[i]; + if (!vq) { + continue; + } + + virtio_dev_backend_ops(dev)->del_queue(dev, vq); + + rte_free(vq); + dev->vqs[i] = NULL; + } + + rte_free(dev->vqs); + dev->vqs = NULL; +} + +static int +virtio_alloc_queues(struct virtio_dev *dev, uint16_t request_vq_num, uint16_t fixed_vq_num) +{ + uint16_t nr_vq; + uint16_t i; + int ret; + + nr_vq = request_vq_num + fixed_vq_num; + if (nr_vq == 0) { + /* perfectly fine to have a device with no virtqueues. */ + return 0; + } + + assert(dev->vqs == NULL); + dev->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0); + if (!dev->vqs) { + SPDK_ERRLOG("failed to allocate %"PRIu16" vqs\n", nr_vq); + return -ENOMEM; + } + + for (i = 0; i < nr_vq; i++) { + ret = virtio_init_queue(dev, i); + if (ret < 0) { + virtio_free_queues(dev); + return ret; + } + } + + dev->max_queues = nr_vq; + dev->fixed_queues_num = fixed_vq_num; + return 0; +} + +/** + * Negotiate virtio features. For virtio_user this will also set + * dev->modern flag if VIRTIO_F_VERSION_1 flag is negotiated. + */ +static int +virtio_negotiate_features(struct virtio_dev *dev, uint64_t req_features) +{ + uint64_t host_features = virtio_dev_backend_ops(dev)->get_features(dev); + int rc; + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "guest features = %" PRIx64 "\n", req_features); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "device features = %" PRIx64 "\n", host_features); + + rc = virtio_dev_backend_ops(dev)->set_features(dev, req_features & host_features); + if (rc != 0) { + SPDK_ERRLOG("failed to negotiate device features.\n"); + return rc; + } + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "negotiated features = %" PRIx64 "\n", + dev->negotiated_features); + + virtio_dev_set_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + if (!(virtio_dev_get_status(dev) & VIRTIO_CONFIG_S_FEATURES_OK)) { + SPDK_ERRLOG("failed to set FEATURES_OK status!\n"); + /* either the device failed, or we offered some features that + * depend on other, not offered features. 
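+ * Per the virtio spec the device must not be used when FEATURES_OK is rejected,
+ * so fail the negotiation here.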
+ */ + return -EINVAL; + } + + return 0; +} + +int +virtio_dev_construct(struct virtio_dev *vdev, const char *name, + const struct virtio_dev_ops *ops, void *ctx) +{ + int rc; + + vdev->name = strdup(name); + if (vdev->name == NULL) { + return -ENOMEM; + } + + rc = pthread_mutex_init(&vdev->mutex, NULL); + if (rc != 0) { + free(vdev->name); + return -rc; + } + + vdev->backend_ops = ops; + vdev->ctx = ctx; + + return 0; +} + +int +virtio_dev_reset(struct virtio_dev *dev, uint64_t req_features) +{ + req_features |= (1ULL << VIRTIO_F_VERSION_1); + + virtio_dev_stop(dev); + + virtio_dev_set_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + if (!(virtio_dev_get_status(dev) & VIRTIO_CONFIG_S_ACKNOWLEDGE)) { + SPDK_ERRLOG("Failed to set VIRTIO_CONFIG_S_ACKNOWLEDGE status.\n"); + return -EIO; + } + + virtio_dev_set_status(dev, VIRTIO_CONFIG_S_DRIVER); + if (!(virtio_dev_get_status(dev) & VIRTIO_CONFIG_S_DRIVER)) { + SPDK_ERRLOG("Failed to set VIRTIO_CONFIG_S_DRIVER status.\n"); + return -EIO; + } + + return virtio_negotiate_features(dev, req_features); +} + +int +virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues, uint16_t fixed_queue_num) +{ + int ret; + + ret = virtio_alloc_queues(vdev, max_queues, fixed_queue_num); + if (ret < 0) { + return ret; + } + + virtio_dev_set_status(vdev, VIRTIO_CONFIG_S_DRIVER_OK); + if (!(virtio_dev_get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) { + SPDK_ERRLOG("Failed to set VIRTIO_CONFIG_S_DRIVER_OK status.\n"); + return -1; + } + + return 0; +} + +void +virtio_dev_destruct(struct virtio_dev *dev) +{ + virtio_dev_backend_ops(dev)->destruct_dev(dev); + pthread_mutex_destroy(&dev->mutex); + free(dev->name); +} + +static void +vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) +{ + struct vring_desc *dp, *dp_tail; + struct vq_desc_extra *dxp; + uint16_t desc_idx_last = desc_idx; + + dp = &vq->vq_ring.desc[desc_idx]; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); + if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { + while (dp->flags & VRING_DESC_F_NEXT) { + desc_idx_last = dp->next; + dp = &vq->vq_ring.desc[dp->next]; + } + } + dxp->ndescs = 0; + + /* + * We must append the existing free chain, if any, to the end of + * newly freed chain. If the virtqueue was completely used, then + * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 
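+ * In that case the freed chain simply becomes the new free list; otherwise it is
+ * linked after the current tail descriptor.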
+ */ + if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { + vq->vq_desc_head_idx = desc_idx; + } else { + dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; + dp_tail->next = desc_idx; + } + + vq->vq_desc_tail_idx = desc_idx_last; + dp->next = VQ_RING_DESC_CHAIN_END; +} + +static uint16_t +virtqueue_dequeue_burst_rx(struct virtqueue *vq, void **rx_pkts, + uint32_t *len, uint16_t num) +{ + struct vring_used_elem *uep; + struct virtio_req *cookie; + uint16_t used_idx, desc_idx; + uint16_t i; + + /* Caller does the check */ + for (i = 0; i < num ; i++) { + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + len[i] = uep->len; + cookie = (struct virtio_req *)vq->vq_descx[desc_idx].cookie; + + if (spdk_unlikely(cookie == NULL)) { + SPDK_WARNLOG("vring descriptor with no mbuf cookie at %"PRIu16"\n", + vq->vq_used_cons_idx); + break; + } + + rte_prefetch0(cookie); + rx_pkts[i] = cookie; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + vq->vq_descx[desc_idx].cookie = NULL; + } + + return i; +} + +static void +finish_req(struct virtqueue *vq) +{ + struct vring_desc *desc; + uint16_t avail_idx; + + desc = &vq->vq_ring.desc[vq->req_end]; + desc->flags &= ~VRING_DESC_F_NEXT; + + /* + * Place the head of the descriptor chain into the next slot and make + * it usable to the host. The chain is made available now rather than + * deferring to virtqueue_req_flush() in the hopes that if the host is + * currently running on another CPU, we can keep it processing the new + * descriptor. + */ + avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); + vq->vq_ring.avail->ring[avail_idx] = vq->req_start; + vq->vq_avail_idx++; + vq->req_end = VQ_RING_DESC_CHAIN_END; + virtio_wmb(); + vq->vq_ring.avail->idx = vq->vq_avail_idx; + vq->reqs_finished++; +} + +int +virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt) +{ + struct vq_desc_extra *dxp; + + if (iovcnt > vq->vq_free_cnt) { + return iovcnt > vq->vq_nentries ? -EINVAL : -ENOMEM; + } + + if (vq->req_end != VQ_RING_DESC_CHAIN_END) { + finish_req(vq); + } + + vq->req_start = vq->vq_desc_head_idx; + dxp = &vq->vq_descx[vq->req_start]; + dxp->cookie = cookie; + dxp->ndescs = 0; + + return 0; +} + +void +virtqueue_req_flush(struct virtqueue *vq) +{ + uint16_t reqs_finished; + + if (vq->req_end == VQ_RING_DESC_CHAIN_END) { + /* no non-empty requests have been started */ + return; + } + + finish_req(vq); + virtio_mb(); + + reqs_finished = vq->reqs_finished; + vq->reqs_finished = 0; + + if (vq->vdev->negotiated_features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { + /* Set used event idx to a value the device will never reach. + * This effectively disables interrupts. 
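+ * The device's avail event index (vring_avail_event) is then consulted below to
+ * decide whether a doorbell notification is still needed for the descriptors
+ * flushed by this call.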
+ */ + vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx - vq->vq_nentries - 1; + + if (!vring_need_event(vring_avail_event(&vq->vq_ring), + vq->vq_avail_idx, + vq->vq_avail_idx - reqs_finished)) { + return; + } + } else if (vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) { + return; + } + + virtio_dev_backend_ops(vq->vdev)->notify_queue(vq->vdev, vq); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_DEV, "Notified backend after xmit\n"); +} + +void +virtqueue_req_abort(struct virtqueue *vq) +{ + struct vring_desc *desc; + + if (vq->req_start == VQ_RING_DESC_CHAIN_END) { + /* no requests have been started */ + return; + } + + desc = &vq->vq_ring.desc[vq->req_end]; + desc->flags &= ~VRING_DESC_F_NEXT; + + vq_ring_free_chain(vq, vq->req_start); + vq->req_start = VQ_RING_DESC_CHAIN_END; +} + +void +virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt, + enum spdk_virtio_desc_type desc_type) +{ + struct vring_desc *desc; + struct vq_desc_extra *dxp; + uint16_t i, prev_head, new_head; + + assert(vq->req_start != VQ_RING_DESC_CHAIN_END); + assert(iovcnt <= vq->vq_free_cnt); + + /* TODO use indirect descriptors if iovcnt is high enough + * or the caller specifies SPDK_VIRTIO_DESC_F_INDIRECT + */ + + prev_head = vq->req_end; + new_head = vq->vq_desc_head_idx; + for (i = 0; i < iovcnt; ++i) { + desc = &vq->vq_ring.desc[new_head]; + + if (!vq->vdev->is_hw) { + desc->addr = (uintptr_t)iovs[i].iov_base; + } else { + desc->addr = spdk_vtophys(iovs[i].iov_base); + } + + desc->len = iovs[i].iov_len; + /* always set NEXT flag. unset it on the last descriptor + * in the request-ending function. + */ + desc->flags = desc_type | VRING_DESC_F_NEXT; + + prev_head = new_head; + new_head = desc->next; + } + + dxp = &vq->vq_descx[vq->req_start]; + dxp->ndescs += iovcnt; + + vq->req_end = prev_head; + vq->vq_desc_head_idx = new_head; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - iovcnt); + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) { + assert(vq->vq_free_cnt == 0); + vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END; + } +} + +#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) +uint16_t +virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t nb_pkts) +{ + uint16_t nb_used, num; + + nb_used = vq->vq_ring.used->idx - vq->vq_used_cons_idx; + virtio_rmb(); + + num = (uint16_t)(spdk_likely(nb_used <= nb_pkts) ? 
nb_used : nb_pkts); + if (spdk_likely(num > DESC_PER_CACHELINE)) { + num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + } + + return virtqueue_dequeue_burst_rx(vq, io, len, num); +} + +int +virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index) +{ + struct virtqueue *vq = NULL; + + if (index >= vdev->max_queues) { + SPDK_ERRLOG("requested vq index %"PRIu16" exceeds max queue count %"PRIu16".\n", + index, vdev->max_queues); + return -1; + } + + pthread_mutex_lock(&vdev->mutex); + vq = vdev->vqs[index]; + if (vq == NULL || vq->owner_thread != NULL) { + pthread_mutex_unlock(&vdev->mutex); + return -1; + } + + vq->owner_thread = spdk_get_thread(); + pthread_mutex_unlock(&vdev->mutex); + return 0; +} + +int32_t +virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index) +{ + struct virtqueue *vq = NULL; + uint16_t i; + + pthread_mutex_lock(&vdev->mutex); + for (i = start_index; i < vdev->max_queues; ++i) { + vq = vdev->vqs[i]; + if (vq != NULL && vq->owner_thread == NULL) { + break; + } + } + + if (vq == NULL || i == vdev->max_queues) { + SPDK_ERRLOG("no more unused virtio queues with idx >= %"PRIu16".\n", start_index); + pthread_mutex_unlock(&vdev->mutex); + return -1; + } + + vq->owner_thread = spdk_get_thread(); + pthread_mutex_unlock(&vdev->mutex); + return i; +} + +struct spdk_thread * +virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index) +{ + struct spdk_thread *thread = NULL; + + if (index >= vdev->max_queues) { + SPDK_ERRLOG("given vq index %"PRIu16" exceeds max queue count %"PRIu16"\n", + index, vdev->max_queues); + abort(); /* This is not recoverable */ + } + + pthread_mutex_lock(&vdev->mutex); + thread = vdev->vqs[index]->owner_thread; + pthread_mutex_unlock(&vdev->mutex); + + return thread; +} + +bool +virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index) +{ + return virtio_dev_queue_get_thread(vdev, index) != NULL; +} + +void +virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index) +{ + struct virtqueue *vq = NULL; + + if (index >= vdev->max_queues) { + SPDK_ERRLOG("given vq index %"PRIu16" exceeds max queue count %"PRIu16".\n", + index, vdev->max_queues); + return; + } + + pthread_mutex_lock(&vdev->mutex); + vq = vdev->vqs[index]; + if (vq == NULL) { + SPDK_ERRLOG("virtqueue at index %"PRIu16" is not initialized.\n", index); + pthread_mutex_unlock(&vdev->mutex); + return; + } + + assert(vq->owner_thread == spdk_get_thread()); + vq->owner_thread = NULL; + pthread_mutex_unlock(&vdev->mutex); +} + +int +virtio_dev_read_dev_config(struct virtio_dev *dev, size_t offset, + void *dst, int length) +{ + return virtio_dev_backend_ops(dev)->read_dev_cfg(dev, offset, dst, length); +} + +int +virtio_dev_write_dev_config(struct virtio_dev *dev, size_t offset, + const void *src, int length) +{ + return virtio_dev_backend_ops(dev)->write_dev_cfg(dev, offset, src, length); +} + +void +virtio_dev_stop(struct virtio_dev *dev) +{ + virtio_dev_backend_ops(dev)->set_status(dev, VIRTIO_CONFIG_S_RESET); + /* flush status write */ + virtio_dev_backend_ops(dev)->get_status(dev); + virtio_free_queues(dev); +} + +void +virtio_dev_set_status(struct virtio_dev *dev, uint8_t status) +{ + if (status != VIRTIO_CONFIG_S_RESET) { + status |= virtio_dev_backend_ops(dev)->get_status(dev); + } + + virtio_dev_backend_ops(dev)->set_status(dev, status); +} + +uint8_t +virtio_dev_get_status(struct virtio_dev *dev) +{ + return virtio_dev_backend_ops(dev)->get_status(dev); +} + +const struct virtio_dev_ops * 
+virtio_dev_backend_ops(struct virtio_dev *dev) +{ + return dev->backend_ops; +} + +void +virtio_dev_dump_json_info(struct virtio_dev *hw, struct spdk_json_write_ctx *w) +{ + spdk_json_write_name(w, "virtio"); + spdk_json_write_object_begin(w); + + spdk_json_write_name(w, "vq_count"); + spdk_json_write_uint32(w, hw->max_queues); + + spdk_json_write_name(w, "vq_size"); + spdk_json_write_uint32(w, virtio_dev_backend_ops(hw)->get_queue_size(hw, 0)); + + virtio_dev_backend_ops(hw)->dump_json_info(hw, w); + + spdk_json_write_object_end(w); +} + +SPDK_LOG_REGISTER_COMPONENT("virtio_dev", SPDK_LOG_VIRTIO_DEV) diff --git a/src/spdk/lib/virtio/virtio_pci.c b/src/spdk/lib/virtio/virtio_pci.c new file mode 100644 index 00000000..c21492a7 --- /dev/null +++ b/src/spdk/lib/virtio/virtio_pci.c @@ -0,0 +1,590 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/mmio.h" +#include "spdk/string.h" +#include "spdk/env.h" + +#include "spdk_internal/virtio.h" + +struct virtio_hw { + uint8_t use_msix; + uint32_t notify_off_multiplier; + uint8_t *isr; + uint16_t *notify_base; + + struct { + /** Mem-mapped resources from given PCI BAR */ + void *vaddr; + + /** Length of the address space */ + uint32_t len; + } pci_bar[6]; + + struct virtio_pci_common_cfg *common_cfg; + struct spdk_pci_device *pci_dev; + + /** Device-specific PCI config space */ + void *dev_cfg; +}; + +struct virtio_pci_probe_ctx { + virtio_pci_create_cb enum_cb; + void *enum_ctx; + uint16_t device_id; +}; + +/* + * Following macros are derived from linux/pci_regs.h, however, + * we can't simply include that header here, as there is no such + * file for non-Linux platform. 
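+ * Only the config-space offsets needed to walk the PCI capability list are
+ * duplicated here.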
+ */ +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CAP_ID_VNDR 0x09 +#define PCI_CAP_ID_MSIX 0x11 + +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + SPDK_ERRLOG("vring address shouldn't be above 16TB!\n"); + return 0; + } + + return 1; +} + +static void +free_virtio_hw(struct virtio_hw *hw) +{ + unsigned i; + + for (i = 0; i < 6; ++i) { + if (hw->pci_bar[i].vaddr == NULL) { + continue; + } + + spdk_pci_device_unmap_bar(hw->pci_dev, i, hw->pci_bar[i].vaddr); + } + + free(hw); +} + +static void +pci_dump_json_info(struct virtio_dev *dev, struct spdk_json_write_ctx *w) +{ + struct virtio_hw *hw = dev->ctx; + struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr((struct spdk_pci_device *)hw->pci_dev); + char addr[32]; + + spdk_json_write_name(w, "type"); + if (dev->modern) { + spdk_json_write_string(w, "pci-modern"); + } else { + spdk_json_write_string(w, "pci-legacy"); + } + + spdk_json_write_name(w, "pci_address"); + spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr); + spdk_json_write_string(w, addr); +} + +static void +pci_write_json_config(struct virtio_dev *dev, struct spdk_json_write_ctx *w) +{ + struct virtio_hw *hw = dev->ctx; + struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr(hw->pci_dev); + char addr[32]; + + spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr); + + spdk_json_write_named_string(w, "trtype", "pci"); + spdk_json_write_named_string(w, "traddr", addr); +} + +static inline void +io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) +{ + spdk_mmio_write_4(lo, val & ((1ULL << 32) - 1)); + spdk_mmio_write_4(hi, val >> 32); +} + +static int +modern_read_dev_config(struct virtio_dev *dev, size_t offset, + void *dst, int length) +{ + struct virtio_hw *hw = dev->ctx; + int i; + uint8_t *p; + uint8_t old_gen, new_gen; + + do { + old_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation); + + p = dst; + for (i = 0; i < length; i++) { + *p++ = spdk_mmio_read_1((uint8_t *)hw->dev_cfg + offset + i); + } + + new_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation); + } while (old_gen != new_gen); + + return 0; +} + +static int +modern_write_dev_config(struct virtio_dev *dev, size_t offset, + const void *src, int length) +{ + struct virtio_hw *hw = dev->ctx; + int i; + const uint8_t *p = src; + + for (i = 0; i < length; i++) { + spdk_mmio_write_1(((uint8_t *)hw->dev_cfg) + offset + i, *p++); + } + + return 0; +} + +static uint64_t +modern_get_features(struct virtio_dev *dev) +{ + struct virtio_hw *hw = dev->ctx; + uint32_t features_lo, features_hi; + + spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 0); + features_lo = spdk_mmio_read_4(&hw->common_cfg->device_feature); + + spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 1); + features_hi = spdk_mmio_read_4(&hw->common_cfg->device_feature); + + return ((uint64_t)features_hi << 32) | features_lo; +} + +static int +modern_set_features(struct virtio_dev *dev, uint64_t features) +{ + struct virtio_hw *hw = dev->ctx; + + if ((features & (1ULL << VIRTIO_F_VERSION_1)) == 0) { + SPDK_ERRLOG("VIRTIO_F_VERSION_1 feature is not enabled.\n"); + return -EINVAL; + } + + spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 0); + spdk_mmio_write_4(&hw->common_cfg->guest_feature, features & ((1ULL << 32) - 1)); + + 
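+ /* The 64-bit feature bitmap is exposed through a 32-bit select/value window;
+  * selecting 1 gives access to the upper half of the bitmap. */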
spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 1); + spdk_mmio_write_4(&hw->common_cfg->guest_feature, features >> 32); + + dev->negotiated_features = features; + + return 0; +} + +static void +modern_destruct_dev(struct virtio_dev *vdev) +{ + struct virtio_hw *hw = vdev->ctx; + struct spdk_pci_device *pci_dev = hw->pci_dev; + + free_virtio_hw(hw); + spdk_pci_device_detach(pci_dev); +} + +static uint8_t +modern_get_status(struct virtio_dev *dev) +{ + struct virtio_hw *hw = dev->ctx; + + return spdk_mmio_read_1(&hw->common_cfg->device_status); +} + +static void +modern_set_status(struct virtio_dev *dev, uint8_t status) +{ + struct virtio_hw *hw = dev->ctx; + + spdk_mmio_write_1(&hw->common_cfg->device_status, status); +} + +static uint16_t +modern_get_queue_size(struct virtio_dev *dev, uint16_t queue_id) +{ + struct virtio_hw *hw = dev->ctx; + + spdk_mmio_write_2(&hw->common_cfg->queue_select, queue_id); + return spdk_mmio_read_2(&hw->common_cfg->queue_size); +} + +static int +modern_setup_queue(struct virtio_dev *dev, struct virtqueue *vq) +{ + struct virtio_hw *hw = dev->ctx; + uint64_t desc_addr, avail_addr, used_addr; + uint16_t notify_off; + void *queue_mem; + uint64_t queue_mem_phys_addr; + + /* To ensure physical address contiguity we make the queue occupy + * only a single hugepage (2MB). As of Virtio 1.0, the queue size + * always falls within this limit. + */ + if (vq->vq_ring_size > 0x200000) { + return -ENOMEM; + } + + queue_mem = spdk_dma_zmalloc(vq->vq_ring_size, 0x200000, &queue_mem_phys_addr); + if (queue_mem == NULL) { + return -ENOMEM; + } + + vq->vq_ring_mem = queue_mem_phys_addr; + vq->vq_ring_virt_mem = queue_mem; + + if (!check_vq_phys_addr_ok(vq)) { + spdk_dma_free(queue_mem); + return -ENOMEM; + } + + desc_addr = vq->vq_ring_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = (avail_addr + offsetof(struct vring_avail, ring[vq->vq_nentries]) + + VIRTIO_PCI_VRING_ALIGN - 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1); + + spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index); + + io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + + notify_off = spdk_mmio_read_2(&hw->common_cfg->queue_notify_off); + vq->notify_addr = (void *)((uint8_t *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + + spdk_mmio_write_2(&hw->common_cfg->queue_enable, 1); + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "queue %"PRIu16" addresses:\n", vq->vq_queue_index); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t desc_addr: %" PRIx64 "\n", desc_addr); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t aval_addr: %" PRIx64 "\n", avail_addr); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t used_addr: %" PRIx64 "\n", used_addr); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t notify addr: %p (notify offset: %"PRIu16")\n", + vq->notify_addr, notify_off); + + return 0; +} + +static void +modern_del_queue(struct virtio_dev *dev, struct virtqueue *vq) +{ + struct virtio_hw *hw = dev->ctx; + + spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index); + + io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(0, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(0, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + 
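+ /* Disable the queue before freeing the DMA-able ring memory that was handed to the device. */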
+ spdk_mmio_write_2(&hw->common_cfg->queue_enable, 0); + + spdk_dma_free(vq->vq_ring_virt_mem); +} + +static void +modern_notify_queue(struct virtio_dev *dev, struct virtqueue *vq) +{ + spdk_mmio_write_2(vq->notify_addr, vq->vq_queue_index); +} + +static const struct virtio_dev_ops modern_ops = { + .read_dev_cfg = modern_read_dev_config, + .write_dev_cfg = modern_write_dev_config, + .get_status = modern_get_status, + .set_status = modern_set_status, + .get_features = modern_get_features, + .set_features = modern_set_features, + .destruct_dev = modern_destruct_dev, + .get_queue_size = modern_get_queue_size, + .setup_queue = modern_setup_queue, + .del_queue = modern_del_queue, + .notify_queue = modern_notify_queue, + .dump_json_info = pci_dump_json_info, + .write_json_config = pci_write_json_config, +}; + +static void * +get_cfg_addr(struct virtio_hw *hw, struct virtio_pci_cap *cap) +{ + uint8_t bar = cap->bar; + uint32_t length = cap->length; + uint32_t offset = cap->offset; + + if (bar > 5) { + SPDK_ERRLOG("invalid bar: %"PRIu8"\n", bar); + return NULL; + } + + if (offset + length < offset) { + SPDK_ERRLOG("offset(%"PRIu32") + length(%"PRIu32") overflows\n", + offset, length); + return NULL; + } + + if (offset + length > hw->pci_bar[bar].len) { + SPDK_ERRLOG("invalid cap: overflows bar space: %"PRIu32" > %"PRIu32"\n", + offset + length, hw->pci_bar[bar].len); + return NULL; + } + + if (hw->pci_bar[bar].vaddr == NULL) { + SPDK_ERRLOG("bar %"PRIu8" base addr is NULL\n", bar); + return NULL; + } + + return hw->pci_bar[bar].vaddr + offset; +} + +static int +virtio_read_caps(struct virtio_hw *hw) +{ + uint8_t pos; + struct virtio_pci_cap cap; + int ret; + + ret = spdk_pci_device_cfg_read(hw->pci_dev, &pos, 1, PCI_CAPABILITY_LIST); + if (ret < 0) { + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "failed to read pci capability list\n"); + return ret; + } + + while (pos) { + ret = spdk_pci_device_cfg_read(hw->pci_dev, &cap, sizeof(cap), pos); + if (ret < 0) { + SPDK_ERRLOG("failed to read pci cap at pos: %"PRIx8"\n", pos); + break; + } + + if (cap.cap_vndr == PCI_CAP_ID_MSIX) { + hw->use_msix = 1; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) { + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, + "[%2"PRIx8"] skipping non VNDR cap id: %02"PRIx8"\n", + pos, cap.cap_vndr); + goto next; + } + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, + "[%2"PRIx8"] cfg type: %"PRIu8", bar: %"PRIu8", offset: %04"PRIx32", len: %"PRIu32"\n", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + hw->common_cfg = get_cfg_addr(hw, &cap); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + spdk_pci_device_cfg_read(hw->pci_dev, &hw->notify_off_multiplier, + 4, pos + sizeof(cap)); + hw->notify_base = get_cfg_addr(hw, &cap); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + hw->dev_cfg = get_cfg_addr(hw, &cap); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + hw->isr = get_cfg_addr(hw, &cap); + break; + } + +next: + pos = cap.cap_next; + } + + if (hw->common_cfg == NULL || hw->notify_base == NULL || + hw->dev_cfg == NULL || hw->isr == NULL) { + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "no modern virtio pci device found.\n"); + if (ret < 0) { + return ret; + } else { + return -EINVAL; + } + } + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "found modern virtio pci device.\n"); + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "common cfg mapped at: %p\n", hw->common_cfg); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "device cfg mapped at: %p\n", hw->dev_cfg); + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "isr cfg mapped at: %p\n", hw->isr); + 
SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "notify base: %p, notify off multiplier: %u\n", + hw->notify_base, hw->notify_off_multiplier); + + return 0; +} + +static int +virtio_pci_dev_probe(struct spdk_pci_device *pci_dev, struct virtio_pci_probe_ctx *ctx) +{ + struct virtio_hw *hw; + uint8_t *bar_vaddr; + uint64_t bar_paddr, bar_len; + int rc; + unsigned i; + char bdf[32]; + struct spdk_pci_addr addr; + + addr = spdk_pci_device_get_addr(pci_dev); + rc = spdk_pci_addr_fmt(bdf, sizeof(bdf), &addr); + if (rc != 0) { + SPDK_ERRLOG("Ignoring a device with non-parseable PCI address\n"); + return -1; + } + + hw = calloc(1, sizeof(*hw)); + if (hw == NULL) { + SPDK_ERRLOG("%s: calloc failed\n", bdf); + return -1; + } + + hw->pci_dev = pci_dev; + + for (i = 0; i < 6; ++i) { + rc = spdk_pci_device_map_bar(pci_dev, i, (void *) &bar_vaddr, &bar_paddr, + &bar_len); + if (rc != 0) { + SPDK_ERRLOG("%s: failed to memmap PCI BAR %u\n", bdf, i); + free_virtio_hw(hw); + return -1; + } + + hw->pci_bar[i].vaddr = bar_vaddr; + hw->pci_bar[i].len = bar_len; + } + + /* Virtio PCI caps exist only on modern PCI devices. + * Legacy devices are not supported. + */ + if (virtio_read_caps(hw) != 0) { + SPDK_NOTICELOG("Ignoring legacy PCI device at %s\n", bdf); + free_virtio_hw(hw); + return -1; + } + + rc = ctx->enum_cb((struct virtio_pci_ctx *)hw, ctx->enum_ctx); + if (rc != 0) { + free_virtio_hw(hw); + } + + return rc; +} + +static int +virtio_pci_dev_probe_cb(void *probe_ctx, struct spdk_pci_device *pci_dev) +{ + struct virtio_pci_probe_ctx *ctx = probe_ctx; + uint16_t pci_device_id = spdk_pci_device_get_device_id(pci_dev); + + if (pci_device_id != ctx->device_id) { + return 1; + } + + return virtio_pci_dev_probe(pci_dev, ctx); +} + +int +virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx, + uint16_t pci_device_id) +{ + struct virtio_pci_probe_ctx ctx; + + if (!spdk_process_is_primary()) { + SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n"); + return 0; + } + + ctx.enum_cb = enum_cb; + ctx.enum_ctx = enum_ctx; + ctx.device_id = pci_device_id; + + return spdk_pci_virtio_enumerate(virtio_pci_dev_probe_cb, &ctx); +} + +int +virtio_pci_dev_attach(virtio_pci_create_cb enum_cb, void *enum_ctx, + uint16_t pci_device_id, struct spdk_pci_addr *pci_address) +{ + struct virtio_pci_probe_ctx ctx; + + if (!spdk_process_is_primary()) { + SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n"); + return 0; + } + + ctx.enum_cb = enum_cb; + ctx.enum_ctx = enum_ctx; + ctx.device_id = pci_device_id; + + return spdk_pci_virtio_device_attach(virtio_pci_dev_probe_cb, &ctx, pci_address); +} + +int +virtio_pci_dev_init(struct virtio_dev *vdev, const char *name, + struct virtio_pci_ctx *pci_ctx) +{ + int rc; + + rc = virtio_dev_construct(vdev, name, &modern_ops, pci_ctx); + if (rc != 0) { + return rc; + } + + vdev->is_hw = 1; + vdev->modern = 1; + + return 0; +} + +SPDK_LOG_REGISTER_COMPONENT("virtio_pci", SPDK_LOG_VIRTIO_PCI) diff --git a/src/spdk/lib/virtio/virtio_user.c b/src/spdk/lib/virtio/virtio_user.c new file mode 100644 index 00000000..5dadda61 --- /dev/null +++ b/src/spdk/lib/virtio/virtio_user.c @@ -0,0 +1,621 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include <sys/eventfd.h> + +#include <linux/virtio_scsi.h> + +#include <linux/virtio_pci.h> +#include <linux/vhost.h> +#include <linux/virtio_ring.h> + +#include "virtio_user/vhost.h" +#include "spdk/string.h" + +#include "spdk_internal/virtio.h" + +#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \ + ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) + +static int +virtio_user_create_queue(struct virtio_dev *vdev, uint32_t queue_sel) +{ + struct virtio_user_dev *dev = vdev->ctx; + + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL comes + * first because vhost depends on this msg to allocate virtqueue + * pair. + */ + struct vhost_vring_file file; + + file.index = queue_sel; + file.fd = dev->callfds[queue_sel]; + return dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file); +} + +static int +virtio_user_kick_queue(struct virtio_dev *vdev, uint32_t queue_sel) +{ + struct virtio_user_dev *dev = vdev->ctx; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = &dev->vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + int rc; + + state.index = queue_sel; + state.num = vring->num; + rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state); + if (rc < 0) { + return rc; + } + + state.index = queue_sel; + state.num = 0; /* no reservation */ + rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state); + if (rc < 0) { + return rc; + } + + rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr); + if (rc < 0) { + return rc; + } + + /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes + * last because vhost depends on this msg to judge if + * virtio is ready.
+ */ + file.index = queue_sel; + file.fd = dev->kickfds[queue_sel]; + return dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file); +} + +static int +virtio_user_stop_queue(struct virtio_dev *vdev, uint32_t queue_sel) +{ + struct virtio_user_dev *dev = vdev->ctx; + struct vhost_vring_state state; + + state.index = queue_sel; + state.num = 0; + + return dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE, &state); +} + +static int +virtio_user_queue_setup(struct virtio_dev *vdev, + int (*fn)(struct virtio_dev *, uint32_t)) +{ + uint32_t i; + int rc; + + for (i = 0; i < vdev->max_queues; ++i) { + rc = fn(vdev, i); + if (rc < 0) { + SPDK_ERRLOG("setup tx vq fails: %"PRIu32".\n", i); + return rc; + } + } + + return 0; +} + +static int +virtio_user_map_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct virtio_dev *vdev = cb_ctx; + struct virtio_user_dev *dev = vdev->ctx; + uint64_t features; + int ret; + + /* We have to resend all mappings anyway, so don't bother with any + * page tracking. + */ + ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL); + if (ret < 0) { + return ret; + } + + /* We have to send SET_VRING_ADDR to make rte_vhost flush a pending + * SET_MEM_TABLE... + */ + ret = virtio_user_queue_setup(vdev, virtio_user_kick_queue); + if (ret < 0) { + return ret; + } + + /* Since we might want to use that mapping straight away, we have to + * make sure the guest has already processed our SET_MEM_TABLE message. + * F_REPLY_ACK is just a feature and the host is not obliged to + * support it, so we send a simple message that always has a response + * and we wait for that response. Messages are always processed in order. + */ + return dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features); +} + +static int +virtio_user_register_mem(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + const struct spdk_mem_map_ops virtio_user_map_ops = { + .notify_cb = virtio_user_map_notify, + .are_contiguous = NULL + }; + + dev->mem_map = spdk_mem_map_alloc(0, &virtio_user_map_ops, vdev); + if (dev->mem_map == NULL) { + SPDK_ERRLOG("spdk_mem_map_alloc() failed\n"); + return -1; + } + + return 0; +} + +static void +virtio_user_unregister_mem(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + + spdk_mem_map_free(&dev->mem_map); +} + +static int +virtio_user_start_device(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + uint64_t host_max_queues; + int ret; + + if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) == 0 && + vdev->max_queues > 1 + vdev->fixed_queues_num) { + SPDK_WARNLOG("%s: requested %"PRIu16" request queues, but the " + "host doesn't support VHOST_USER_PROTOCOL_F_MQ. " + "Only one request queue will be used.\n", + vdev->name, vdev->max_queues - vdev->fixed_queues_num); + vdev->max_queues = 1 + vdev->fixed_queues_num; + } + + /* negotiate the number of I/O queues. 
*/ + ret = dev->ops->send_request(dev, VHOST_USER_GET_QUEUE_NUM, &host_max_queues); + if (ret < 0) { + return ret; + } + + if (vdev->max_queues > host_max_queues + vdev->fixed_queues_num) { + SPDK_WARNLOG("%s: requested %"PRIu16" request queues" + "but only %"PRIu64" available\n", + vdev->name, vdev->max_queues - vdev->fixed_queues_num, + host_max_queues); + vdev->max_queues = host_max_queues; + } + + /* tell vhost to create queues */ + ret = virtio_user_queue_setup(vdev, virtio_user_create_queue); + if (ret < 0) { + return ret; + } + + ret = virtio_user_register_mem(vdev); + if (ret < 0) { + return ret; + } + + return 0; +} + +static int +virtio_user_stop_device(struct virtio_dev *vdev) +{ + int ret; + + ret = virtio_user_queue_setup(vdev, virtio_user_stop_queue); + /* a queue might fail to stop for various reasons, e.g. socket + * connection going down, but this mustn't prevent us from freeing + * the mem map. + */ + virtio_user_unregister_mem(vdev); + return ret; +} + +static int +virtio_user_dev_setup(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + uint16_t i; + + dev->vhostfd = -1; + + for (i = 0; i < SPDK_VIRTIO_MAX_VIRTQUEUES; ++i) { + dev->callfds[i] = -1; + dev->kickfds[i] = -1; + } + + dev->ops = &ops_user; + + return dev->ops->setup(dev); +} + +static int +virtio_user_read_dev_config(struct virtio_dev *vdev, size_t offset, + void *dst, int length) +{ + struct virtio_user_dev *dev = vdev->ctx; + struct vhost_user_config cfg = {0}; + int rc; + + if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { + return -ENOTSUP; + } + + cfg.offset = 0; + cfg.size = VHOST_USER_MAX_CONFIG_SIZE; + + rc = dev->ops->send_request(dev, VHOST_USER_GET_CONFIG, &cfg); + if (rc < 0) { + SPDK_ERRLOG("get_config failed: %s\n", spdk_strerror(-rc)); + return rc; + } + + memcpy(dst, cfg.region + offset, length); + return 0; +} + +static int +virtio_user_write_dev_config(struct virtio_dev *vdev, size_t offset, + const void *src, int length) +{ + struct virtio_user_dev *dev = vdev->ctx; + struct vhost_user_config cfg = {0}; + int rc; + + if ((dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) == 0) { + return -ENOTSUP; + } + + cfg.offset = offset; + cfg.size = length; + memcpy(cfg.region, src, length); + + rc = dev->ops->send_request(dev, VHOST_USER_SET_CONFIG, &cfg); + if (rc < 0) { + SPDK_ERRLOG("set_config failed: %s\n", spdk_strerror(-rc)); + return rc; + } + + return 0; +} + +static void +virtio_user_set_status(struct virtio_dev *vdev, uint8_t status) +{ + struct virtio_user_dev *dev = vdev->ctx; + int rc = 0; + + if ((dev->status & VIRTIO_CONFIG_S_NEEDS_RESET) && + status != VIRTIO_CONFIG_S_RESET) { + rc = -1; + } else if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + rc = virtio_user_start_device(vdev); + } else if (status == VIRTIO_CONFIG_S_RESET && + (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + rc = virtio_user_stop_device(vdev); + } + + if (rc != 0) { + dev->status |= VIRTIO_CONFIG_S_NEEDS_RESET; + } else { + dev->status = status; + } +} + +static uint8_t +virtio_user_get_status(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + + return dev->status; +} + +static uint64_t +virtio_user_get_features(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + uint64_t features; + int rc; + + rc = dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features); + if (rc < 0) { + SPDK_ERRLOG("get_features failed: %s\n", spdk_strerror(-rc)); + return 0; + } + + return features; +} + +static int 
+virtio_user_set_features(struct virtio_dev *vdev, uint64_t features) +{ + struct virtio_user_dev *dev = vdev->ctx; + uint64_t protocol_features; + int ret; + + ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features); + if (ret < 0) { + return ret; + } + + vdev->negotiated_features = features; + vdev->modern = virtio_dev_has_feature(vdev, VIRTIO_F_VERSION_1); + + if (!virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { + /* nothing else to do */ + return 0; + } + + ret = dev->ops->send_request(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); + if (ret < 0) { + return ret; + } + + protocol_features &= VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES; + ret = dev->ops->send_request(dev, VHOST_USER_SET_PROTOCOL_FEATURES, &protocol_features); + if (ret < 0) { + return ret; + } + + dev->protocol_features = protocol_features; + return 0; +} + +static uint16_t +virtio_user_get_queue_size(struct virtio_dev *vdev, uint16_t queue_id) +{ + struct virtio_user_dev *dev = vdev->ctx; + + /* Currently each queue has same queue size */ + return dev->queue_size; +} + +static int +virtio_user_setup_queue(struct virtio_dev *vdev, struct virtqueue *vq) +{ + struct virtio_user_dev *dev = vdev->ctx; + struct vhost_vring_state state; + uint16_t queue_idx = vq->vq_queue_index; + void *queue_mem; + uint64_t desc_addr, avail_addr, used_addr; + int callfd, kickfd, rc; + + if (dev->callfds[queue_idx] != -1 || dev->kickfds[queue_idx] != -1) { + SPDK_ERRLOG("queue %"PRIu16" already exists\n", queue_idx); + return -EEXIST; + } + + /* May use invalid flag, but some backend uses kickfd and + * callfd as criteria to judge if dev is alive. so finally we + * use real event_fd. + */ + callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (callfd < 0) { + SPDK_ERRLOG("callfd error, %s\n", spdk_strerror(errno)); + return -errno; + } + + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (kickfd < 0) { + SPDK_ERRLOG("kickfd error, %s\n", spdk_strerror(errno)); + close(callfd); + return -errno; + } + + queue_mem = spdk_dma_zmalloc(vq->vq_ring_size, VIRTIO_PCI_VRING_ALIGN, NULL); + if (queue_mem == NULL) { + close(kickfd); + close(callfd); + return -ENOMEM; + } + + vq->vq_ring_mem = SPDK_VTOPHYS_ERROR; + vq->vq_ring_virt_mem = queue_mem; + + state.index = vq->vq_queue_index; + state.num = 0; + + if (virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES)) { + rc = dev->ops->send_request(dev, VHOST_USER_SET_VRING_ENABLE, &state); + if (rc < 0) { + SPDK_ERRLOG("failed to send VHOST_USER_SET_VRING_ENABLE: %s\n", + spdk_strerror(-rc)); + spdk_dma_free(queue_mem); + return -rc; + } + } + + dev->callfds[queue_idx] = callfd; + dev->kickfds[queue_idx] = kickfd; + + desc_addr = (uintptr_t)vq->vq_ring_virt_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + dev->vrings[queue_idx].num = vq->vq_nentries; + dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; + dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; + dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; + + return 0; +} + +static void +virtio_user_del_queue(struct virtio_dev *vdev, struct virtqueue *vq) +{ + /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU + * correspondingly stops the ioeventfds, and reset the status of + * the device. + * For modern devices, set queue desc, avail, used in PCI bar to 0, + * not see any more behavior in QEMU. 
+ * + * Here we just care about what information to deliver to vhost-user. + * So we just close ioeventfd for now. + */ + struct virtio_user_dev *dev = vdev->ctx; + + close(dev->callfds[vq->vq_queue_index]); + close(dev->kickfds[vq->vq_queue_index]); + dev->callfds[vq->vq_queue_index] = -1; + dev->kickfds[vq->vq_queue_index] = -1; + + spdk_dma_free(vq->vq_ring_virt_mem); +} + +static void +virtio_user_notify_queue(struct virtio_dev *vdev, struct virtqueue *vq) +{ + uint64_t buf = 1; + struct virtio_user_dev *dev = vdev->ctx; + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) { + SPDK_ERRLOG("failed to kick backend: %s.\n", spdk_strerror(errno)); + } +} + +static void +virtio_user_destroy(struct virtio_dev *vdev) +{ + struct virtio_user_dev *dev = vdev->ctx; + + close(dev->vhostfd); + free(dev); +} + +static void +virtio_user_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct virtio_user_dev *dev = vdev->ctx; + + spdk_json_write_name(w, "type"); + spdk_json_write_string(w, "user"); + + spdk_json_write_name(w, "socket"); + spdk_json_write_string(w, dev->path); +} + +static void +virtio_user_write_json_config(struct virtio_dev *vdev, struct spdk_json_write_ctx *w) +{ + struct virtio_user_dev *dev = vdev->ctx; + + spdk_json_write_named_string(w, "trtype", "user"); + spdk_json_write_named_string(w, "traddr", dev->path); + spdk_json_write_named_uint32(w, "vq_count", vdev->max_queues - vdev->fixed_queues_num); + spdk_json_write_named_uint32(w, "vq_size", virtio_dev_backend_ops(vdev)->get_queue_size(vdev, 0)); +} + +static const struct virtio_dev_ops virtio_user_ops = { + .read_dev_cfg = virtio_user_read_dev_config, + .write_dev_cfg = virtio_user_write_dev_config, + .get_status = virtio_user_get_status, + .set_status = virtio_user_set_status, + .get_features = virtio_user_get_features, + .set_features = virtio_user_set_features, + .destruct_dev = virtio_user_destroy, + .get_queue_size = virtio_user_get_queue_size, + .setup_queue = virtio_user_setup_queue, + .del_queue = virtio_user_del_queue, + .notify_queue = virtio_user_notify_queue, + .dump_json_info = virtio_user_dump_json_info, + .write_json_config = virtio_user_write_json_config, +}; + +int +virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path, + uint32_t queue_size) +{ + struct virtio_user_dev *dev; + int rc; + + if (name == NULL) { + SPDK_ERRLOG("No name gived for controller: %s\n", path); + return -EINVAL; + } + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) { + return -ENOMEM; + } + + rc = virtio_dev_construct(vdev, name, &virtio_user_ops, dev); + if (rc != 0) { + SPDK_ERRLOG("Failed to init device: %s\n", path); + free(dev); + return rc; + } + + vdev->is_hw = 0; + + snprintf(dev->path, PATH_MAX, "%s", path); + dev->queue_size = queue_size; + + rc = virtio_user_dev_setup(vdev); + if (rc < 0) { + SPDK_ERRLOG("backend set up fails\n"); + goto err; + } + + rc = dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL); + if (rc < 0) { + SPDK_ERRLOG("set_owner fails: %s\n", spdk_strerror(-rc)); + goto err; + } + + return 0; + +err: + virtio_dev_destruct(vdev); + return rc; +} diff --git a/src/spdk/lib/virtio/virtio_user/vhost.h b/src/spdk/lib/virtio/virtio_user/vhost.h new file mode 100644 index 00000000..0ac7c5b1 --- /dev/null +++ b/src/spdk/lib/virtio/virtio_user/vhost.h @@ -0,0 +1,113 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
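Editorial note on the kick/call mechanism used above: virtio_user_setup_queue() creates one eventfd pair per virtqueue and hands both descriptors to the backend over the vhost-user socket (VHOST_USER_SET_VRING_CALL / VHOST_USER_SET_VRING_KICK), and virtio_user_notify_queue() then signals new available descriptors simply by writing an 8-byte counter to the kick fd. The standalone sketch below is editorial (not part of the imported sources) and shows those eventfd semantics in isolation: writes accumulate in the kernel and a single read drains the counter.

#include <sys/eventfd.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val = 1;
	int kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);

	if (kickfd < 0) {
		perror("eventfd");
		return 1;
	}

	/* Driver side: one 8-byte write per kick (cf. virtio_user_notify_queue). */
	(void)write(kickfd, &val, sizeof(val));
	(void)write(kickfd, &val, sizeof(val));

	/* Backend side: a single read returns the number of accumulated kicks
	 * (here 2) and resets the counter; with EFD_NONBLOCK it would fail with
	 * EAGAIN if nothing were pending. */
	if (read(kickfd, &val, sizeof(val)) == (ssize_t)sizeof(val)) {
		printf("pending kicks: %" PRIu64 "\n", val);
	}

	close(kickfd);
	return 0;
}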
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include "spdk/stdinc.h" + +#include <linux/vhost.h> + +#include "spdk_internal/log.h" +#include "spdk_internal/virtio.h" + +#define VHOST_USER_MAX_CONFIG_SIZE 256 + +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_MAX +}; + +struct virtio_user_backend_ops; + +struct virtio_user_dev { + int vhostfd; + + int callfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; + int kickfds[SPDK_VIRTIO_MAX_VIRTQUEUES]; + uint32_t queue_size; + + uint8_t status; + char path[PATH_MAX]; + uint64_t protocol_features; + struct vring vrings[SPDK_VIRTIO_MAX_VIRTQUEUES]; + struct virtio_user_backend_ops *ops; + struct spdk_mem_map *mem_map; +}; + +struct virtio_user_backend_ops { + int (*setup)(struct virtio_user_dev *dev); + int (*send_request)(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg); +}; + +/* get/set config msg */ +struct vhost_user_config { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +}; + +extern struct virtio_user_backend_ops ops_user; + +#endif diff --git a/src/spdk/lib/virtio/virtio_user/vhost_user.c b/src/spdk/lib/virtio/virtio_user/vhost_user.c new file mode 100644 index
00000000..46765af5 --- /dev/null +++ b/src/spdk/lib/virtio/virtio_user/vhost_user.c @@ -0,0 +1,518 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "vhost.h" + +#include "spdk/string.h" + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +/** Fixed-size vhost_memory struct */ +struct vhost_memory_padded { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory_padded memory; + struct vhost_user_config cfg; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + } else { + msgh.msg_control = 
NULL; + msgh.msg_controllen = 0; + } + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + if (r == -1) { + return -errno; + } + + return 0; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + ssize_t ret; + size_t sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if ((size_t)ret != sz_hdr) { + SPDK_WARNLOG("Failed to recv msg hdr: %zd instead of %zu.\n", + ret, sz_hdr); + if (ret == -1) { + return -errno; + } else { + return -EBUSY; + } + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + SPDK_WARNLOG("Failed to recv msg: flags %"PRIx32" instead of %"PRIx32".\n", + msg->flags, valid_flags); + return -EIO; + } + + sz_payload = msg->size; + + if (sizeof(*msg) - sz_hdr < sz_payload) { + SPDK_WARNLOG("Received oversized msg: payload size %zu > available space %zu\n", + sz_payload, sizeof(*msg) - sz_hdr); + return -EIO; + } + + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if ((size_t)ret != sz_payload) { + SPDK_WARNLOG("Failed to recv msg payload: %zd instead of %"PRIu32".\n", + ret, msg->size); + if (ret == -1) { + return -errno; + } else { + return -EBUSY; + } + } + } + + return 0; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. 
+ */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx, rc; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + SPDK_ERRLOG("cannot open /proc/self/maps\n"); + rc = -errno; + assert(rc < 0); /* scan-build hack */ + return rc; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + SPDK_ERRLOG("Failed to parse address\n"); + rc = -EIO; + goto out; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') { /** skip spaces */ + tmp++; + } + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) { + *tail = '\0'; + } + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) { + continue; + } + + str_start = str_underline - strlen("map"); + if (str_start < tmp) { + continue; + } + + if (sscanf(str_start, "map_%d", &huge_index) != 1) { + continue; + } + + if (idx >= max) { + SPDK_ERRLOG("Exceed maximum of %d\n", max); + rc = -ENOSPC; + goto out; + } + + if (idx > 0 && + strncmp(tmp, huges[idx - 1].path, PATH_MAX) == 0 && + v_start == huges[idx - 1].addr + huges[idx - 1].size) { + huges[idx - 1].size += (v_end - v_start); + continue; + } + + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + snprintf(huges[idx].path, PATH_MAX, "%s", tmp); + idx++; + } + + rc = idx; +out: + fclose(f); + return rc; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + SPDK_ERRLOG("Failed to prepare memory for vhost-user\n"); + return num; + } + + for (i = 0; i < num; ++i) { + /* the memory regions are unaligned */ + msg->payload.memory.regions[i].guest_phys_addr = huges[i].addr; /* use vaddr! 
*/ + msg->payload.memory.regions[i].userspace_addr = huges[i].addr; + msg->payload.memory.regions[i].memory_size = huges[i].size; + msg->payload.memory.regions[i].flags_padding = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static const char *const vhost_msg_strings[VHOST_USER_MAX] = { + [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG", + [VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG", +}; + +static int +vhost_user_sock(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len, rc; + int vhostfd = dev->vhostfd; + + SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_USER, "sent message %d = %s\n", req, vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + case VHOST_USER_GET_PROTOCOL_FEATURES: + case VHOST_USER_GET_QUEUE_NUM: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + case VHOST_USER_SET_PROTOCOL_FEATURES: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + rc = prepare_vhost_memory_user(&msg, fds); + if (rc < 0) { + return rc; + } + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(msg.payload.memory.nregions); + msg.size += sizeof(msg.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(msg.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(msg.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(msg.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(msg.payload.u64); + if (file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + break; + + case VHOST_USER_GET_CONFIG: + memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); + msg.size = sizeof(msg.payload.cfg); + need_reply = 1; + break; + + case VHOST_USER_SET_CONFIG: + 
memcpy(&msg.payload.cfg, arg, sizeof(msg.payload.cfg)); + msg.size = sizeof(msg.payload.cfg); + break; + + default: + SPDK_ERRLOG("trying to send unknown msg\n"); + return -EINVAL; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + rc = vhost_user_write(vhostfd, &msg, len, fds, fd_num); + if (rc < 0) { + SPDK_ERRLOG("%s failed: %s\n", + vhost_msg_strings[req], spdk_strerror(-rc)); + return rc; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) { + close(fds[i]); + } + + if (need_reply) { + rc = vhost_user_read(vhostfd, &msg); + if (rc < 0) { + SPDK_WARNLOG("Received msg failed: %s\n", spdk_strerror(-rc)); + return rc; + } + + if (req != msg.request) { + SPDK_WARNLOG("Received unexpected msg type\n"); + return -EIO; + } + + switch (req) { + case VHOST_USER_GET_FEATURES: + case VHOST_USER_GET_PROTOCOL_FEATURES: + case VHOST_USER_GET_QUEUE_NUM: + if (msg.size != sizeof(msg.payload.u64)) { + SPDK_WARNLOG("Received bad msg size\n"); + return -EIO; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(msg.payload.state)) { + SPDK_WARNLOG("Received bad msg size\n"); + return -EIO; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + case VHOST_USER_GET_CONFIG: + if (msg.size != sizeof(msg.payload.cfg)) { + SPDK_WARNLOG("Received bad msg size\n"); + return -EIO; + } + memcpy(arg, &msg.payload.cfg, sizeof(msg.payload.cfg)); + break; + default: + SPDK_WARNLOG("Received unexpected msg type\n"); + return -EBADMSG; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * + * @return + * - (-1) if fail; + * - (0) if succeed. + */ +static int +vhost_user_setup(struct virtio_user_dev *dev) +{ + int fd; + int flag; + struct sockaddr_un un; + ssize_t rc; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + SPDK_ERRLOG("socket() error, %s\n", spdk_strerror(errno)); + return -errno; + } + + flag = fcntl(fd, F_GETFD); + if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) { + SPDK_ERRLOG("fcntl failed, %s\n", spdk_strerror(errno)); + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + rc = snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); + if (rc < 0 || (size_t)rc >= sizeof(un.sun_path)) { + SPDK_ERRLOG("socket path too long\n"); + close(fd); + if (rc < 0) { + return -errno; + } else { + return -EINVAL; + } + } + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + SPDK_ERRLOG("connect error, %s\n", spdk_strerror(errno)); + close(fd); + return -errno; + } + + dev->vhostfd = fd; + return 0; +} + +struct virtio_user_backend_ops ops_user = { + .setup = vhost_user_setup, + .send_request = vhost_user_sock, +}; + +SPDK_LOG_REGISTER_COMPONENT("virtio_user", SPDK_LOG_VIRTIO_USER) -- cgit v1.2.3
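Editorial note on the vhost-user transport implemented in vhost_user.c above: every request is a 12-byte header (request, flags carrying the protocol version and, on replies, the REPLY bit, and the payload size) followed by a payload union, with any file descriptors passed out-of-band as SCM_RIGHTS ancillary data. The standalone sketch below is editorial (not part of the imported sources) and performs the same VHOST_USER_GET_FEATURES round trip as vhost_user_sock(), without the fd handling; the socket path is an assumption for illustration only.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

#define SKETCH_GET_FEATURES 1		/* VHOST_USER_GET_FEATURES */
#define SKETCH_VERSION      0x1	/* VHOST_USER_VERSION */

/* 12-byte wire header, i.e. VHOST_USER_HDR_SIZE in vhost_user.c */
struct sketch_hdr {
	uint32_t request;
	uint32_t flags;
	uint32_t size;
};

int main(void)
{
	struct sockaddr_un un = { .sun_family = AF_UNIX };
	struct sketch_hdr hdr = { .request = SKETCH_GET_FEATURES, .flags = SKETCH_VERSION, .size = 0 };
	uint64_t features = 0;
	int fd;

	/* assumed socket path, for illustration only */
	snprintf(un.sun_path, sizeof(un.sun_path), "%s", "/var/tmp/vhost.0");

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0 || connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
		perror("socket/connect");
		return 1;
	}

	/* GET_FEATURES carries no payload; the reply reuses the header (REPLY bit
	 * set, size == 8) followed by the 64-bit feature bitmask. */
	if (send(fd, &hdr, sizeof(hdr), 0) != (ssize_t)sizeof(hdr) ||
	    recv(fd, &hdr, sizeof(hdr), MSG_WAITALL) != (ssize_t)sizeof(hdr) ||
	    hdr.size != sizeof(features) ||
	    recv(fd, &features, sizeof(features), MSG_WAITALL) != (ssize_t)sizeof(features)) {
		perror("vhost-user GET_FEATURES");
		close(fd);
		return 1;
	}

	printf("device features: 0x%016" PRIx64 "\n", features);
	close(fd);
	return 0;
}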