Diffstat (limited to 'src/spdk/lib/bdev')
-rw-r--r--  src/spdk/lib/bdev/Makefile  60
-rw-r--r--  src/spdk/lib/bdev/aio/Makefile  41
-rw-r--r--  src/spdk/lib/bdev/aio/bdev_aio.c  751
-rw-r--r--  src/spdk/lib/bdev/aio/bdev_aio.h  80
-rw-r--r--  src/spdk/lib/bdev/aio/bdev_aio_rpc.c  160
-rw-r--r--  src/spdk/lib/bdev/bdev.c  3950
-rw-r--r--  src/spdk/lib/bdev/crypto/Makefile  42
-rw-r--r--  src/spdk/lib/bdev/crypto/vbdev_crypto.c  1506
-rw-r--r--  src/spdk/lib/bdev/crypto/vbdev_crypto.h  66
-rw-r--r--  src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c  163
-rw-r--r--  src/spdk/lib/bdev/error/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/error/vbdev_error.c  513
-rw-r--r--  src/spdk/lib/bdev/error/vbdev_error.h  76
-rw-r--r--  src/spdk/lib/bdev/error/vbdev_error_rpc.c  258
-rw-r--r--  src/spdk/lib/bdev/gpt/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/gpt/gpt.c  239
-rw-r--r--  src/spdk/lib/bdev/gpt/gpt.h  62
-rw-r--r--  src/spdk/lib/bdev/gpt/vbdev_gpt.c  463
-rw-r--r--  src/spdk/lib/bdev/iscsi/Makefile  46
-rw-r--r--  src/spdk/lib/bdev/iscsi/bdev_iscsi.c  875
-rw-r--r--  src/spdk/lib/bdev/iscsi/bdev_iscsi.h  75
-rw-r--r--  src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c  173
-rw-r--r--  src/spdk/lib/bdev/lvol/Makefile  41
-rw-r--r--  src/spdk/lib/bdev/lvol/vbdev_lvol.c  1321
-rw-r--r--  src/spdk/lib/bdev/lvol/vbdev_lvol.h  120
-rw-r--r--  src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c  1089
-rw-r--r--  src/spdk/lib/bdev/malloc/Makefile  41
-rw-r--r--  src/spdk/lib/bdev/malloc/bdev_malloc.c  524
-rw-r--r--  src/spdk/lib/bdev/malloc/bdev_malloc.h  48
-rw-r--r--  src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c  170
-rw-r--r--  src/spdk/lib/bdev/null/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/null/bdev_null.c  384
-rw-r--r--  src/spdk/lib/bdev/null/bdev_null.h  57
-rw-r--r--  src/spdk/lib/bdev/null/bdev_null_rpc.c  169
-rw-r--r--  src/spdk/lib/bdev/nvme/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/nvme/bdev_nvme.c  1856
-rw-r--r--  src/spdk/lib/bdev/nvme/bdev_nvme.h  112
-rw-r--r--  src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c  740
-rw-r--r--  src/spdk/lib/bdev/nvme/nvme_rpc.c  487
-rw-r--r--  src/spdk/lib/bdev/part.c  373
-rw-r--r--  src/spdk/lib/bdev/passthru/Makefile  42
-rw-r--r--  src/spdk/lib/bdev/passthru/vbdev_passthru.c  671
-rw-r--r--  src/spdk/lib/bdev/passthru/vbdev_passthru.h  62
-rw-r--r--  src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c  160
-rw-r--r--  src/spdk/lib/bdev/pmem/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/pmem/bdev_pmem.c  465
-rw-r--r--  src/spdk/lib/bdev/pmem/bdev_pmem.h  64
-rw-r--r--  src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c  350
-rw-r--r--  src/spdk/lib/bdev/raid/Makefile  41
-rw-r--r--  src/spdk/lib/bdev/raid/bdev_raid.c  1624
-rw-r--r--  src/spdk/lib/bdev/raid/bdev_raid.h  225
-rw-r--r--  src/spdk/lib/bdev/raid/bdev_raid_rpc.c  408
-rw-r--r--  src/spdk/lib/bdev/rbd/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/rbd/bdev_rbd.c  740
-rw-r--r--  src/spdk/lib/bdev/rbd/bdev_rbd.h  55
-rw-r--r--  src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c  157
-rw-r--r--  src/spdk/lib/bdev/rpc/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/rpc/bdev_rpc.c  587
-rw-r--r--  src/spdk/lib/bdev/scsi_nvme.c  261
-rw-r--r--  src/spdk/lib/bdev/split/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/split/vbdev_split.c  565
-rw-r--r--  src/spdk/lib/bdev/split/vbdev_split.h  68
-rw-r--r--  src/spdk/lib/bdev/split/vbdev_split_rpc.c  151
-rw-r--r--  src/spdk/lib/bdev/virtio/Makefile  40
-rw-r--r--  src/spdk/lib/bdev/virtio/bdev_virtio.h  164
-rw-r--r--  src/spdk/lib/bdev/virtio/bdev_virtio_blk.c  707
-rw-r--r--  src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c  613
-rw-r--r--  src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c  2017
-rw-r--r--  src/spdk/lib/bdev/vtune.c  49
69 files changed, 27737 insertions, 0 deletions
diff --git a/src/spdk/lib/bdev/Makefile b/src/spdk/lib/bdev/Makefile
new file mode 100644
index 00000000..a5d30a9c
--- /dev/null
+++ b/src/spdk/lib/bdev/Makefile
@@ -0,0 +1,60 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+ifeq ($(CONFIG_VTUNE),y)
+CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify
+endif
+
+C_SRCS = bdev.c part.c scsi_nvme.c
+C_SRCS-$(CONFIG_VTUNE) += vtune.c
+LIBNAME = bdev
+
+DIRS-y += error gpt lvol malloc null nvme passthru raid rpc split
+
+ifeq ($(CONFIG_CRYPTO),y)
+DIRS-y += crypto
+endif
+
+ifeq ($(OS),Linux)
+DIRS-y += aio
+DIRS-$(CONFIG_ISCSI_INITIATOR) += iscsi
+DIRS-$(CONFIG_VIRTIO) += virtio
+DIRS-$(CONFIG_PMDK) += pmem
+endif
+
+DIRS-$(CONFIG_RBD) += rbd
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/aio/Makefile b/src/spdk/lib/bdev/aio/Makefile
new file mode 100644
index 00000000..7a39e3d2
--- /dev/null
+++ b/src/spdk/lib/bdev/aio/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_aio.c bdev_aio_rpc.c
+LIBNAME = bdev_aio
+LOCAL_SYS_LIBS = -laio
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/aio/bdev_aio.c b/src/spdk/lib/bdev/aio/bdev_aio.c
new file mode 100644
index 00000000..bb0289ed
--- /dev/null
+++ b/src/spdk/lib/bdev/aio/bdev_aio.c
@@ -0,0 +1,751 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_aio.h"
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/env.h"
+#include "spdk/fd.h"
+#include "spdk/thread.h"
+#include "spdk/json.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+
+#include "spdk_internal/log.h"
+
+static int bdev_aio_initialize(void);
+static void bdev_aio_fini(void);
+static void aio_free_disk(struct file_disk *fdisk);
+static void bdev_aio_get_spdk_running_config(FILE *fp);
+static TAILQ_HEAD(, file_disk) g_aio_disk_head;
+
+#define SPDK_AIO_QUEUE_DEPTH 128
+#define MAX_EVENTS_PER_POLL 32
+
+static int
+bdev_aio_get_ctx_size(void)
+{
+ return sizeof(struct bdev_aio_task);
+}
+
+static struct spdk_bdev_module aio_if = {
+ .name = "aio",
+ .module_init = bdev_aio_initialize,
+ .module_fini = bdev_aio_fini,
+ .config_text = bdev_aio_get_spdk_running_config,
+ .get_ctx_size = bdev_aio_get_ctx_size,
+};
+
+struct bdev_aio_group_channel {
+ struct spdk_poller *poller;
+ int epfd;
+};
+
+SPDK_BDEV_MODULE_REGISTER(&aio_if)
+
+static int
+bdev_aio_open(struct file_disk *disk)
+{
+ int fd;
+
+ fd = open(disk->filename, O_RDWR | O_DIRECT);
+ if (fd < 0) {
+ /* Try without O_DIRECT for non-disk files */
+ fd = open(disk->filename, O_RDWR);
+ if (fd < 0) {
+ SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
+ disk->filename, errno, spdk_strerror(errno));
+ disk->fd = -1;
+ return -1;
+ }
+ }
+
+ disk->fd = fd;
+
+ return 0;
+}
+
+static int
+bdev_aio_close(struct file_disk *disk)
+{
+ int rc;
+
+ if (disk->fd == -1) {
+ return 0;
+ }
+
+ rc = close(disk->fd);
+ if (rc < 0) {
+ SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
+ disk->fd, errno, spdk_strerror(errno));
+ return -1;
+ }
+
+ disk->fd = -1;
+
+ return 0;
+}
+
+static int64_t
+bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
+ struct bdev_aio_task *aio_task,
+ struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
+{
+ struct iocb *iocb = &aio_task->iocb;
+ struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
+ iocb->data = aio_task;
+ aio_task->len = nbytes;
+ io_set_eventfd(iocb, aio_ch->efd);
+
+ SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
+ iovcnt, nbytes, offset);
+
+ rc = io_submit(aio_ch->io_ctx, 1, &iocb);
+ if (rc < 0) {
+ if (rc == -EAGAIN) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
+ SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
+ }
+ return -1;
+ }
+ aio_ch->io_inflight++;
+ return nbytes;
+}
+
+static int64_t
+bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
+ struct bdev_aio_task *aio_task,
+ struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
+{
+ struct iocb *iocb = &aio_task->iocb;
+ struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
+ iocb->data = aio_task;
+ aio_task->len = len;
+ io_set_eventfd(iocb, aio_ch->efd);
+
+ SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
+ iovcnt, len, offset);
+
+ rc = io_submit(aio_ch->io_ctx, 1, &iocb);
+ if (rc < 0) {
+ if (rc == -EAGAIN) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
+ SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
+ }
+ return -1;
+ }
+ aio_ch->io_inflight++;
+ return len;
+}
+
+static void
+bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
+{
+ int rc = fsync(fdisk->fd);
+
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
+ rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+static int
+bdev_aio_destruct(void *ctx)
+{
+ struct file_disk *fdisk = ctx;
+ int rc = 0;
+
+ TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
+ rc = bdev_aio_close(fdisk);
+ if (rc < 0) {
+ SPDK_ERRLOG("bdev_aio_close() failed\n");
+ }
+ return rc;
+}
+
+static int
+bdev_aio_initialize_io_channel(struct bdev_aio_io_channel *ch)
+{
+ ch->efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (ch->efd == -1) {
+ SPDK_ERRLOG("Cannot create efd\n");
+ return -1;
+ }
+
+ if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
+ close(ch->efd);
+ SPDK_ERRLOG("async I/O context setup failure\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+bdev_aio_group_poll(void *arg)
+{
+ struct bdev_aio_group_channel *group_ch = arg;
+ struct bdev_aio_io_channel *ch;
+ int nr, i, j, rc, total_nr = 0;
+ enum spdk_bdev_io_status status;
+ struct bdev_aio_task *aio_task;
+ struct timespec timeout;
+ struct io_event events[SPDK_AIO_QUEUE_DEPTH];
+ struct epoll_event epevents[MAX_EVENTS_PER_POLL];
+
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 0;
+ rc = epoll_wait(group_ch->epfd, epevents, MAX_EVENTS_PER_POLL, 0);
+ if (rc == -1) {
+ SPDK_ERRLOG("epoll_wait error(%d): %s on ch=%p\n", errno, spdk_strerror(errno), group_ch);
+ return -1;
+ }
+
+ for (j = 0; j < rc; j++) {
+ ch = epevents[j].data.ptr;
+ nr = io_getevents(ch->io_ctx, 1, SPDK_AIO_QUEUE_DEPTH,
+ events, &timeout);
+
+ if (nr < 0) {
+ SPDK_ERRLOG("Returned %d on bdev_aio_io_channel %p\n", nr, ch);
+ continue;
+ }
+
+ total_nr += nr;
+ for (i = 0; i < nr; i++) {
+ aio_task = events[i].data;
+ if (events[i].res != aio_task->len) {
+ status = SPDK_BDEV_IO_STATUS_FAILED;
+ } else {
+ status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ }
+
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
+ ch->io_inflight--;
+ }
+ }
+
+ return total_nr;
+}
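The poller above chains three kernel facilities: each channel's io_context signals an eventfd when a request completes (io_set_eventfd at submit time), the group channel multiplexes all of those eventfds through a single epoll descriptor, and io_getevents then reaps the completed iocbs. A minimal standalone sketch of the same pattern, with error handling omitted; aio_eventfd_demo and its arguments are illustrative, not part of this patch:

    #include <libaio.h>
    #include <sys/epoll.h>
    #include <sys/eventfd.h>
    #include <unistd.h>

    /* Submit one read and reap it via eventfd + epoll, as the driver does. */
    static int aio_eventfd_demo(int fd, void *buf, size_t len)
    {
        io_context_t ctx = 0;
        struct iocb cb, *cbs[1] = { &cb };
        struct io_event ev;
        struct epoll_event epev = { .events = EPOLLIN | EPOLLET };
        int efd, epfd, n;

        efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
        io_setup(128, &ctx);              /* queue depth, cf. SPDK_AIO_QUEUE_DEPTH */
        epfd = epoll_create1(0);
        epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epev);

        io_prep_pread(&cb, fd, buf, len, 0);
        io_set_eventfd(&cb, efd);         /* completion bumps the eventfd counter */
        io_submit(ctx, 1, cbs);

        epoll_wait(epfd, &epev, 1, -1);   /* sleep until the eventfd fires */
        n = io_getevents(ctx, 1, 1, &ev, NULL);

        io_destroy(ctx);
        close(epfd);
        close(efd);
        return (n == 1 && ev.res == len) ? 0 : -1;
    }

Note that neither this sketch nor the driver ever read()s the eventfd counter: with EPOLLIN | EPOLLET, each increment of the counter produces a fresh edge, so the poller wakes without having to drain it.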
+
+static void
+_bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
+
+ if (aio_ch->io_inflight) {
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static int bdev_aio_reset_retry_timer(void *arg);
+
+static void
+_bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);
+
+ if (status == -1) {
+ fdisk->reset_retry_timer = spdk_poller_register(bdev_aio_reset_retry_timer, fdisk, 500);
+ return;
+ }
+
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
+}
+
+static int
+bdev_aio_reset_retry_timer(void *arg)
+{
+ struct file_disk *fdisk = arg;
+
+ if (fdisk->reset_retry_timer) {
+ spdk_poller_unregister(&fdisk->reset_retry_timer);
+ }
+
+ spdk_for_each_channel(fdisk,
+ _bdev_aio_get_io_inflight,
+ fdisk,
+ _bdev_aio_get_io_inflight_done);
+
+ return -1;
+}
+
+static void
+bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
+{
+ fdisk->reset_task = aio_task;
+
+ bdev_aio_reset_retry_timer(fdisk);
+}
+
+static void bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct bdev_aio_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+}
+
+static int _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct bdev_aio_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
+ (struct bdev_aio_task *)bdev_io->driver_ctx);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
+ (struct bdev_aio_task *)bdev_io->driver_ctx);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static void bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static int
+bdev_aio_create_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_aio_io_channel *ch = ctx_buf;
+ struct bdev_aio_group_channel *group_ch_ctx;
+ struct epoll_event epevent;
+
+ if (bdev_aio_initialize_io_channel(ch) != 0) {
+ return -1;
+ }
+
+ ch->group_ch = spdk_get_io_channel(&aio_if);
+ group_ch_ctx = spdk_io_channel_get_ctx(ch->group_ch);
+
+ epevent.events = EPOLLIN | EPOLLET;
+ epevent.data.ptr = ch;
+ if (epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_ADD, ch->efd, &epevent)) {
+ close(ch->efd);
+ io_destroy(ch->io_ctx);
+ spdk_put_io_channel(ch->group_ch);
+ SPDK_ERRLOG("epoll_ctl error\n");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_aio_io_channel *io_channel = ctx_buf;
+ struct bdev_aio_group_channel *group_ch_ctx;
+ struct epoll_event event;
+
+ group_ch_ctx = spdk_io_channel_get_ctx(io_channel->group_ch);
+ epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_DEL, io_channel->efd, &event);
+ spdk_put_io_channel(io_channel->group_ch);
+ close(io_channel->efd);
+ io_destroy(io_channel->io_ctx);
+
+}
+
+static struct spdk_io_channel *
+bdev_aio_get_io_channel(void *ctx)
+{
+ struct file_disk *fdisk = ctx;
+
+ return spdk_get_io_channel(fdisk);
+}
+
+
+static int
+bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct file_disk *fdisk = ctx;
+
+ spdk_json_write_name(w, "aio");
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "filename");
+ spdk_json_write_string(w, fdisk->filename);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct file_disk *fdisk = bdev->ctxt;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_aio_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ if (fdisk->block_size_override) {
+ spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
+ }
+ spdk_json_write_named_string(w, "filename", fdisk->filename);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table aio_fn_table = {
+ .destruct = bdev_aio_destruct,
+ .submit_request = bdev_aio_submit_request,
+ .io_type_supported = bdev_aio_io_type_supported,
+ .get_io_channel = bdev_aio_get_io_channel,
+ .dump_info_json = bdev_aio_dump_info_json,
+ .write_config_json = bdev_aio_write_json_config,
+};
+
+static void aio_free_disk(struct file_disk *fdisk)
+{
+ if (fdisk == NULL) {
+ return;
+ }
+ free(fdisk->filename);
+ free(fdisk->disk.name);
+ free(fdisk);
+}
+
+static int
+bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_aio_group_channel *ch = ctx_buf;
+
+ ch->epfd = epoll_create1(0);
+ if (ch->epfd == -1) {
+ SPDK_ERRLOG("cannot create epoll fd\n");
+ return -1;
+ }
+
+ ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0);
+ return 0;
+}
+
+static void
+bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_aio_group_channel *ch = ctx_buf;
+
+ close(ch->epfd);
+ spdk_poller_unregister(&ch->poller);
+}
+
+struct spdk_bdev *
+create_aio_disk(const char *name, const char *filename, uint32_t block_size)
+{
+ struct file_disk *fdisk;
+ uint32_t detected_block_size;
+ uint64_t disk_size;
+ int rc;
+
+ fdisk = calloc(1, sizeof(*fdisk));
+ if (!fdisk) {
+ SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
+ return NULL;
+ }
+
+ fdisk->filename = strdup(filename);
+ if (!fdisk->filename) {
+ goto error_return;
+ }
+
+ if (bdev_aio_open(fdisk)) {
+ SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
+ goto error_return;
+ }
+
+ disk_size = spdk_fd_get_size(fdisk->fd);
+
+ fdisk->disk.name = strdup(name);
+ if (!fdisk->disk.name) {
+ goto error_return;
+ }
+ fdisk->disk.product_name = "AIO disk";
+ fdisk->disk.module = &aio_if;
+
+ fdisk->disk.need_aligned_buffer = 1;
+ fdisk->disk.write_cache = 1;
+
+ detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
+ if (block_size == 0) {
+ /* User did not specify block size - use autodetected block size. */
+ if (detected_block_size == 0) {
+ SPDK_ERRLOG("Block size could not be auto-detected\n");
+ goto error_return;
+ }
+ fdisk->block_size_override = false;
+ block_size = detected_block_size;
+ } else {
+ if (block_size < detected_block_size) {
+ SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
+ "auto-detected block size %" PRIu32 "\n",
+ block_size, detected_block_size);
+ goto error_return;
+ } else if (detected_block_size != 0 && block_size != detected_block_size) {
+ SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
+ "auto-detected block size %" PRIu32 "\n",
+ block_size, detected_block_size);
+ }
+ fdisk->block_size_override = true;
+ }
+
+ if (block_size < 512) {
+ SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
+ goto error_return;
+ }
+
+ if (!spdk_u32_is_pow2(block_size)) {
+ SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
+ goto error_return;
+ }
+
+ fdisk->disk.blocklen = block_size;
+
+ if (disk_size % fdisk->disk.blocklen != 0) {
+ SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
+ disk_size, fdisk->disk.blocklen);
+ goto error_return;
+ }
+
+ fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
+ fdisk->disk.ctxt = fdisk;
+
+ fdisk->disk.fn_table = &aio_fn_table;
+
+ spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
+ sizeof(struct bdev_aio_io_channel),
+ fdisk->disk.name);
+ rc = spdk_bdev_register(&fdisk->disk);
+ if (rc) {
+ spdk_io_device_unregister(fdisk, NULL);
+ goto error_return;
+ }
+
+ TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
+ return &fdisk->disk;
+
+error_return:
+ bdev_aio_close(fdisk);
+ aio_free_disk(fdisk);
+ return NULL;
+}
+
+static void
+aio_io_device_unregister_cb(void *io_device)
+{
+ struct file_disk *fdisk = io_device;
+ spdk_delete_aio_complete cb_fn = fdisk->delete_cb_fn;
+ void *cb_arg = fdisk->delete_cb_arg;
+
+ aio_free_disk(fdisk);
+ cb_fn(cb_arg, 0);
+}
+
+static void
+aio_bdev_unregister_cb(void *arg, int bdeverrno)
+{
+ struct file_disk *fdisk = arg;
+
+ if (bdeverrno != 0) {
+ fdisk->delete_cb_fn(fdisk->delete_cb_arg, bdeverrno);
+ return;
+ }
+
+ spdk_io_device_unregister(fdisk, aio_io_device_unregister_cb);
+}
+
+void
+delete_aio_disk(struct spdk_bdev *bdev, spdk_delete_aio_complete cb_fn, void *cb_arg)
+{
+ struct file_disk *fdisk;
+
+ if (!bdev || bdev->module != &aio_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ fdisk = bdev->ctxt;
+ fdisk->delete_cb_fn = cb_fn;
+ fdisk->delete_cb_arg = cb_arg;
+ spdk_bdev_unregister(bdev, aio_bdev_unregister_cb, fdisk);
+}
+
+static int
+bdev_aio_initialize(void)
+{
+ size_t i;
+ struct spdk_conf_section *sp;
+ struct spdk_bdev *bdev;
+
+ TAILQ_INIT(&g_aio_disk_head);
+ spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
+ sizeof(struct bdev_aio_group_channel),
+ "aio_module");
+
+ sp = spdk_conf_find_section(NULL, "AIO");
+ if (!sp) {
+ return 0;
+ }
+
+ i = 0;
+ while (true) {
+ const char *file;
+ const char *name;
+ const char *block_size_str;
+ uint32_t block_size = 0;
+
+ file = spdk_conf_section_get_nmval(sp, "AIO", i, 0);
+ if (!file) {
+ break;
+ }
+
+ name = spdk_conf_section_get_nmval(sp, "AIO", i, 1);
+ if (!name) {
+ SPDK_ERRLOG("No name provided for AIO disk with file %s\n", file);
+ i++;
+ continue;
+ }
+
+ block_size_str = spdk_conf_section_get_nmval(sp, "AIO", i, 2);
+ if (block_size_str) {
+ block_size = atoi(block_size_str);
+ }
+
+ bdev = create_aio_disk(name, file, block_size);
+ if (!bdev) {
+ SPDK_ERRLOG("Unable to create AIO bdev from file %s\n", file);
+ i++;
+ continue;
+ }
+
+ i++;
+ }
+
+ return 0;
+}
+
+static void
+bdev_aio_fini(void)
+{
+ spdk_io_device_unregister(&aio_if, NULL);
+}
+
+static void
+bdev_aio_get_spdk_running_config(FILE *fp)
+{
+ char *file;
+ char *name;
+ uint32_t block_size;
+ struct file_disk *fdisk;
+
+ fprintf(fp,
+ "\n"
+ "# Users must change this section to match the /dev/sdX devices to be\n"
+ "# exported as iSCSI LUNs. The devices are accessed using Linux AIO.\n"
+ "# The format is:\n"
+ "# AIO <file name> <bdev name> [<block size>]\n"
+ "# The file name is the backing device\n"
+ "# The bdev name can be referenced from elsewhere in the configuration file.\n"
+ "# Block size may be omitted to automatically detect the block size of a disk.\n"
+ "[AIO]\n");
+
+ TAILQ_FOREACH(fdisk, &g_aio_disk_head, link) {
+ file = fdisk->filename;
+ name = fdisk->disk.name;
+ block_size = fdisk->disk.blocklen;
+ fprintf(fp, " AIO %s %s ", file, name);
+ if (fdisk->block_size_override) {
+ fprintf(fp, "%d", block_size);
+ }
+ fprintf(fp, "\n");
+ }
+ fprintf(fp, "\n");
+}
+
+SPDK_LOG_REGISTER_COMPONENT("aio", SPDK_LOG_AIO)
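Together with the header that follows, the module's public surface is just create_aio_disk() and delete_aio_disk(). A hedged sketch of how an SPDK application thread might drive them; the bdev name, backing file path, and delete_done callback are illustrative:

    #include <stdio.h>
    #include "bdev_aio.h"

    static void
    delete_done(void *cb_arg, int bdeverrno)
    {
        printf("delete completed: %d\n", bdeverrno);
    }

    static void
    aio_example(void)
    {
        struct spdk_bdev *bdev;

        /* block_size 0 asks for auto-detection via spdk_fd_get_blocklen(). */
        bdev = create_aio_disk("aio0", "/tmp/aio_backing_file", 0);
        if (bdev == NULL) {
            return;
        }

        /* Tear-down is asynchronous; delete_done() fires once the bdev
         * and its io_device have been unregistered. */
        delete_aio_disk(bdev, delete_done, NULL);
    }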
diff --git a/src/spdk/lib/bdev/aio/bdev_aio.h b/src/spdk/lib/bdev/aio/bdev_aio.h
new file mode 100644
index 00000000..f58e9822
--- /dev/null
+++ b/src/spdk/lib/bdev/aio/bdev_aio.h
@@ -0,0 +1,80 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_AIO_H
+#define SPDK_BDEV_AIO_H
+
+#include "spdk/stdinc.h"
+
+#include <libaio.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+
+#include "spdk/queue.h"
+#include "spdk/bdev.h"
+
+#include "spdk/bdev_module.h"
+
+struct bdev_aio_task {
+ struct iocb iocb;
+ uint64_t len;
+ TAILQ_ENTRY(bdev_aio_task) link;
+};
+
+struct bdev_aio_io_channel {
+ io_context_t io_ctx;
+ uint64_t io_inflight;
+ struct spdk_io_channel *group_ch;
+ TAILQ_ENTRY(bdev_aio_io_channel) link;
+ int efd;
+};
+
+typedef void (*spdk_delete_aio_complete)(void *cb_arg, int bdeverrno);
+
+struct file_disk {
+ struct bdev_aio_task *reset_task;
+ struct spdk_poller *reset_retry_timer;
+ struct spdk_bdev disk;
+ char *filename;
+ int fd;
+ TAILQ_ENTRY(file_disk) link;
+ bool block_size_override;
+ spdk_delete_aio_complete delete_cb_fn;
+ void *delete_cb_arg;
+};
+
+struct spdk_bdev *create_aio_disk(const char *name, const char *filename, uint32_t block_size);
+
+void delete_aio_disk(struct spdk_bdev *bdev, spdk_delete_aio_complete cb_fn, void *cb_arg);
+
+#endif // SPDK_BDEV_AIO_H
diff --git a/src/spdk/lib/bdev/aio/bdev_aio_rpc.c b/src/spdk/lib/bdev/aio/bdev_aio_rpc.c
new file mode 100644
index 00000000..10dd237a
--- /dev/null
+++ b/src/spdk/lib/bdev/aio/bdev_aio_rpc.c
@@ -0,0 +1,160 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_aio.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+struct rpc_construct_aio {
+ char *name;
+ char *filename;
+ uint32_t block_size;
+};
+
+static void
+free_rpc_construct_aio(struct rpc_construct_aio *req)
+{
+ free(req->name);
+ free(req->filename);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_aio_decoders[] = {
+ {"name", offsetof(struct rpc_construct_aio, name), spdk_json_decode_string},
+ {"filename", offsetof(struct rpc_construct_aio, filename), spdk_json_decode_string, true},
+ {"block_size", offsetof(struct rpc_construct_aio, block_size), spdk_json_decode_uint32, true},
+};
+
+static void
+spdk_rpc_construct_aio_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_aio req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev;
+
+ if (spdk_json_decode_object(params, rpc_construct_aio_decoders,
+ SPDK_COUNTOF(rpc_construct_aio_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.filename == NULL) {
+ goto invalid;
+ }
+
+ bdev = create_aio_disk(req.name, req.filename, req.block_size);
+ if (bdev == NULL) {
+ goto invalid;
+ }
+
+ free_rpc_construct_aio(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_construct_aio(&req);
+}
+SPDK_RPC_REGISTER("construct_aio_bdev", spdk_rpc_construct_aio_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_aio {
+ char *name;
+};
+
+static void
+free_rpc_delete_aio(struct rpc_delete_aio *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_aio_decoders[] = {
+ {"name", offsetof(struct rpc_delete_aio, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_aio_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_aio_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_aio req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_aio_decoders,
+ SPDK_COUNTOF(rpc_delete_aio_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_aio_disk(bdev, _spdk_rpc_delete_aio_bdev_cb, request);
+
+ free_rpc_delete_aio(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_aio(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_aio_bdev", spdk_rpc_delete_aio_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/bdev.c b/src/spdk/lib/bdev/bdev.c
new file mode 100644
index 00000000..ab82fffd
--- /dev/null
+++ b/src/spdk/lib/bdev/bdev.c
@@ -0,0 +1,3950 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+
+#include "spdk/config.h"
+#include "spdk/env.h"
+#include "spdk/event.h"
+#include "spdk/thread.h"
+#include "spdk/likely.h"
+#include "spdk/queue.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/util.h"
+#include "spdk/trace.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk/string.h"
+
+#ifdef SPDK_CONFIG_VTUNE
+#include "ittnotify.h"
+#include "ittnotify_types.h"
+int __itt_init_ittlib(const char *, __itt_group_id);
+#endif
+
+#define SPDK_BDEV_IO_POOL_SIZE (64 * 1024)
+#define SPDK_BDEV_IO_CACHE_SIZE 256
+#define BUF_SMALL_POOL_SIZE 8192
+#define BUF_LARGE_POOL_SIZE 1024
+#define NOMEM_THRESHOLD_COUNT 8
+#define ZERO_BUFFER_SIZE 0x100000
+
+#define OWNER_BDEV 0x2
+
+#define OBJECT_BDEV_IO 0x2
+
+#define TRACE_GROUP_BDEV 0x3
+#define TRACE_BDEV_IO_START SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x0)
+#define TRACE_BDEV_IO_DONE SPDK_TPOINT_ID(TRACE_GROUP_BDEV, 0x1)
+
+#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC 1000
+#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1
+#define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512
+#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 10000
+#define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC (10 * 1024 * 1024)
+#define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED UINT64_MAX
+
+static const char *qos_conf_type[] = {"Limit_IOPS", "Limit_BPS"};
+static const char *qos_rpc_type[] = {"rw_ios_per_sec", "rw_mbytes_per_sec"};
+
+TAILQ_HEAD(spdk_bdev_list, spdk_bdev);
+
+struct spdk_bdev_mgr {
+ struct spdk_mempool *bdev_io_pool;
+
+ struct spdk_mempool *buf_small_pool;
+ struct spdk_mempool *buf_large_pool;
+
+ void *zero_buffer;
+
+ TAILQ_HEAD(bdev_module_list, spdk_bdev_module) bdev_modules;
+
+ struct spdk_bdev_list bdevs;
+
+ bool init_complete;
+ bool module_init_complete;
+
+#ifdef SPDK_CONFIG_VTUNE
+ __itt_domain *domain;
+#endif
+};
+
+static struct spdk_bdev_mgr g_bdev_mgr = {
+ .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
+ .bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
+ .init_complete = false,
+ .module_init_complete = false,
+};
+
+static struct spdk_bdev_opts g_bdev_opts = {
+ .bdev_io_pool_size = SPDK_BDEV_IO_POOL_SIZE,
+ .bdev_io_cache_size = SPDK_BDEV_IO_CACHE_SIZE,
+};
+
+static spdk_bdev_init_cb g_init_cb_fn = NULL;
+static void *g_init_cb_arg = NULL;
+
+static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
+static void *g_fini_cb_arg = NULL;
+static struct spdk_thread *g_fini_thread = NULL;
+
+struct spdk_bdev_qos_limit {
+ /** IOs or bytes allowed per second (i.e., 1s). */
+ uint64_t limit;
+
+ /** Remaining IOs or bytes allowed in the current timeslice (e.g., 1ms).
+ * For byte limits this is allowed to run negative: if an I/O is submitted
+ * while some bytes remain but needs more than that amount, the excess is
+ * deducted from the next timeslice.
+ */
+ int64_t remaining_this_timeslice;
+
+ /** Minimum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
+ uint32_t min_per_timeslice;
+
+ /** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
+ uint32_t max_per_timeslice;
+};
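With SPDK_BDEV_QOS_TIMESLICE_IN_USEC defined as 1000 above, the budget arithmetic works out as in this sketch (the 10,000 IOPS and 10 MiB/s figures are illustrative, taken from the documented minimums):

    timeslices per second = 1,000,000 us / 1,000 us        = 1,000
    max_per_timeslice     = 10,000 IOPS / 1,000 timeslices = 10 IOs
    max_per_timeslice     = 10 MiB/s    / 1,000 timeslices = ~10,485 bytes

A single 64 KiB write can therefore push remaining_this_timeslice negative; the deficit is repaid out of the following timeslices, as described above.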
+
+struct spdk_bdev_qos {
+ /** Types of structure of rate limits. */
+ struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+
+ /** The channel that all I/O are funneled through. */
+ struct spdk_bdev_channel *ch;
+
+ /** The thread on which the poller is running. */
+ struct spdk_thread *thread;
+
+ /** Queue of I/O waiting to be issued. */
+ bdev_io_tailq_t queued;
+
+ /** Size of a timeslice in tsc ticks. */
+ uint64_t timeslice_size;
+
+ /** Timestamp of start of last timeslice. */
+ uint64_t last_timeslice;
+
+ /** Poller that processes queued I/O commands each time slice. */
+ struct spdk_poller *poller;
+};
+
+struct spdk_bdev_mgmt_channel {
+ bdev_io_stailq_t need_buf_small;
+ bdev_io_stailq_t need_buf_large;
+
+ /*
+ * Each thread keeps a cache of bdev_io - this allows
+ * bdev threads which are *not* DPDK threads to still
+ * benefit from a per-thread bdev_io cache. Without
+ * this, non-DPDK threads fetching from the mempool
+ * incur a cmpxchg on get and put.
+ */
+ bdev_io_stailq_t per_thread_cache;
+ uint32_t per_thread_cache_count;
+ uint32_t bdev_io_cache_size;
+
+ TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources;
+ TAILQ_HEAD(, spdk_bdev_io_wait_entry) io_wait_queue;
+};
+
+/*
+ * Per-module (or per-io_device) data. Multiple bdevs built on the same io_device
+ * will queue here their IO that awaits retry. It makes it possible to retry sending
+ * IO to one bdev after IO from other bdev completes.
+ */
+struct spdk_bdev_shared_resource {
+ /* The bdev management channel */
+ struct spdk_bdev_mgmt_channel *mgmt_ch;
+
+ /*
+ * Count of I/O submitted to bdev module and waiting for completion.
+ * Incremented before submit_request() is called on an spdk_bdev_io.
+ */
+ uint64_t io_outstanding;
+
+ /*
+ * Queue of IO awaiting retry because of a previous NOMEM status returned
+ * on this channel.
+ */
+ bdev_io_tailq_t nomem_io;
+
+ /*
+ * Threshold which io_outstanding must drop to before retrying nomem_io.
+ */
+ uint64_t nomem_threshold;
+
+ /* I/O channel allocated by a bdev module */
+ struct spdk_io_channel *shared_ch;
+
+ /* Refcount of bdev channels using this resource */
+ uint32_t ref;
+
+ TAILQ_ENTRY(spdk_bdev_shared_resource) link;
+};
+
+#define BDEV_CH_RESET_IN_PROGRESS (1 << 0)
+#define BDEV_CH_QOS_ENABLED (1 << 1)
+
+struct spdk_bdev_channel {
+ struct spdk_bdev *bdev;
+
+ /* The channel for the underlying device */
+ struct spdk_io_channel *channel;
+
+ /* Per io_device per thread data */
+ struct spdk_bdev_shared_resource *shared_resource;
+
+ struct spdk_bdev_io_stat stat;
+
+ /*
+ * Count of I/O submitted through this channel and waiting for completion.
+ * Incremented before submit_request() is called on an spdk_bdev_io.
+ */
+ uint64_t io_outstanding;
+
+ bdev_io_tailq_t queued_resets;
+
+ uint32_t flags;
+
+#ifdef SPDK_CONFIG_VTUNE
+ uint64_t start_tsc;
+ uint64_t interval_tsc;
+ __itt_string_handle *handle;
+ struct spdk_bdev_io_stat prev_stat;
+#endif
+
+};
+
+struct spdk_bdev_desc {
+ struct spdk_bdev *bdev;
+ struct spdk_thread *thread;
+ spdk_bdev_remove_cb_t remove_cb;
+ void *remove_ctx;
+ bool remove_scheduled;
+ bool closed;
+ bool write;
+ TAILQ_ENTRY(spdk_bdev_desc) link;
+};
+
+struct spdk_bdev_iostat_ctx {
+ struct spdk_bdev_io_stat *stat;
+ spdk_bdev_get_device_stat_cb cb;
+ void *cb_arg;
+};
+
+#define __bdev_to_io_dev(bdev) (((char *)bdev) + 1)
+#define __bdev_from_io_dev(io_dev) ((struct spdk_bdev *)(((char *)io_dev) - 1))
+
+static void _spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg);
+static void _spdk_bdev_write_zero_buffer_next(void *_bdev_io);
+
+void
+spdk_bdev_get_opts(struct spdk_bdev_opts *opts)
+{
+ *opts = g_bdev_opts;
+}
+
+int
+spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
+{
+ uint32_t min_pool_size;
+
+ /*
+ * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
+ * initialization. A second mgmt_ch will be created on the same thread when the application starts
+ * but before the deferred put_io_channel event is executed for the first mgmt_ch.
+ */
+ min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
+ if (opts->bdev_io_pool_size < min_pool_size) {
+ SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
+ " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
+ spdk_thread_get_count());
+ SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
+ return -1;
+ }
+
+ g_bdev_opts = *opts;
+ return 0;
+}
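As a worked example of the check above (not program output): with the default SPDK_BDEV_IO_CACHE_SIZE of 256 and, say, three existing threads, min_pool_size = 256 * (3 + 1) = 1024, so the default pool of 64 * 1024 entries passes comfortably, while a requested bdev_io_pool_size below 1024 would be rejected here.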
+
+struct spdk_bdev *
+spdk_bdev_first(void)
+{
+ struct spdk_bdev *bdev;
+
+ bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
+ if (bdev) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
+ }
+
+ return bdev;
+}
+
+struct spdk_bdev *
+spdk_bdev_next(struct spdk_bdev *prev)
+{
+ struct spdk_bdev *bdev;
+
+ bdev = TAILQ_NEXT(prev, internal.link);
+ if (bdev) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
+ }
+
+ return bdev;
+}
+
+static struct spdk_bdev *
+_bdev_next_leaf(struct spdk_bdev *bdev)
+{
+ while (bdev != NULL) {
+ if (bdev->internal.claim_module == NULL) {
+ return bdev;
+ } else {
+ bdev = TAILQ_NEXT(bdev, internal.link);
+ }
+ }
+
+ return bdev;
+}
+
+struct spdk_bdev *
+spdk_bdev_first_leaf(void)
+{
+ struct spdk_bdev *bdev;
+
+ bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));
+
+ if (bdev) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
+ }
+
+ return bdev;
+}
+
+struct spdk_bdev *
+spdk_bdev_next_leaf(struct spdk_bdev *prev)
+{
+ struct spdk_bdev *bdev;
+
+ bdev = _bdev_next_leaf(TAILQ_NEXT(prev, internal.link));
+
+ if (bdev) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
+ }
+
+ return bdev;
+}
+
+struct spdk_bdev *
+spdk_bdev_get_by_name(const char *bdev_name)
+{
+ struct spdk_bdev_alias *tmp;
+ struct spdk_bdev *bdev = spdk_bdev_first();
+
+ while (bdev != NULL) {
+ if (strcmp(bdev_name, bdev->name) == 0) {
+ return bdev;
+ }
+
+ TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
+ if (strcmp(bdev_name, tmp->alias) == 0) {
+ return bdev;
+ }
+ }
+
+ bdev = spdk_bdev_next(bdev);
+ }
+
+ return NULL;
+}
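A short sketch of how the lookup and the iterators compose; list_bdevs and the "Malloc0" name are illustrative (assumes spdk/bdev.h and stdio.h):

    static void
    list_bdevs(void)
    {
        struct spdk_bdev *bdev;

        /* Resolve a single bdev by its name or one of its aliases. */
        bdev = spdk_bdev_get_by_name("Malloc0");
        if (bdev != NULL) {
            printf("found %s\n", spdk_bdev_get_name(bdev));
        }

        /* Walk the whole registration list. */
        for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
            printf("bdev: %s\n", spdk_bdev_get_name(bdev));
        }
    }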
+
+void
+spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
+{
+ struct iovec *iovs;
+
+ iovs = bdev_io->u.bdev.iovs;
+
+ assert(iovs != NULL);
+ assert(bdev_io->u.bdev.iovcnt >= 1);
+
+ iovs[0].iov_base = buf;
+ iovs[0].iov_len = len;
+}
+
+static void
+spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_mempool *pool;
+ struct spdk_bdev_io *tmp;
+ void *buf, *aligned_buf;
+ bdev_io_stailq_t *stailq;
+ struct spdk_bdev_mgmt_channel *ch;
+
+ assert(bdev_io->u.bdev.iovcnt == 1);
+
+ buf = bdev_io->internal.buf;
+ ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
+
+ bdev_io->internal.buf = NULL;
+
+ if (bdev_io->internal.buf_len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
+ pool = g_bdev_mgr.buf_small_pool;
+ stailq = &ch->need_buf_small;
+ } else {
+ pool = g_bdev_mgr.buf_large_pool;
+ stailq = &ch->need_buf_large;
+ }
+
+ if (STAILQ_EMPTY(stailq)) {
+ spdk_mempool_put(pool, buf);
+ } else {
+ tmp = STAILQ_FIRST(stailq);
+
+ aligned_buf = (void *)(((uintptr_t)buf + 511) & ~511UL);
+ spdk_bdev_io_set_buf(tmp, aligned_buf, tmp->internal.buf_len);
+
+ STAILQ_REMOVE_HEAD(stailq, internal.buf_link);
+ tmp->internal.buf = buf;
+ tmp->internal.get_buf_cb(tmp->internal.ch->channel, tmp);
+ }
+}
+
+void
+spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
+{
+ struct spdk_mempool *pool;
+ bdev_io_stailq_t *stailq;
+ void *buf, *aligned_buf;
+ struct spdk_bdev_mgmt_channel *mgmt_ch;
+
+ assert(cb != NULL);
+ assert(bdev_io->u.bdev.iovs != NULL);
+
+ if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) {
+ /* Buffer already present */
+ cb(bdev_io->internal.ch->channel, bdev_io);
+ return;
+ }
+
+ assert(len <= SPDK_BDEV_LARGE_BUF_MAX_SIZE);
+ mgmt_ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
+
+ bdev_io->internal.buf_len = len;
+ bdev_io->internal.get_buf_cb = cb;
+ if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
+ pool = g_bdev_mgr.buf_small_pool;
+ stailq = &mgmt_ch->need_buf_small;
+ } else {
+ pool = g_bdev_mgr.buf_large_pool;
+ stailq = &mgmt_ch->need_buf_large;
+ }
+
+ buf = spdk_mempool_get(pool);
+
+ if (!buf) {
+ STAILQ_INSERT_TAIL(stailq, bdev_io, internal.buf_link);
+ } else {
+ aligned_buf = (void *)(((uintptr_t)buf + 511) & ~511UL);
+ spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
+
+ bdev_io->internal.buf = buf;
+ bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io);
+ }
+}
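Both the put and get paths round the raw pool element up with (buf + 511) & ~511UL, which is why each pool element is allocated SPDK_BDEV_SMALL/LARGE_BUF_MAX_SIZE plus 512 bytes. A worked example of the expression:

    buf        = 0x7f32a1200203
    buf + 511  = 0x7f32a1200402
    & ~511UL   = 0x7f32a1200400    /* 512-byte aligned, at most 511 bytes past buf */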
+
+static int
+spdk_bdev_module_get_max_ctx_size(void)
+{
+ struct spdk_bdev_module *bdev_module;
+ int max_bdev_module_size = 0;
+
+ TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
+ max_bdev_module_size = bdev_module->get_ctx_size();
+ }
+ }
+
+ return max_bdev_module_size;
+}
+
+void
+spdk_bdev_config_text(FILE *fp)
+{
+ struct spdk_bdev_module *bdev_module;
+
+ TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (bdev_module->config_text) {
+ bdev_module->config_text(fp);
+ }
+ }
+}
+
+static void
+spdk_bdev_qos_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ int i;
+ struct spdk_bdev_qos *qos = bdev->internal.qos;
+ uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+
+ if (!qos) {
+ return;
+ }
+
+ spdk_bdev_get_qos_rate_limits(bdev, limits);
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "set_bdev_qos_limit");
+ spdk_json_write_name(w, "params");
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "name", bdev->name);
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] > 0) {
+ spdk_json_write_named_uint64(w, qos_rpc_type[i], limits[i]);
+ }
+ }
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+void
+spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
+{
+ struct spdk_bdev_module *bdev_module;
+ struct spdk_bdev *bdev;
+
+ assert(w != NULL);
+
+ spdk_json_write_array_begin(w);
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "set_bdev_options");
+ spdk_json_write_name(w, "params");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_uint32(w, "bdev_io_pool_size", g_bdev_opts.bdev_io_pool_size);
+ spdk_json_write_named_uint32(w, "bdev_io_cache_size", g_bdev_opts.bdev_io_cache_size);
+ spdk_json_write_object_end(w);
+ spdk_json_write_object_end(w);
+
+ TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (bdev_module->config_json) {
+ bdev_module->config_json(w);
+ }
+ }
+
+ TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, internal.link) {
+ spdk_bdev_qos_config_json(bdev, w);
+
+ if (bdev->fn_table->write_config_json) {
+ bdev->fn_table->write_config_json(bdev, w);
+ }
+ }
+
+ spdk_json_write_array_end(w);
+}
+
+static int
+spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_mgmt_channel *ch = ctx_buf;
+ struct spdk_bdev_io *bdev_io;
+ uint32_t i;
+
+ STAILQ_INIT(&ch->need_buf_small);
+ STAILQ_INIT(&ch->need_buf_large);
+
+ STAILQ_INIT(&ch->per_thread_cache);
+ ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;
+
+ /* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
+ ch->per_thread_cache_count = 0;
+ for (i = 0; i < ch->bdev_io_cache_size; i++) {
+ bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
+ assert(bdev_io != NULL);
+ ch->per_thread_cache_count++;
+ STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link);
+ }
+
+ TAILQ_INIT(&ch->shared_resources);
+ TAILQ_INIT(&ch->io_wait_queue);
+
+ return 0;
+}
+
+static void
+spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_mgmt_channel *ch = ctx_buf;
+ struct spdk_bdev_io *bdev_io;
+
+ if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) {
+ SPDK_ERRLOG("Pending I/O list wasn't empty on mgmt channel free\n");
+ }
+
+ if (!TAILQ_EMPTY(&ch->shared_resources)) {
+ SPDK_ERRLOG("Module channel list wasn't empty on mgmt channel free\n");
+ }
+
+ while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
+ bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
+ STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
+ ch->per_thread_cache_count--;
+ spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
+ }
+
+ assert(ch->per_thread_cache_count == 0);
+}
+
+static void
+spdk_bdev_init_complete(int rc)
+{
+ spdk_bdev_init_cb cb_fn = g_init_cb_fn;
+ void *cb_arg = g_init_cb_arg;
+ struct spdk_bdev_module *m;
+
+ g_bdev_mgr.init_complete = true;
+ g_init_cb_fn = NULL;
+ g_init_cb_arg = NULL;
+
+ /*
+ * For modules that need to know when subsystem init is complete,
+ * inform them now.
+ */
+ if (rc == 0) {
+ TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (m->init_complete) {
+ m->init_complete();
+ }
+ }
+ }
+
+ cb_fn(cb_arg, rc);
+}
+
+static void
+spdk_bdev_module_action_complete(void)
+{
+ struct spdk_bdev_module *m;
+
+ /*
+ * Don't finish bdev subsystem initialization if
+ * module pre-initialization is still in progress, or
+ * the subsystem has already been initialized.
+ */
+ if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
+ return;
+ }
+
+ /*
+ * Check all bdev modules for inits/examinations in progress. If any
+ * exist, return immediately since we cannot finish bdev subsystem
+ * initialization until all are completed.
+ */
+ TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (m->internal.action_in_progress > 0) {
+ return;
+ }
+ }
+
+ /*
+ * Modules already finished initialization - now that all
+ * the bdev modules have finished their asynchronous I/O
+ * processing, the entire bdev layer can be marked as complete.
+ */
+ spdk_bdev_init_complete(0);
+}
+
+static void
+spdk_bdev_module_action_done(struct spdk_bdev_module *module)
+{
+ assert(module->internal.action_in_progress > 0);
+ module->internal.action_in_progress--;
+ spdk_bdev_module_action_complete();
+}
+
+void
+spdk_bdev_module_init_done(struct spdk_bdev_module *module)
+{
+ spdk_bdev_module_action_done(module);
+}
+
+void
+spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
+{
+ spdk_bdev_module_action_done(module);
+}
+
+/** The last initialized bdev module */
+static struct spdk_bdev_module *g_resume_bdev_module = NULL;
+
+static int
+spdk_bdev_modules_init(void)
+{
+ struct spdk_bdev_module *module;
+ int rc = 0;
+
+ TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ g_resume_bdev_module = module;
+ rc = module->module_init();
+ if (rc != 0) {
+ return rc;
+ }
+ }
+
+ g_resume_bdev_module = NULL;
+ return 0;
+}
+
+
+static void
+spdk_bdev_init_failed_complete(void *cb_arg)
+{
+ spdk_bdev_init_complete(-1);
+}
+
+static void
+spdk_bdev_init_failed(void *cb_arg)
+{
+ spdk_bdev_finish(spdk_bdev_init_failed_complete, NULL);
+}
+
+void
+spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
+{
+ struct spdk_conf_section *sp;
+ struct spdk_bdev_opts bdev_opts;
+ int32_t bdev_io_pool_size, bdev_io_cache_size;
+ int cache_size;
+ int rc = 0;
+ char mempool_name[32];
+
+ assert(cb_fn != NULL);
+
+ sp = spdk_conf_find_section(NULL, "Bdev");
+ if (sp != NULL) {
+ spdk_bdev_get_opts(&bdev_opts);
+
+ bdev_io_pool_size = spdk_conf_section_get_intval(sp, "BdevIoPoolSize");
+ if (bdev_io_pool_size >= 0) {
+ bdev_opts.bdev_io_pool_size = bdev_io_pool_size;
+ }
+
+ bdev_io_cache_size = spdk_conf_section_get_intval(sp, "BdevIoCacheSize");
+ if (bdev_io_cache_size >= 0) {
+ bdev_opts.bdev_io_cache_size = bdev_io_cache_size;
+ }
+
+ if (spdk_bdev_set_opts(&bdev_opts)) {
+ spdk_bdev_init_complete(-1);
+ return;
+ }
+
+ assert(memcmp(&bdev_opts, &g_bdev_opts, sizeof(bdev_opts)) == 0);
+ }
+
+ g_init_cb_fn = cb_fn;
+ g_init_cb_arg = cb_arg;
+
+ snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());
+
+ g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
+ g_bdev_opts.bdev_io_pool_size,
+ sizeof(struct spdk_bdev_io) +
+ spdk_bdev_module_get_max_ctx_size(),
+ 0,
+ SPDK_ENV_SOCKET_ID_ANY);
+
+ if (g_bdev_mgr.bdev_io_pool == NULL) {
+ SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
+ spdk_bdev_init_complete(-1);
+ return;
+ }
+
+ /**
+ * Ensure no more than half of the total buffers end up in local caches, by
+ * using spdk_thread_get_count() to determine how many local caches we need
+ * to account for.
+ */
+ cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_thread_get_count());
+ snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());
+
+ g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
+ BUF_SMALL_POOL_SIZE,
+ SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
+ cache_size,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!g_bdev_mgr.buf_small_pool) {
+ SPDK_ERRLOG("create rbuf small pool failed\n");
+ spdk_bdev_init_complete(-1);
+ return;
+ }
+
+ cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_thread_get_count());
+ snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());
+
+ g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
+ BUF_LARGE_POOL_SIZE,
+ SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
+ cache_size,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!g_bdev_mgr.buf_large_pool) {
+ SPDK_ERRLOG("create rbuf large pool failed\n");
+ spdk_bdev_init_complete(-1);
+ return;
+ }
+
+ g_bdev_mgr.zero_buffer = spdk_dma_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
+ NULL);
+ if (!g_bdev_mgr.zero_buffer) {
+ SPDK_ERRLOG("create bdev zero buffer failed\n");
+ spdk_bdev_init_complete(-1);
+ return;
+ }
+
+#ifdef SPDK_CONFIG_VTUNE
+ g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
+#endif
+
+ spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
+ spdk_bdev_mgmt_channel_destroy,
+ sizeof(struct spdk_bdev_mgmt_channel),
+ "bdev_mgr");
+
+ rc = spdk_bdev_modules_init();
+ g_bdev_mgr.module_init_complete = true;
+ if (rc != 0) {
+ SPDK_ERRLOG("bdev modules init failed\n");
+ spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_init_failed, NULL);
+ return;
+ }
+
+ spdk_bdev_module_action_complete();
+}
+
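+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): typical
+ * application-side use of spdk_bdev_initialize(). The callback signature
+ * matches the spdk_bdev_init_cb invoked via spdk_bdev_init_complete().
+ */
+#if 0
+static void
+example_bdev_init_done(void *cb_arg, int rc)
+{
+	if (rc != 0) {
+		SPDK_ERRLOG("bdev subsystem initialization failed: %d\n", rc);
+		return;
+	}
+	/* All modules finished init and examine - bdevs can be opened now. */
+}
+
+static void
+example_subsystem_start(void)
+{
+	spdk_bdev_initialize(example_bdev_init_done, NULL);
+}
+#endif
+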
+static void
+spdk_bdev_mgr_unregister_cb(void *io_device)
+{
+ spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;
+
+ if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != g_bdev_opts.bdev_io_pool_size) {
+ SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
+ spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
+ g_bdev_opts.bdev_io_pool_size);
+ }
+
+ if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
+ SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
+ spdk_mempool_count(g_bdev_mgr.buf_small_pool),
+ BUF_SMALL_POOL_SIZE);
+ assert(false);
+ }
+
+ if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
+ SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
+ spdk_mempool_count(g_bdev_mgr.buf_large_pool),
+ BUF_LARGE_POOL_SIZE);
+ assert(false);
+ }
+
+ spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
+ spdk_mempool_free(g_bdev_mgr.buf_small_pool);
+ spdk_mempool_free(g_bdev_mgr.buf_large_pool);
+ spdk_dma_free(g_bdev_mgr.zero_buffer);
+
+ cb_fn(g_fini_cb_arg);
+ g_fini_cb_fn = NULL;
+ g_fini_cb_arg = NULL;
+ g_bdev_mgr.init_complete = false;
+ g_bdev_mgr.module_init_complete = false;
+}
+
+static void
+spdk_bdev_module_finish_iter(void *arg)
+{
+ struct spdk_bdev_module *bdev_module;
+
+ /* Start iterating from the last touched module */
+ if (!g_resume_bdev_module) {
+ bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
+ } else {
+ bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list,
+ internal.tailq);
+ }
+
+ while (bdev_module) {
+ if (bdev_module->async_fini) {
+ /* Save our place so we can resume later. We must
+ * save the variable here, before calling module_fini()
+ * below, because in some cases the module may immediately
+ * call spdk_bdev_module_finish_done() and re-enter
+ * this function to continue iterating. */
+ g_resume_bdev_module = bdev_module;
+ }
+
+ if (bdev_module->module_fini) {
+ bdev_module->module_fini();
+ }
+
+ if (bdev_module->async_fini) {
+ return;
+ }
+
+ bdev_module = TAILQ_PREV(bdev_module, bdev_module_list,
+ internal.tailq);
+ }
+
+ g_resume_bdev_module = NULL;
+ spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_mgr_unregister_cb);
+}
+
+void
+spdk_bdev_module_finish_done(void)
+{
+ if (spdk_get_thread() != g_fini_thread) {
+ spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL);
+ } else {
+ spdk_bdev_module_finish_iter(NULL);
+ }
+}
+
+static void
+_spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
+{
+ struct spdk_bdev *bdev = cb_arg;
+
+ if (bdeverrno && bdev) {
+ SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
+ bdev->name);
+
+ /*
+		 * Since the call to spdk_bdev_unregister() failed, we have no way to free this
+		 * bdev; just remove it from the list manually and continue with the next bdev
+		 * in the list.
+ */
+ TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
+ }
+
+ if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Done unregistering bdevs\n");
+ /*
+		 * Bdev module finish needs to be deferred, as we might be in the middle of some
+		 * context (like bdev part free) that will use this bdev (or private bdev driver
+		 * ctx data) after returning.
+ */
+ spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_module_finish_iter, NULL);
+ return;
+ }
+
+ /*
+ * Unregister the last bdev in the list. The last bdev in the list should be a bdev
+ * that has no bdevs that depend on it.
+ */
+ bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Unregistering bdev '%s'\n", bdev->name);
+ spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev);
+}
+
+void
+spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
+{
+ struct spdk_bdev_module *m;
+
+ assert(cb_fn != NULL);
+
+ g_fini_thread = spdk_get_thread();
+
+ g_fini_cb_fn = cb_fn;
+ g_fini_cb_arg = cb_arg;
+
+ TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (m->fini_start) {
+ m->fini_start();
+ }
+ }
+
+ _spdk_bdev_finish_unregister_bdevs_iter(NULL, 0);
+}
+
+static struct spdk_bdev_io *
+spdk_bdev_get_io(struct spdk_bdev_channel *channel)
+{
+ struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch;
+ struct spdk_bdev_io *bdev_io;
+
+ if (ch->per_thread_cache_count > 0) {
+ bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
+ STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
+ ch->per_thread_cache_count--;
+ } else if (spdk_unlikely(!TAILQ_EMPTY(&ch->io_wait_queue))) {
+ /*
+ * Don't try to look for bdev_ios in the global pool if there are
+ * waiters on bdev_ios - we don't want this caller to jump the line.
+ */
+ bdev_io = NULL;
+ } else {
+ bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
+ }
+
+ return bdev_io;
+}
+
+void
+spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev_mgmt_channel *ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
+
+ assert(bdev_io != NULL);
+ assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING);
+
+ if (bdev_io->internal.buf != NULL) {
+ spdk_bdev_io_put_buf(bdev_io);
+ }
+
+ if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
+ ch->per_thread_cache_count++;
+ STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link);
+ while (ch->per_thread_cache_count > 0 && !TAILQ_EMPTY(&ch->io_wait_queue)) {
+ struct spdk_bdev_io_wait_entry *entry;
+
+ entry = TAILQ_FIRST(&ch->io_wait_queue);
+ TAILQ_REMOVE(&ch->io_wait_queue, entry, link);
+ entry->cb_fn(entry->cb_arg);
+ }
+ } else {
+ /* We should never have a full cache with entries on the io wait queue. */
+ assert(TAILQ_EMPTY(&ch->io_wait_queue));
+ spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
+ }
+}
+
+static bool
+_spdk_bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit)
+{
+ assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
+
+ switch (limit) {
+ case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
+ return true;
+ case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
+ return false;
+ case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES:
+ default:
+ return false;
+ }
+}
+
+static bool
+_spdk_bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t
+_spdk_bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return bdev_io->u.nvme_passthru.nbytes;
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return bdev_io->u.bdev.num_blocks * bdev->blocklen;
+ default:
+ return 0;
+ }
+}
+
+static void
+_spdk_bdev_qos_update_per_io(struct spdk_bdev_qos *qos, uint64_t io_size_in_byte)
+{
+ int i;
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ continue;
+ }
+
+ switch (i) {
+ case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
+ qos->rate_limits[i].remaining_this_timeslice--;
+ break;
+ case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
+ qos->rate_limits[i].remaining_this_timeslice -= io_size_in_byte;
+ break;
+ case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES:
+ default:
+ break;
+ }
+ }
+}
+
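+/*
+ * Worked example (illustrative): with both an IOPS and a bandwidth limit
+ * configured, a single 128 KiB write consumes one unit from the IOPS bucket
+ * and 131072 bytes from the byte bucket:
+ *
+ *   rate_limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT].remaining_this_timeslice -= 1;
+ *   rate_limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT].remaining_this_timeslice -= 128 * 1024;
+ *
+ * Either bucket reaching zero (or going negative) stalls further submission
+ * in _spdk_bdev_qos_io_submit() until the poller replenishes the timeslice.
+ */
+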
+static void
+_spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos)
+{
+ struct spdk_bdev_io *bdev_io = NULL;
+ struct spdk_bdev *bdev = ch->bdev;
+ struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
+ int i;
+ bool to_limit_io;
+ uint64_t io_size_in_byte;
+
+ while (!TAILQ_EMPTY(&qos->queued)) {
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (qos->rate_limits[i].max_per_timeslice > 0 &&
+ (qos->rate_limits[i].remaining_this_timeslice <= 0)) {
+ return;
+ }
+ }
+
+ bdev_io = TAILQ_FIRST(&qos->queued);
+ TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);
+ ch->io_outstanding++;
+ shared_resource->io_outstanding++;
+ to_limit_io = _spdk_bdev_qos_io_to_limit(bdev_io);
+ if (to_limit_io == true) {
+ io_size_in_byte = _spdk_bdev_get_io_size_in_byte(bdev_io);
+ _spdk_bdev_qos_update_per_io(qos, io_size_in_byte);
+ }
+ bdev->fn_table->submit_request(ch->channel, bdev_io);
+ }
+}
+
+static void
+_spdk_bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb_fn)
+{
+ int rc;
+
+ bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
+ bdev_io->internal.waitq_entry.cb_fn = cb_fn;
+ bdev_io->internal.waitq_entry.cb_arg = bdev_io;
+ rc = spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
+ &bdev_io->internal.waitq_entry);
+ if (rc != 0) {
+ SPDK_ERRLOG("Queue IO failed, rc=%d\n", rc);
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
+ }
+}
+
+static bool
+_spdk_bdev_io_type_can_split(uint8_t type)
+{
+ assert(type != SPDK_BDEV_IO_TYPE_INVALID);
+ assert(type < SPDK_BDEV_NUM_IO_TYPES);
+
+	/* Only split READ and WRITE I/O. Theoretically other types of I/O, like
+	 * UNMAP, could be split, but those requests are typically much larger
+	 * (sometimes spanning the entire block device) and the bdev module can
+	 * split them more efficiently. They also carry no payload, which makes
+	 * splitting them in the module simpler.
+	 */
+ if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static bool
+_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io)
+{
+ uint64_t start_stripe, end_stripe;
+ uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
+
+ if (io_boundary == 0) {
+ return false;
+ }
+
+ if (!_spdk_bdev_io_type_can_split(bdev_io->type)) {
+ return false;
+ }
+
+ start_stripe = bdev_io->u.bdev.offset_blocks;
+ end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1;
+ /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
+ if (spdk_likely(spdk_u32_is_pow2(io_boundary))) {
+ start_stripe >>= spdk_u32log2(io_boundary);
+ end_stripe >>= spdk_u32log2(io_boundary);
+ } else {
+ start_stripe /= io_boundary;
+ end_stripe /= io_boundary;
+ }
+ return (start_stripe != end_stripe);
+}
+
+static uint32_t
+_to_next_boundary(uint64_t offset, uint32_t boundary)
+{
+ return (boundary - (offset % boundary));
+}
+
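+/*
+ * Worked example (illustrative): with optimal_io_boundary = 32 blocks, an
+ * I/O at offset_blocks = 70 has _to_next_boundary(70, 32) = 32 - (70 % 32)
+ * = 26 blocks left in its stripe. A 10-block I/O at offset 70 stays within
+ * one stripe (start_stripe = end_stripe = 2) and is not split; a 30-block
+ * I/O ends in stripe 3 (block 99) and is split into 26- and 4-block
+ * children.
+ */
+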
+static void
+_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+
+static void
+_spdk_bdev_io_split_with_payload(void *_bdev_io)
+{
+ struct spdk_bdev_io *bdev_io = _bdev_io;
+ uint64_t current_offset, remaining;
+ uint32_t blocklen, to_next_boundary, to_next_boundary_bytes;
+ struct iovec *parent_iov, *iov;
+ uint64_t parent_iov_offset, iov_len;
+ uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt;
+ int rc;
+
+ remaining = bdev_io->u.bdev.split_remaining_num_blocks;
+ current_offset = bdev_io->u.bdev.split_current_offset_blocks;
+ blocklen = bdev_io->bdev->blocklen;
+ parent_iov_offset = (current_offset - bdev_io->u.bdev.offset_blocks) * blocklen;
+ parent_iovcnt = bdev_io->u.bdev.iovcnt;
+
+ for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) {
+ parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
+ if (parent_iov_offset < parent_iov->iov_len) {
+ break;
+ }
+ parent_iov_offset -= parent_iov->iov_len;
+ }
+
+ child_iovcnt = 0;
+ while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
+ to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary);
+ to_next_boundary = spdk_min(remaining, to_next_boundary);
+ to_next_boundary_bytes = to_next_boundary * blocklen;
+ iov = &bdev_io->child_iov[child_iovcnt];
+ iovcnt = 0;
+ while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
+ child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
+ parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
+ iov_len = spdk_min(to_next_boundary_bytes, parent_iov->iov_len - parent_iov_offset);
+ to_next_boundary_bytes -= iov_len;
+
+ bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
+ bdev_io->child_iov[child_iovcnt].iov_len = iov_len;
+
+ if (iov_len < parent_iov->iov_len - parent_iov_offset) {
+ parent_iov_offset += iov_len;
+ } else {
+ parent_iovpos++;
+ parent_iov_offset = 0;
+ }
+ child_iovcnt++;
+ iovcnt++;
+ }
+
+ if (to_next_boundary_bytes > 0) {
+ /* We had to stop this child I/O early because we ran out of
+ * child_iov space. Make sure the iovs collected are valid and
+ * then adjust to_next_boundary before starting the child I/O.
+ */
+ if ((to_next_boundary_bytes % blocklen) != 0) {
+ SPDK_ERRLOG("Remaining %" PRIu32 " is not multiple of block size %" PRIu32 "\n",
+ to_next_boundary_bytes, blocklen);
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ if (bdev_io->u.bdev.split_outstanding == 0) {
+ bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
+ }
+ return;
+ }
+ to_next_boundary -= to_next_boundary_bytes / blocklen;
+ }
+
+ bdev_io->u.bdev.split_outstanding++;
+
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ rc = spdk_bdev_readv_blocks(bdev_io->internal.desc,
+ spdk_io_channel_from_ctx(bdev_io->internal.ch),
+ iov, iovcnt, current_offset, to_next_boundary,
+ _spdk_bdev_io_split_done, bdev_io);
+ } else {
+ rc = spdk_bdev_writev_blocks(bdev_io->internal.desc,
+ spdk_io_channel_from_ctx(bdev_io->internal.ch),
+ iov, iovcnt, current_offset, to_next_boundary,
+ _spdk_bdev_io_split_done, bdev_io);
+ }
+
+ if (rc == 0) {
+ current_offset += to_next_boundary;
+ remaining -= to_next_boundary;
+ bdev_io->u.bdev.split_current_offset_blocks = current_offset;
+ bdev_io->u.bdev.split_remaining_num_blocks = remaining;
+ } else {
+ bdev_io->u.bdev.split_outstanding--;
+ if (rc == -ENOMEM) {
+ if (bdev_io->u.bdev.split_outstanding == 0) {
+					/* No child I/O is outstanding, so queue a wait for resources here. */
+ _spdk_bdev_queue_io_wait_with_cb(bdev_io,
+ _spdk_bdev_io_split_with_payload);
+ }
+ } else {
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ if (bdev_io->u.bdev.split_outstanding == 0) {
+ bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
+ }
+ }
+
+ return;
+ }
+ }
+}
+
+static void
+_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *parent_io = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+ parent_io->u.bdev.split_outstanding--;
+ if (parent_io->u.bdev.split_outstanding != 0) {
+ return;
+ }
+
+ /*
+ * Parent I/O finishes when all blocks are consumed or there is any failure of
+ * child I/O and no outstanding child I/O.
+ */
+ if (parent_io->u.bdev.split_remaining_num_blocks == 0 ||
+ parent_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+ parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
+ parent_io->internal.caller_ctx);
+ return;
+ }
+
+ /*
+ * Continue with the splitting process. This function will complete the parent I/O if the
+ * splitting is done.
+ */
+ _spdk_bdev_io_split_with_payload(parent_io);
+}
+
+static void
+_spdk_bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ assert(_spdk_bdev_io_type_can_split(bdev_io->type));
+
+ bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
+ bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
+ bdev_io->u.bdev.split_outstanding = 0;
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ _spdk_bdev_io_split_with_payload(bdev_io);
+}
+
+static void
+_spdk_bdev_io_submit(void *ctx)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
+ struct spdk_io_channel *ch = bdev_ch->channel;
+ struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
+ uint64_t tsc;
+
+ tsc = spdk_get_ticks();
+ bdev_io->internal.submit_tsc = tsc;
+ spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type);
+ bdev_ch->io_outstanding++;
+ shared_resource->io_outstanding++;
+ bdev_io->internal.in_submit_request = true;
+ if (spdk_likely(bdev_ch->flags == 0)) {
+ if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
+ bdev->fn_table->submit_request(ch, bdev_io);
+ } else {
+ bdev_ch->io_outstanding--;
+ shared_resource->io_outstanding--;
+ TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link);
+ }
+ } else if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ } else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
+ bdev_ch->io_outstanding--;
+ shared_resource->io_outstanding--;
+ TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link);
+ _spdk_bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
+ } else {
+ SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ bdev_io->internal.in_submit_request = false;
+}
+
+static void
+spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_thread *thread = spdk_io_channel_get_thread(bdev_io->internal.ch->channel);
+
+ assert(thread != NULL);
+ assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
+
+ if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) {
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ spdk_bdev_io_get_buf(bdev_io, _spdk_bdev_io_split,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ } else {
+ _spdk_bdev_io_split(NULL, bdev_io);
+ }
+ return;
+ }
+
+ if (bdev_io->internal.ch->flags & BDEV_CH_QOS_ENABLED) {
+ if ((thread == bdev->internal.qos->thread) || !bdev->internal.qos->thread) {
+ _spdk_bdev_io_submit(bdev_io);
+ } else {
+ bdev_io->internal.io_submit_ch = bdev_io->internal.ch;
+ bdev_io->internal.ch = bdev->internal.qos->ch;
+ spdk_thread_send_msg(bdev->internal.qos->thread, _spdk_bdev_io_submit, bdev_io);
+ }
+ } else {
+ _spdk_bdev_io_submit(bdev_io);
+ }
+}
+
+static void
+spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
+ struct spdk_io_channel *ch = bdev_ch->channel;
+
+ assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
+
+ bdev_io->internal.in_submit_request = true;
+ bdev->fn_table->submit_request(ch, bdev_io);
+ bdev_io->internal.in_submit_request = false;
+}
+
+static void
+spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
+ struct spdk_bdev *bdev, void *cb_arg,
+ spdk_bdev_io_completion_cb cb)
+{
+ bdev_io->bdev = bdev;
+ bdev_io->internal.caller_ctx = cb_arg;
+ bdev_io->internal.cb = cb;
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
+ bdev_io->internal.in_submit_request = false;
+ bdev_io->internal.buf = NULL;
+ bdev_io->internal.io_submit_ch = NULL;
+}
+
+static bool
+_spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
+{
+ return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
+}
+
+bool
+spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
+{
+ bool supported;
+
+ supported = _spdk_bdev_io_type_supported(bdev, io_type);
+
+ if (!supported) {
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ /* The bdev layer will emulate write zeroes as long as write is supported. */
+ supported = _spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return supported;
+}
+
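+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): callers
+ * typically probe optional I/O types before issuing them, e.g. preferring
+ * UNMAP and falling back to WRITE_ZEROES, which the bdev layer emulates on
+ * top of WRITE as described above.
+ */
+#if 0
+static int
+example_pick_trim_strategy(struct spdk_bdev *bdev)
+{
+	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
+		return 0;	/* caller issues spdk_bdev_unmap_blocks() */
+	}
+	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
+		return 1;	/* caller issues spdk_bdev_write_zeroes_blocks() */
+	}
+	return -ENOTSUP;
+}
+#endif
+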
+int
+spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ if (bdev->fn_table->dump_info_json) {
+ return bdev->fn_table->dump_info_json(bdev->ctxt, w);
+ }
+
+ return 0;
+}
+
+static void
+spdk_bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
+{
+ uint32_t max_per_timeslice = 0;
+ int i;
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ qos->rate_limits[i].max_per_timeslice = 0;
+ continue;
+ }
+
+ max_per_timeslice = qos->rate_limits[i].limit *
+ SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC;
+
+ qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice,
+ qos->rate_limits[i].min_per_timeslice);
+
+ qos->rate_limits[i].remaining_this_timeslice = qos->rate_limits[i].max_per_timeslice;
+ }
+}
+
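+/*
+ * Worked example (illustrative, assuming the default 1 ms timeslice, i.e.
+ * SPDK_BDEV_QOS_TIMESLICE_IN_USEC = 1000): an IOPS limit of 10000 yields
+ *
+ *   max_per_timeslice = 10000 * 1000 / 1000000 = 10 I/O per timeslice,
+ *
+ * clamped from below by min_per_timeslice. remaining_this_timeslice is then
+ * reset to this budget at the start of every timeslice.
+ */
+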
+static int
+spdk_bdev_channel_poll_qos(void *arg)
+{
+ struct spdk_bdev_qos *qos = arg;
+ uint64_t now = spdk_get_ticks();
+ int i;
+
+ if (now < (qos->last_timeslice + qos->timeslice_size)) {
+ /* We received our callback earlier than expected - return
+ * immediately and wait to do accounting until at least one
+ * timeslice has actually expired. This should never happen
+ * with a well-behaved timer implementation.
+ */
+ return 0;
+ }
+
+ /* Reset for next round of rate limiting */
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ /* We may have allowed the IOs or bytes to slightly overrun in the last
+ * timeslice. remaining_this_timeslice is signed, so if it's negative
+ * here, we'll account for the overrun so that the next timeslice will
+ * be appropriately reduced.
+ */
+ if (qos->rate_limits[i].remaining_this_timeslice > 0) {
+ qos->rate_limits[i].remaining_this_timeslice = 0;
+ }
+ }
+
+ while (now >= (qos->last_timeslice + qos->timeslice_size)) {
+ qos->last_timeslice += qos->timeslice_size;
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ qos->rate_limits[i].remaining_this_timeslice +=
+ qos->rate_limits[i].max_per_timeslice;
+ }
+ }
+
+ _spdk_bdev_qos_io_submit(qos->ch, qos);
+
+ return -1;
+}
+
+static void
+_spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
+{
+ struct spdk_bdev_shared_resource *shared_resource;
+
+ if (!ch) {
+ return;
+ }
+
+ if (ch->channel) {
+ spdk_put_io_channel(ch->channel);
+ }
+
+ assert(ch->io_outstanding == 0);
+
+ shared_resource = ch->shared_resource;
+ if (shared_resource) {
+ assert(ch->io_outstanding == 0);
+ assert(shared_resource->ref > 0);
+ shared_resource->ref--;
+ if (shared_resource->ref == 0) {
+ assert(shared_resource->io_outstanding == 0);
+ TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link);
+ spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch));
+ free(shared_resource);
+ }
+ }
+}
+
+/* Caller must hold bdev->internal.mutex. */
+static void
+_spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
+{
+ struct spdk_bdev_qos *qos = bdev->internal.qos;
+ int i;
+
+	/* Rate limiting is enabled on this bdev if a QoS structure exists. */
+ if (qos) {
+ if (qos->ch == NULL) {
+ struct spdk_io_channel *io_ch;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch,
+ bdev->name, spdk_get_thread());
+
+ /* No qos channel has been selected, so set one up */
+
+ /* Take another reference to ch */
+ io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev));
+ qos->ch = ch;
+
+ qos->thread = spdk_io_channel_get_thread(io_ch);
+
+ TAILQ_INIT(&qos->queued);
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
+ qos->rate_limits[i].min_per_timeslice =
+ SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE;
+ } else {
+ qos->rate_limits[i].min_per_timeslice =
+ SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE;
+ }
+
+ if (qos->rate_limits[i].limit == 0) {
+ qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
+ }
+ }
+ spdk_bdev_qos_update_max_quota_per_timeslice(qos);
+ qos->timeslice_size =
+ SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
+ qos->last_timeslice = spdk_get_ticks();
+ qos->poller = spdk_poller_register(spdk_bdev_channel_poll_qos,
+ qos,
+ SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
+ }
+
+ ch->flags |= BDEV_CH_QOS_ENABLED;
+ }
+}
+
+static int
+spdk_bdev_channel_create(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
+ struct spdk_bdev_channel *ch = ctx_buf;
+ struct spdk_io_channel *mgmt_io_ch;
+ struct spdk_bdev_mgmt_channel *mgmt_ch;
+ struct spdk_bdev_shared_resource *shared_resource;
+
+ ch->bdev = bdev;
+ ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
+ if (!ch->channel) {
+ return -1;
+ }
+
+ mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
+ if (!mgmt_io_ch) {
+ return -1;
+ }
+
+ mgmt_ch = spdk_io_channel_get_ctx(mgmt_io_ch);
+ TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) {
+ if (shared_resource->shared_ch == ch->channel) {
+ spdk_put_io_channel(mgmt_io_ch);
+ shared_resource->ref++;
+ break;
+ }
+ }
+
+ if (shared_resource == NULL) {
+ shared_resource = calloc(1, sizeof(*shared_resource));
+ if (shared_resource == NULL) {
+ spdk_put_io_channel(mgmt_io_ch);
+ return -1;
+ }
+
+ shared_resource->mgmt_ch = mgmt_ch;
+ shared_resource->io_outstanding = 0;
+ TAILQ_INIT(&shared_resource->nomem_io);
+ shared_resource->nomem_threshold = 0;
+ shared_resource->shared_ch = ch->channel;
+ shared_resource->ref = 1;
+ TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link);
+ }
+
+ memset(&ch->stat, 0, sizeof(ch->stat));
+ ch->stat.ticks_rate = spdk_get_ticks_hz();
+ ch->io_outstanding = 0;
+ TAILQ_INIT(&ch->queued_resets);
+ ch->flags = 0;
+ ch->shared_resource = shared_resource;
+
+#ifdef SPDK_CONFIG_VTUNE
+ {
+ char *name;
+ __itt_init_ittlib(NULL, 0);
+ name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
+ if (!name) {
+ _spdk_bdev_channel_destroy_resource(ch);
+ return -1;
+ }
+ ch->handle = __itt_string_handle_create(name);
+ free(name);
+ ch->start_tsc = spdk_get_ticks();
+ ch->interval_tsc = spdk_get_ticks_hz() / 100;
+ memset(&ch->prev_stat, 0, sizeof(ch->prev_stat));
+ }
+#endif
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ _spdk_bdev_enable_qos(bdev, ch);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ return 0;
+}
+
+/*
+ * Abort I/O that are waiting on a data buffer. These types of I/O are
+ * linked using the spdk_bdev_io internal.buf_link TAILQ_ENTRY.
+ */
+static void
+_spdk_bdev_abort_buf_io(bdev_io_stailq_t *queue, struct spdk_bdev_channel *ch)
+{
+ bdev_io_stailq_t tmp;
+ struct spdk_bdev_io *bdev_io;
+
+ STAILQ_INIT(&tmp);
+
+ while (!STAILQ_EMPTY(queue)) {
+ bdev_io = STAILQ_FIRST(queue);
+ STAILQ_REMOVE_HEAD(queue, internal.buf_link);
+ if (bdev_io->internal.ch == ch) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ } else {
+ STAILQ_INSERT_TAIL(&tmp, bdev_io, internal.buf_link);
+ }
+ }
+
+ STAILQ_SWAP(&tmp, queue, spdk_bdev_io);
+}
+
+/*
+ * Abort I/O that are queued waiting for submission. These types of I/O are
+ * linked using the spdk_bdev_io link TAILQ_ENTRY.
+ */
+static void
+_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
+{
+ struct spdk_bdev_io *bdev_io, *tmp;
+
+ TAILQ_FOREACH_SAFE(bdev_io, queue, internal.link, tmp) {
+ if (bdev_io->internal.ch == ch) {
+ TAILQ_REMOVE(queue, bdev_io, internal.link);
+ /*
+ * spdk_bdev_io_complete() assumes that the completed I/O had
+ * been submitted to the bdev module. Since in this case it
+ * hadn't, bump io_outstanding to account for the decrement
+ * that spdk_bdev_io_complete() will do.
+ */
+ if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
+ ch->io_outstanding++;
+ ch->shared_resource->io_outstanding++;
+ }
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static void
+spdk_bdev_qos_channel_destroy(void *cb_arg)
+{
+ struct spdk_bdev_qos *qos = cb_arg;
+
+ spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
+ spdk_poller_unregister(&qos->poller);
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Free QoS %p.\n", qos);
+
+ free(qos);
+}
+
+static int
+spdk_bdev_qos_destroy(struct spdk_bdev *bdev)
+{
+ int i;
+
+ /*
+ * Cleanly shutting down the QoS poller is tricky, because
+ * during the asynchronous operation the user could open
+ * a new descriptor and create a new channel, spawning
+ * a new QoS poller.
+ *
+ * The strategy is to create a new QoS structure here and swap it
+ * in. The shutdown path then continues to refer to the old one
+ * until it completes and then releases it.
+ */
+ struct spdk_bdev_qos *new_qos, *old_qos;
+
+ old_qos = bdev->internal.qos;
+
+ new_qos = calloc(1, sizeof(*new_qos));
+ if (!new_qos) {
+ SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n");
+ return -ENOMEM;
+ }
+
+ /* Copy the old QoS data into the newly allocated structure */
+ memcpy(new_qos, old_qos, sizeof(*new_qos));
+
+ /* Zero out the key parts of the QoS structure */
+ new_qos->ch = NULL;
+ new_qos->thread = NULL;
+ new_qos->poller = NULL;
+ TAILQ_INIT(&new_qos->queued);
+ /*
+ * The limit member of spdk_bdev_qos_limit structure is not zeroed.
+ * It will be used later for the new QoS structure.
+ */
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ new_qos->rate_limits[i].remaining_this_timeslice = 0;
+ new_qos->rate_limits[i].min_per_timeslice = 0;
+ new_qos->rate_limits[i].max_per_timeslice = 0;
+ }
+
+ bdev->internal.qos = new_qos;
+
+ if (old_qos->thread == NULL) {
+ free(old_qos);
+ } else {
+ spdk_thread_send_msg(old_qos->thread, spdk_bdev_qos_channel_destroy,
+ old_qos);
+ }
+
+ /* It is safe to continue with destroying the bdev even though the QoS channel hasn't
+ * been destroyed yet. The destruction path will end up waiting for the final
+ * channel to be put before it releases resources. */
+
+ return 0;
+}
+
+static void
+_spdk_bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add)
+{
+ total->bytes_read += add->bytes_read;
+ total->num_read_ops += add->num_read_ops;
+ total->bytes_written += add->bytes_written;
+ total->num_write_ops += add->num_write_ops;
+ total->read_latency_ticks += add->read_latency_ticks;
+ total->write_latency_ticks += add->write_latency_ticks;
+}
+
+static void
+spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_channel *ch = ctx_buf;
+ struct spdk_bdev_mgmt_channel *mgmt_ch;
+ struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
+ spdk_get_thread());
+
+ /* This channel is going away, so add its statistics into the bdev so that they don't get lost. */
+ pthread_mutex_lock(&ch->bdev->internal.mutex);
+ _spdk_bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat);
+ pthread_mutex_unlock(&ch->bdev->internal.mutex);
+
+ mgmt_ch = shared_resource->mgmt_ch;
+
+ _spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
+ _spdk_bdev_abort_queued_io(&shared_resource->nomem_io, ch);
+ _spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_small, ch);
+ _spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_large, ch);
+
+ _spdk_bdev_channel_destroy_resource(ch);
+}
+
+int
+spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
+{
+ struct spdk_bdev_alias *tmp;
+
+ if (alias == NULL) {
+ SPDK_ERRLOG("Empty alias passed\n");
+ return -EINVAL;
+ }
+
+ if (spdk_bdev_get_by_name(alias)) {
+ SPDK_ERRLOG("Bdev name/alias: %s already exists\n", alias);
+ return -EEXIST;
+ }
+
+ tmp = calloc(1, sizeof(*tmp));
+ if (tmp == NULL) {
+ SPDK_ERRLOG("Unable to allocate alias\n");
+ return -ENOMEM;
+ }
+
+ tmp->alias = strdup(alias);
+ if (tmp->alias == NULL) {
+ free(tmp);
+ SPDK_ERRLOG("Unable to allocate alias\n");
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);
+
+ return 0;
+}
+
+int
+spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
+{
+ struct spdk_bdev_alias *tmp;
+
+ TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
+ if (strcmp(alias, tmp->alias) == 0) {
+ TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
+ free(tmp->alias);
+ free(tmp);
+ return 0;
+ }
+ }
+
+ SPDK_INFOLOG(SPDK_LOG_BDEV, "Alias %s does not exists\n", alias);
+
+ return -ENOENT;
+}
+
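+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): renaming a
+ * bdev by adding the new alias before dropping the old one, so the bdev is
+ * always reachable under at least one of the two names.
+ */
+#if 0
+static int
+example_rename_alias(struct spdk_bdev *bdev, const char *old_alias,
+		     const char *new_alias)
+{
+	int rc;
+
+	rc = spdk_bdev_alias_add(bdev, new_alias);
+	if (rc != 0) {
+		return rc;	/* -EEXIST if taken, -ENOMEM, -EINVAL, ... */
+	}
+	return spdk_bdev_alias_del(bdev, old_alias);
+}
+#endif
+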
+void
+spdk_bdev_alias_del_all(struct spdk_bdev *bdev)
+{
+ struct spdk_bdev_alias *p, *tmp;
+
+ TAILQ_FOREACH_SAFE(p, &bdev->aliases, tailq, tmp) {
+ TAILQ_REMOVE(&bdev->aliases, p, tailq);
+ free(p->alias);
+ free(p);
+ }
+}
+
+struct spdk_io_channel *
+spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
+{
+ return spdk_get_io_channel(__bdev_to_io_dev(desc->bdev));
+}
+
+const char *
+spdk_bdev_get_name(const struct spdk_bdev *bdev)
+{
+ return bdev->name;
+}
+
+const char *
+spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
+{
+ return bdev->product_name;
+}
+
+const struct spdk_bdev_aliases_list *
+spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
+{
+ return &bdev->aliases;
+}
+
+uint32_t
+spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
+{
+ return bdev->blocklen;
+}
+
+uint64_t
+spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
+{
+ return bdev->blockcnt;
+}
+
+const char *
+spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type)
+{
+ return qos_rpc_type[type];
+}
+
+void
+spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
+{
+ int i;
+
+ memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ if (bdev->internal.qos) {
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (bdev->internal.qos->rate_limits[i].limit !=
+ SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ limits[i] = bdev->internal.qos->rate_limits[i].limit;
+ if (_spdk_bdev_qos_is_iops_rate_limit(i) == false) {
+				/* Convert from bytes to megabytes, the user-visible unit. */
+ limits[i] = limits[i] / 1024 / 1024;
+ }
+ }
+ }
+ }
+ pthread_mutex_unlock(&bdev->internal.mutex);
+}
+
+size_t
+spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
+{
+ /* TODO: push this logic down to the bdev modules */
+ if (bdev->need_aligned_buffer) {
+ return bdev->blocklen;
+ }
+
+ return 1;
+}
+
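+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): callers
+ * that allocate their own payload buffers should honor the bdev's alignment
+ * requirement, e.g. via spdk_dma_zmalloc() as used for the zero buffer in
+ * spdk_bdev_initialize().
+ */
+#if 0
+static void *
+example_alloc_io_buf(struct spdk_bdev *bdev, uint64_t num_blocks)
+{
+	size_t align = spdk_bdev_get_buf_align(bdev);
+	size_t size = num_blocks * spdk_bdev_get_block_size(bdev);
+
+	return spdk_dma_zmalloc(size, align, NULL);
+}
+#endif
+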
+uint32_t
+spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
+{
+ return bdev->optimal_io_boundary;
+}
+
+bool
+spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
+{
+ return bdev->write_cache;
+}
+
+const struct spdk_uuid *
+spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
+{
+ return &bdev->uuid;
+}
+
+uint64_t
+spdk_bdev_get_qd(const struct spdk_bdev *bdev)
+{
+ return bdev->internal.measured_queue_depth;
+}
+
+uint64_t
+spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev)
+{
+ return bdev->internal.period;
+}
+
+uint64_t
+spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev)
+{
+ return bdev->internal.weighted_io_time;
+}
+
+uint64_t
+spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
+{
+ return bdev->internal.io_time;
+}
+
+static void
+_calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i);
+
+ bdev->internal.measured_queue_depth = bdev->internal.temporary_queue_depth;
+
+ if (bdev->internal.measured_queue_depth) {
+ bdev->internal.io_time += bdev->internal.period;
+ bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
+ }
+}
+
+static void
+_calculate_measured_qd(struct spdk_io_channel_iter *i)
+{
+ struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch);
+
+ bdev->internal.temporary_queue_depth += ch->io_outstanding;
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static int
+spdk_bdev_calculate_measured_queue_depth(void *ctx)
+{
+ struct spdk_bdev *bdev = ctx;
+ bdev->internal.temporary_queue_depth = 0;
+ spdk_for_each_channel(__bdev_to_io_dev(bdev), _calculate_measured_qd, bdev,
+ _calculate_measured_qd_cpl);
+ return 0;
+}
+
+void
+spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
+{
+ bdev->internal.period = period;
+
+ if (bdev->internal.qd_poller != NULL) {
+ spdk_poller_unregister(&bdev->internal.qd_poller);
+ bdev->internal.measured_queue_depth = UINT64_MAX;
+ }
+
+ if (period != 0) {
+ bdev->internal.qd_poller = spdk_poller_register(spdk_bdev_calculate_measured_queue_depth, bdev,
+ period);
+ }
+}
+
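+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): enabling
+ * queue-depth sampling. The period is in microseconds, since it is handed
+ * straight to spdk_poller_register() above; a period of 0 disables
+ * sampling.
+ */
+#if 0
+static void
+example_enable_qd_tracking(struct spdk_bdev *bdev)
+{
+	spdk_bdev_set_qd_sampling_period(bdev, 1000);	/* sample every 1 ms */
+	/* Later, spdk_bdev_get_qd(bdev) returns the last measured depth. */
+}
+#endif
+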
+int
+spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
+{
+ int ret;
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+
+ /* bdev has open descriptors */
+ if (!TAILQ_EMPTY(&bdev->internal.open_descs) &&
+ bdev->blockcnt > size) {
+ ret = -EBUSY;
+ } else {
+ bdev->blockcnt = size;
+ ret = 0;
+ }
+
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ return ret;
+}
+
+/*
+ * Convert I/O offset and length from bytes to blocks.
+ *
+ * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
+ */
+static uint64_t
+spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks,
+ uint64_t num_bytes, uint64_t *num_blocks)
+{
+ uint32_t block_size = bdev->blocklen;
+
+ *offset_blocks = offset_bytes / block_size;
+ *num_blocks = num_bytes / block_size;
+
+ return (offset_bytes % block_size) | (num_bytes % block_size);
+}
+
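+/*
+ * Worked example (illustrative): with a 512-byte block size, offset_bytes =
+ * 4096 and num_bytes = 8192 give offset_blocks = 8, num_blocks = 16, and a
+ * return value of 0. With num_bytes = 8200, 8200 % 512 = 8, so the OR of
+ * the remainders is non-zero and the caller rejects the I/O with -EINVAL.
+ */
+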
+static bool
+spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
+{
+	/* Return failure if offset_blocks + num_blocks is less than offset_blocks; this
+	 * indicates an overflow, i.e. the offset has wrapped around. */
+ if (offset_blocks + num_blocks < offset_blocks) {
+ return false;
+ }
+
+ /* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
+ if (offset_blocks + num_blocks > bdev->blockcnt) {
+ return false;
+ }
+
+ return true;
+}
+
+int
+spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
+ bdev_io->u.bdev.iovs = &bdev_io->iov;
+ bdev_io->u.bdev.iovs[0].iov_base = buf;
+ bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
+ bdev_io->u.bdev.iovcnt = 1;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
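+/*
+ * Illustrative sketch (not part of upstream SPDK, compiled out): a minimal
+ * read with completion handling. The descriptor and channel are assumed to
+ * have been obtained beforehand (open + spdk_bdev_get_io_channel()).
+ */
+#if 0
+static void
+example_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	if (!success) {
+		SPDK_ERRLOG("read failed\n");
+	}
+	spdk_bdev_free_io(bdev_io);
+}
+
+static int
+example_read_first_block(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+			 void *buf)
+{
+	/* Read one block at offset 0; -ENOMEM means retry via io_wait below. */
+	return spdk_bdev_read_blocks(desc, ch, buf, 0, 1, example_read_done, NULL);
+}
+#endif
+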
+int
+spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
+ bdev_io->u.bdev.iovs = iov;
+ bdev_io->u.bdev.iovcnt = iovcnt;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *buf, uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
+ bdev_io->u.bdev.iovs = &bdev_io->iov;
+ bdev_io->u.bdev.iovs[0].iov_base = buf;
+ bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen;
+ bdev_io->u.bdev.iovcnt = 1;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset, uint64_t len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
+ bdev_io->u.bdev.iovs = iov;
+ bdev_io->u.bdev.iovcnt = iovcnt;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+ } else if (_spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)) {
+ assert(spdk_bdev_get_block_size(bdev) <= ZERO_BUFFER_SIZE);
+ bdev_io->u.bdev.split_remaining_num_blocks = num_blocks;
+ bdev_io->u.bdev.split_current_offset_blocks = offset_blocks;
+ _spdk_bdev_write_zero_buffer_next(bdev_io);
+ return 0;
+ } else {
+ spdk_bdev_free_io(bdev_io);
+ return -ENOTSUP;
+ }
+}
+
+int
+spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ if (num_blocks == 0) {
+ SPDK_ERRLOG("Can't unmap 0 bytes\n");
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
+
+ bdev_io->u.bdev.iovs = &bdev_io->iov;
+ bdev_io->u.bdev.iovs[0].iov_base = NULL;
+ bdev_io->u.bdev.iovs[0].iov_len = 0;
+ bdev_io->u.bdev.iovcnt = 1;
+
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset, uint64_t length,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ uint64_t offset_blocks, num_blocks;
+
+ if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) {
+ return -EINVAL;
+ }
+
+ return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
+}
+
+int
+spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ uint64_t offset_blocks, uint64_t num_blocks,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
+ return -EINVAL;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
+ bdev_io->u.bdev.iovs = NULL;
+ bdev_io->u.bdev.iovcnt = 0;
+ bdev_io->u.bdev.offset_blocks = offset_blocks;
+ bdev_io->u.bdev.num_blocks = num_blocks;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+static void
+_spdk_bdev_reset_dev(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_bdev_channel *ch = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_bdev_io *bdev_io;
+
+ bdev_io = TAILQ_FIRST(&ch->queued_resets);
+ TAILQ_REMOVE(&ch->queued_resets, bdev_io, internal.link);
+ spdk_bdev_io_submit_reset(bdev_io);
+}
+
+static void
+_spdk_bdev_reset_freeze_channel(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *ch;
+ struct spdk_bdev_channel *channel;
+ struct spdk_bdev_mgmt_channel *mgmt_channel;
+ struct spdk_bdev_shared_resource *shared_resource;
+ bdev_io_tailq_t tmp_queued;
+
+ TAILQ_INIT(&tmp_queued);
+
+ ch = spdk_io_channel_iter_get_channel(i);
+ channel = spdk_io_channel_get_ctx(ch);
+ shared_resource = channel->shared_resource;
+ mgmt_channel = shared_resource->mgmt_ch;
+
+ channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
+
+ if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
+ /* The QoS object is always valid and readable while
+ * the channel flag is set, so the lock here should not
+ * be necessary. We're not in the fast path though, so
+ * just take it anyway. */
+ pthread_mutex_lock(&channel->bdev->internal.mutex);
+ if (channel->bdev->internal.qos->ch == channel) {
+ TAILQ_SWAP(&channel->bdev->internal.qos->queued, &tmp_queued, spdk_bdev_io, internal.link);
+ }
+ pthread_mutex_unlock(&channel->bdev->internal.mutex);
+ }
+
+ _spdk_bdev_abort_queued_io(&shared_resource->nomem_io, channel);
+ _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
+ _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
+ _spdk_bdev_abort_queued_io(&tmp_queued, channel);
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_spdk_bdev_start_reset(void *ctx)
+{
+ struct spdk_bdev_channel *ch = ctx;
+
+ spdk_for_each_channel(__bdev_to_io_dev(ch->bdev), _spdk_bdev_reset_freeze_channel,
+ ch, _spdk_bdev_reset_dev);
+}
+
+static void
+_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
+{
+ struct spdk_bdev *bdev = ch->bdev;
+
+ assert(!TAILQ_EMPTY(&ch->queued_resets));
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ if (bdev->internal.reset_in_progress == NULL) {
+ bdev->internal.reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
+ /*
+ * Take a channel reference for the target bdev for the life of this
+ * reset. This guards against the channel getting destroyed while
+ * spdk_for_each_channel() calls related to this reset IO are in
+ * progress. We will release the reference when this reset is
+ * completed.
+ */
+ bdev->internal.reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev));
+ _spdk_bdev_start_reset(ch);
+ }
+ pthread_mutex_unlock(&bdev->internal.mutex);
+}
+
+int
+spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
+ bdev_io->u.reset.ch_ref = NULL;
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, internal.link);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ _spdk_bdev_channel_start_reset(channel);
+
+ return 0;
+}
+
+void
+spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
+ struct spdk_bdev_io_stat *stat)
+{
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ *stat = channel->stat;
+}
+
+static void
+_spdk_bdev_get_device_stat_done(struct spdk_io_channel_iter *i, int status)
+{
+ void *io_device = spdk_io_channel_iter_get_io_device(i);
+ struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i);
+
+ bdev_iostat_ctx->cb(__bdev_from_io_dev(io_device), bdev_iostat_ctx->stat,
+ bdev_iostat_ctx->cb_arg, 0);
+ free(bdev_iostat_ctx);
+}
+
+static void
+_spdk_bdev_get_each_channel_stat(struct spdk_io_channel_iter *i)
+{
+ struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ _spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &channel->stat);
+ spdk_for_each_channel_continue(i, 0);
+}
+
+void
+spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
+ spdk_bdev_get_device_stat_cb cb, void *cb_arg)
+{
+ struct spdk_bdev_iostat_ctx *bdev_iostat_ctx;
+
+ assert(bdev != NULL);
+ assert(stat != NULL);
+ assert(cb != NULL);
+
+ bdev_iostat_ctx = calloc(1, sizeof(struct spdk_bdev_iostat_ctx));
+ if (bdev_iostat_ctx == NULL) {
+ SPDK_ERRLOG("Unable to allocate memory for spdk_bdev_iostat_ctx\n");
+ cb(bdev, stat, cb_arg, -ENOMEM);
+ return;
+ }
+
+ bdev_iostat_ctx->stat = stat;
+ bdev_iostat_ctx->cb = cb;
+ bdev_iostat_ctx->cb_arg = cb_arg;
+
+ /* Start with the statistics from previously deleted channels. */
+ pthread_mutex_lock(&bdev->internal.mutex);
+ _spdk_bdev_io_stat_add(bdev_iostat_ctx->stat, &bdev->internal.stat);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ /* Then iterate and add the statistics from each existing channel. */
+ spdk_for_each_channel(__bdev_to_io_dev(bdev),
+ _spdk_bdev_get_each_channel_stat,
+ bdev_iostat_ctx,
+ _spdk_bdev_get_device_stat_done);
+}
+
+int
+spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ return -EBADF;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
+ bdev_io->u.nvme_passthru.cmd = *cmd;
+ bdev_io->u.nvme_passthru.buf = buf;
+ bdev_io->u.nvme_passthru.nbytes = nbytes;
+ bdev_io->u.nvme_passthru.md_buf = NULL;
+ bdev_io->u.nvme_passthru.md_len = 0;
+
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ /*
+ * Do not try to parse the NVMe command - we could maybe use bits in the opcode
+ * to easily determine if the command is a read or write, but for now just
+ * do not allow io_passthru with a read-only descriptor.
+ */
+ return -EBADF;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
+ bdev_io->u.nvme_passthru.cmd = *cmd;
+ bdev_io->u.nvme_passthru.buf = buf;
+ bdev_io->u.nvme_passthru.nbytes = nbytes;
+ bdev_io->u.nvme_passthru.md_buf = NULL;
+ bdev_io->u.nvme_passthru.md_len = 0;
+
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
+
+int
+spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
+ spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+ if (!desc->write) {
+ /*
+ * Do not try to parse the NVMe command - we could maybe use bits in the opcode
+ * to easily determine if the command is a read or write, but for now just
+ * do not allow io_passthru with a read-only descriptor.
+ */
+ return -EBADF;
+ }
+
+ bdev_io = spdk_bdev_get_io(channel);
+ if (!bdev_io) {
+ return -ENOMEM;
+ }
+
+ bdev_io->internal.ch = channel;
+ bdev_io->internal.desc = desc;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
+ bdev_io->u.nvme_passthru.cmd = *cmd;
+ bdev_io->u.nvme_passthru.buf = buf;
+ bdev_io->u.nvme_passthru.nbytes = nbytes;
+ bdev_io->u.nvme_passthru.md_buf = md_buf;
+ bdev_io->u.nvme_passthru.md_len = md_len;
+
+ spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+ spdk_bdev_io_submit(bdev_io);
+ return 0;
+}
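+
+/* Usage sketch (hypothetical caller): the passthru variants hand a raw NVMe
+ * command straight to the backing driver. For example, an Identify Controller
+ * admin command into a 4 KiB DMA-able buffer; admin_done is a made-up
+ * completion callback:
+ *
+ *	struct spdk_nvme_cmd cmd = {};
+ *	void *buf = spdk_dma_zmalloc(4096, 4096, NULL);
+ *
+ *	cmd.opc = SPDK_NVME_OPC_IDENTIFY;
+ *	cmd.cdw10 = 1;			// CNS 1: identify controller data
+ *	rc = spdk_bdev_nvme_admin_passthru(desc, ch, &cmd, buf, 4096,
+ *					   admin_done, NULL);
+ */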
+
+int
+spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
+ struct spdk_bdev_io_wait_entry *entry)
+{
+ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+ struct spdk_bdev_mgmt_channel *mgmt_ch = channel->shared_resource->mgmt_ch;
+
+ if (bdev != entry->bdev) {
+ SPDK_ERRLOG("bdevs do not match\n");
+ return -EINVAL;
+ }
+
+ if (mgmt_ch->per_thread_cache_count > 0) {
+ SPDK_ERRLOG("Cannot queue io_wait if spdk_bdev_io available in per-thread cache\n");
+ return -EINVAL;
+ }
+
+ TAILQ_INSERT_TAIL(&mgmt_ch->io_wait_queue, entry, link);
+ return 0;
+}
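+
+/* Usage sketch (hypothetical caller): the intended pattern is to retry a
+ * submission that failed with -ENOMEM once a spdk_bdev_io frees up. The
+ * my_ctx struct, retry_read and read_done are made up; the entry must stay
+ * allocated until its callback runs:
+ *
+ *	static void
+ *	retry_read(void *arg)
+ *	{
+ *		struct my_ctx *ctx = arg;
+ *
+ *		my_submit_read(ctx);		// calls spdk_bdev_read() again
+ *	}
+ *
+ *	rc = spdk_bdev_read(desc, ch, buf, offset, nbytes, read_done, ctx);
+ *	if (rc == -ENOMEM) {
+ *		ctx->bdev_io_wait.bdev = bdev;
+ *		ctx->bdev_io_wait.cb_fn = retry_read;
+ *		ctx->bdev_io_wait.cb_arg = ctx;
+ *		spdk_bdev_queue_io_wait(bdev, ch, &ctx->bdev_io_wait);
+ *	}
+ */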
+
+static void
+_spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
+{
+ struct spdk_bdev *bdev = bdev_ch->bdev;
+ struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
+ struct spdk_bdev_io *bdev_io;
+
+ if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
+ /*
+ * Allow some more I/O to complete before retrying the nomem_io queue.
+ * Some drivers (such as nvme) cannot immediately take a new I/O in
+ * the context of a completion, because the resources for the I/O are
+ * not released until control returns to the bdev poller. Also, we
+ * may require several small I/O to complete before a larger I/O
+ * (that requires splitting) can be submitted.
+ */
+ return;
+ }
+
+ while (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
+ bdev_io = TAILQ_FIRST(&shared_resource->nomem_io);
+ TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, internal.link);
+ bdev_io->internal.ch->io_outstanding++;
+ shared_resource->io_outstanding++;
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
+ bdev->fn_table->submit_request(bdev_io->internal.ch->channel, bdev_io);
+ if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
+ break;
+ }
+ }
+}
+
+static inline void
+_spdk_bdev_io_complete(void *ctx)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ uint64_t tsc;
+
+ if (spdk_unlikely(bdev_io->internal.in_submit_request || bdev_io->internal.io_submit_ch)) {
+ /*
+ * Send the completion to the thread that originally submitted the I/O,
+ * which may not be the current thread in the case of QoS.
+ */
+ if (bdev_io->internal.io_submit_ch) {
+ bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
+ bdev_io->internal.io_submit_ch = NULL;
+ }
+
+ /*
+ * Defer completion to avoid potential infinite recursion if the
+ * user's completion callback issues a new I/O.
+ */
+ spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->internal.ch->channel),
+ _spdk_bdev_io_complete, bdev_io);
+ return;
+ }
+
+ tsc = spdk_get_ticks();
+ spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)bdev_io, 0);
+
+ if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ bdev_io->internal.ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
+ bdev_io->internal.ch->stat.num_read_ops++;
+ bdev_io->internal.ch->stat.read_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_io->internal.ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
+ bdev_io->internal.ch->stat.num_write_ops++;
+ bdev_io->internal.ch->stat.write_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
+ break;
+ default:
+ break;
+ }
+ }
+
+#ifdef SPDK_CONFIG_VTUNE
+ uint64_t now_tsc = spdk_get_ticks();
+ if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) {
+ uint64_t data[5];
+
+ data[0] = bdev_io->internal.ch->stat.num_read_ops - bdev_io->internal.ch->prev_stat.num_read_ops;
+ data[1] = bdev_io->internal.ch->stat.bytes_read - bdev_io->internal.ch->prev_stat.bytes_read;
+ data[2] = bdev_io->internal.ch->stat.num_write_ops - bdev_io->internal.ch->prev_stat.num_write_ops;
+ data[3] = bdev_io->internal.ch->stat.bytes_written - bdev_io->internal.ch->prev_stat.bytes_written;
+ data[4] = bdev_io->bdev->fn_table->get_spin_time ?
+ bdev_io->bdev->fn_table->get_spin_time(bdev_io->internal.ch->channel) : 0;
+
+ __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->internal.ch->handle,
+ __itt_metadata_u64, 5, data);
+
+ bdev_io->internal.ch->prev_stat = bdev_io->internal.ch->stat;
+ bdev_io->internal.ch->start_tsc = now_tsc;
+ }
+#endif
+
+ assert(bdev_io->internal.cb != NULL);
+ assert(spdk_get_thread() == spdk_io_channel_get_thread(bdev_io->internal.ch->channel));
+
+ bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
+ bdev_io->internal.caller_ctx);
+}
+
+static void
+_spdk_bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
+
+ if (bdev_io->u.reset.ch_ref != NULL) {
+ spdk_put_io_channel(bdev_io->u.reset.ch_ref);
+ bdev_io->u.reset.ch_ref = NULL;
+ }
+
+ _spdk_bdev_io_complete(bdev_io);
+}
+
+static void
+_spdk_bdev_unfreeze_channel(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);
+
+ ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
+ if (!TAILQ_EMPTY(&ch->queued_resets)) {
+ _spdk_bdev_channel_start_reset(ch);
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+void
+spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
+ struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
+
+ bdev_io->internal.status = status;
+
+ if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
+ bool unlock_channels = false;
+
+ if (status == SPDK_BDEV_IO_STATUS_NOMEM) {
+ SPDK_ERRLOG("NOMEM returned for reset\n");
+ }
+ pthread_mutex_lock(&bdev->internal.mutex);
+ if (bdev_io == bdev->internal.reset_in_progress) {
+ bdev->internal.reset_in_progress = NULL;
+ unlock_channels = true;
+ }
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ if (unlock_channels) {
+ spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_unfreeze_channel,
+ bdev_io, _spdk_bdev_reset_complete);
+ return;
+ }
+ } else {
+ assert(bdev_ch->io_outstanding > 0);
+ assert(shared_resource->io_outstanding > 0);
+ bdev_ch->io_outstanding--;
+ shared_resource->io_outstanding--;
+
+ if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) {
+ TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link);
+ /*
+ * Wait for some of the outstanding I/O to complete before we
+ * retry any of the nomem_io. Normally we will wait for
+ * NOMEM_THRESHOLD_COUNT I/O to complete but for low queue
+ * depth channels we will instead wait for half to complete.
+ */
+ shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2,
+ (int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT);
+ return;
+ }
+
+ if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
+ _spdk_bdev_ch_retry_io(bdev_ch);
+ }
+ }
+
+ _spdk_bdev_io_complete(bdev_io);
+}
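+
+/* Worked example for the nomem_threshold computation above, assuming the
+ * NOMEM_THRESHOLD_COUNT defined earlier in this file is 8: a channel with
+ * 100 I/O outstanding gets a threshold of max(50, 92) == 92, i.e. retries
+ * begin after 8 completions, while a shallow channel with 10 outstanding
+ * gets max(5, 2) == 5, i.e. retries wait until half have completed.
+ */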
+
+void
+spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
+ enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
+{
+ if (sc == SPDK_SCSI_STATUS_GOOD) {
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ } else {
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
+ bdev_io->internal.error.scsi.sc = sc;
+ bdev_io->internal.error.scsi.sk = sk;
+ bdev_io->internal.error.scsi.asc = asc;
+ bdev_io->internal.error.scsi.ascq = ascq;
+ }
+
+ spdk_bdev_io_complete(bdev_io, bdev_io->internal.status);
+}
+
+void
+spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
+ int *sc, int *sk, int *asc, int *ascq)
+{
+ assert(sc != NULL);
+ assert(sk != NULL);
+ assert(asc != NULL);
+ assert(ascq != NULL);
+
+ switch (bdev_io->internal.status) {
+ case SPDK_BDEV_IO_STATUS_SUCCESS:
+ *sc = SPDK_SCSI_STATUS_GOOD;
+ *sk = SPDK_SCSI_SENSE_NO_SENSE;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_BDEV_IO_STATUS_NVME_ERROR:
+ spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
+ break;
+ case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
+ *sc = bdev_io->internal.error.scsi.sc;
+ *sk = bdev_io->internal.error.scsi.sk;
+ *asc = bdev_io->internal.error.scsi.asc;
+ *ascq = bdev_io->internal.error.scsi.ascq;
+ break;
+ default:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ }
+}
+
+void
+spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
+{
+ if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ } else {
+ bdev_io->internal.error.nvme.sct = sct;
+ bdev_io->internal.error.nvme.sc = sc;
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
+ }
+
+ spdk_bdev_io_complete(bdev_io, bdev_io->internal.status);
+}
+
+void
+spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
+{
+ assert(sct != NULL);
+ assert(sc != NULL);
+
+ if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
+ *sct = bdev_io->internal.error.nvme.sct;
+ *sc = bdev_io->internal.error.nvme.sc;
+ } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
+ *sct = SPDK_NVME_SCT_GENERIC;
+ *sc = SPDK_NVME_SC_SUCCESS;
+ } else {
+ *sct = SPDK_NVME_SCT_GENERIC;
+ *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ }
+}
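+
+/* Usage sketch (hypothetical completion callback; read_done is made up): the
+ * getters above let a caller recover device-level status codes regardless of
+ * which driver completed the I/O:
+ *
+ *	static void
+ *	read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+ *	{
+ *		int sct, sc;
+ *
+ *		if (!success) {
+ *			spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
+ *			printf("read failed: sct=%d sc=%d\n", sct, sc);
+ *		}
+ *		spdk_bdev_free_io(bdev_io);
+ *	}
+ */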
+
+struct spdk_thread *
+spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
+{
+ return spdk_io_channel_get_thread(bdev_io->internal.ch->channel);
+}
+
+static void
+_spdk_bdev_qos_config_limit(struct spdk_bdev *bdev, uint64_t *limits)
+{
+ uint64_t min_qos_set;
+ int i;
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ break;
+ }
+ }
+
+ if (i == SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) {
+ SPDK_ERRLOG("Invalid rate limits set.\n");
+ return;
+ }
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ continue;
+ }
+
+ if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
+ min_qos_set = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
+ } else {
+ min_qos_set = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
+ }
+
+ if (limits[i] == 0 || limits[i] % min_qos_set) {
+ SPDK_ERRLOG("Assigned limit %" PRIu64 " on bdev %s is not multiple of %" PRIu64 "\n",
+ limits[i], bdev->name, min_qos_set);
+ SPDK_ERRLOG("Failed to enable QoS on this bdev %s\n", bdev->name);
+ return;
+ }
+ }
+
+ if (!bdev->internal.qos) {
+ bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
+ if (!bdev->internal.qos) {
+ SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
+ return;
+ }
+ }
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ bdev->internal.qos->rate_limits[i].limit = limits[i];
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS type:%d set:%lu\n",
+ bdev->name, i, limits[i]);
+ }
+
+ return;
+}
+
+static void
+_spdk_bdev_qos_config(struct spdk_bdev *bdev)
+{
+ struct spdk_conf_section *sp = NULL;
+ const char *val = NULL;
+ int i = 0, j = 0;
+ uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {};
+ bool config_qos = false;
+
+ sp = spdk_conf_find_section(NULL, "QoS");
+ if (!sp) {
+ return;
+ }
+
+ while (j < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES) {
+ limits[j] = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
+
+ i = 0;
+ while (true) {
+ val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 0);
+ if (!val) {
+ break;
+ }
+
+ if (strcmp(bdev->name, val) != 0) {
+ i++;
+ continue;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, qos_conf_type[j], i, 1);
+ if (val) {
+ if (_spdk_bdev_qos_is_iops_rate_limit(j) == true) {
+ limits[j] = strtoull(val, NULL, 10);
+ } else {
+ limits[j] = strtoull(val, NULL, 10) * 1024 * 1024;
+ }
+ config_qos = true;
+ }
+
+ break;
+ }
+
+ j++;
+ }
+
+ if (config_qos == true) {
+ _spdk_bdev_qos_config_limit(bdev, limits);
+ }
+
+ return;
+}
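+
+/* Illustrative config section parsed above (bdev names and values are made
+ * up; the key names come from the qos_conf_type table defined earlier in
+ * this file):
+ *
+ *	[QoS]
+ *	  Limit_IOPS  Malloc0  20000
+ *	  Limit_BPS   Nvme0n1  100
+ *
+ * IOPS limits are taken as-is; byte limits are given in MB/s and scaled to
+ * bytes per second (* 1024 * 1024) by the parser above.
+ */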
+
+static int
+spdk_bdev_init(struct spdk_bdev *bdev)
+{
+ char *bdev_name;
+
+ assert(bdev->module != NULL);
+
+ if (!bdev->name) {
+ SPDK_ERRLOG("Bdev name is NULL\n");
+ return -EINVAL;
+ }
+
+ if (spdk_bdev_get_by_name(bdev->name)) {
+ SPDK_ERRLOG("Bdev name:%s already exists\n", bdev->name);
+ return -EEXIST;
+ }
+
+ /* Users often register their own I/O devices using the bdev name. In
+ * order to avoid conflicts, prepend bdev_. */
+ bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);
+ if (!bdev_name) {
+ SPDK_ERRLOG("Unable to allocate memory for internal bdev name.\n");
+ return -ENOMEM;
+ }
+
+ bdev->internal.status = SPDK_BDEV_STATUS_READY;
+ bdev->internal.measured_queue_depth = UINT64_MAX;
+ bdev->internal.claim_module = NULL;
+ bdev->internal.qd_poller = NULL;
+ bdev->internal.qos = NULL;
+
+ TAILQ_INIT(&bdev->internal.open_descs);
+
+ TAILQ_INIT(&bdev->aliases);
+
+ bdev->internal.reset_in_progress = NULL;
+
+ _spdk_bdev_qos_config(bdev);
+
+ spdk_io_device_register(__bdev_to_io_dev(bdev),
+ spdk_bdev_channel_create, spdk_bdev_channel_destroy,
+ sizeof(struct spdk_bdev_channel),
+ bdev_name);
+
+ free(bdev_name);
+
+ pthread_mutex_init(&bdev->internal.mutex, NULL);
+ return 0;
+}
+
+static void
+spdk_bdev_destroy_cb(void *io_device)
+{
+ int rc;
+ struct spdk_bdev *bdev;
+ spdk_bdev_unregister_cb cb_fn;
+ void *cb_arg;
+
+ bdev = __bdev_from_io_dev(io_device);
+ cb_fn = bdev->internal.unregister_cb;
+ cb_arg = bdev->internal.unregister_ctx;
+
+ rc = bdev->fn_table->destruct(bdev->ctxt);
+ if (rc < 0) {
+ SPDK_ERRLOG("destruct failed\n");
+ }
+ if (rc <= 0 && cb_fn != NULL) {
+ cb_fn(cb_arg, rc);
+ }
+}
+
+static void
+spdk_bdev_fini(struct spdk_bdev *bdev)
+{
+ pthread_mutex_destroy(&bdev->internal.mutex);
+
+ free(bdev->internal.qos);
+
+ spdk_io_device_unregister(__bdev_to_io_dev(bdev), spdk_bdev_destroy_cb);
+}
+
+static void
+spdk_bdev_start(struct spdk_bdev *bdev)
+{
+ struct spdk_bdev_module *module;
+ uint32_t action;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
+ TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link);
+
+ /* Examine configuration before initializing I/O */
+ TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (module->examine_config) {
+ action = module->internal.action_in_progress;
+ module->internal.action_in_progress++;
+ module->examine_config(bdev);
+ if (action != module->internal.action_in_progress) {
+ SPDK_ERRLOG("examine_config for module %s did not call spdk_bdev_module_examine_done()\n",
+ module->name);
+ }
+ }
+ }
+
+ if (bdev->internal.claim_module) {
+ return;
+ }
+
+ TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (module->examine_disk) {
+ module->internal.action_in_progress++;
+ module->examine_disk(bdev);
+ }
+ }
+}
+
+int
+spdk_bdev_register(struct spdk_bdev *bdev)
+{
+ int rc = spdk_bdev_init(bdev);
+
+ if (rc == 0) {
+ spdk_bdev_start(bdev);
+ }
+
+ return rc;
+}
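+
+/* Usage sketch (hypothetical backend module; every my_* name is made up): a
+ * module fills in the bdev and its function table, then hands ownership to
+ * the bdev layer:
+ *
+ *	static const struct spdk_bdev_fn_table my_fn_table = {
+ *		.destruct		= my_destruct,
+ *		.submit_request		= my_submit_request,
+ *		.io_type_supported	= my_io_type_supported,
+ *		.get_io_channel		= my_get_io_channel,
+ *	};
+ *
+ *	bdev->name = "MyBdev0";
+ *	bdev->blocklen = 512;
+ *	bdev->blockcnt = 1024 * 1024;
+ *	bdev->module = &my_if;
+ *	bdev->fn_table = &my_fn_table;
+ *	rc = spdk_bdev_register(bdev);
+ */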
+
+int
+spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
+{
+ int rc;
+
+ rc = spdk_bdev_init(vbdev);
+ if (rc) {
+ return rc;
+ }
+
+ spdk_bdev_start(vbdev);
+ return 0;
+}
+
+void
+spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
+{
+ if (bdev->internal.unregister_cb != NULL) {
+ bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno);
+ }
+}
+
+static void
+_remove_notify(void *arg)
+{
+ struct spdk_bdev_desc *desc = arg;
+
+ desc->remove_scheduled = false;
+
+ if (desc->closed) {
+ free(desc);
+ } else {
+ desc->remove_cb(desc->remove_ctx);
+ }
+}
+
+void
+spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
+{
+ struct spdk_bdev_desc *desc, *tmp;
+ bool do_destruct = true;
+ struct spdk_thread *thread;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);
+
+ thread = spdk_get_thread();
+ if (!thread) {
+ /* The user called this from a non-SPDK thread. */
+ if (cb_fn != NULL) {
+ cb_fn(cb_arg, -ENOTSUP);
+ }
+ return;
+ }
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+
+ bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
+ bdev->internal.unregister_cb = cb_fn;
+ bdev->internal.unregister_ctx = cb_arg;
+
+ TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) {
+ if (desc->remove_cb) {
+ do_destruct = false;
+ /*
+ * Defer invocation of the remove_cb to a separate message that will
+ * run later on its thread. This ensures this context unwinds and
+ * we don't recursively unregister this bdev again if the remove_cb
+ * immediately closes its descriptor.
+ */
+ if (!desc->remove_scheduled) {
+ /* Avoid scheduling removal of the same descriptor multiple times. */
+ desc->remove_scheduled = true;
+ spdk_thread_send_msg(desc->thread, _remove_notify, desc);
+ }
+ }
+ }
+
+ if (!do_destruct) {
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ return;
+ }
+
+ TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ spdk_bdev_fini(bdev);
+}
+
+int
+spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
+ void *remove_ctx, struct spdk_bdev_desc **_desc)
+{
+ struct spdk_bdev_desc *desc;
+ struct spdk_thread *thread;
+
+ thread = spdk_get_thread();
+ if (!thread) {
+ SPDK_ERRLOG("Cannot open bdev from non-SPDK thread.\n");
+ return -ENOTSUP;
+ }
+
+ desc = calloc(1, sizeof(*desc));
+ if (desc == NULL) {
+ SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
+ return -ENOMEM;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
+ spdk_get_thread());
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+
+ if (write && bdev->internal.claim_module) {
+ SPDK_ERRLOG("Could not open %s - %s module already claimed it\n",
+ bdev->name, bdev->internal.claim_module->name);
+ free(desc);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ return -EPERM;
+ }
+
+ TAILQ_INSERT_TAIL(&bdev->internal.open_descs, desc, link);
+
+ desc->bdev = bdev;
+ desc->thread = thread;
+ desc->remove_cb = remove_cb;
+ desc->remove_ctx = remove_ctx;
+ desc->write = write;
+ *_desc = desc;
+
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ return 0;
+}
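+
+/* Usage sketch (hypothetical caller; my_ctx and hotremove_cb are made up):
+ * open a descriptor with a hot-remove callback and close it later on the
+ * same thread that opened it:
+ *
+ *	static void
+ *	hotremove_cb(void *remove_ctx)
+ *	{
+ *		struct my_ctx *ctx = remove_ctx;
+ *
+ *		spdk_bdev_close(ctx->desc);	// bdev is going away
+ *	}
+ *
+ *	rc = spdk_bdev_open(bdev, true, hotremove_cb, ctx, &ctx->desc);
+ */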
+
+void
+spdk_bdev_close(struct spdk_bdev_desc *desc)
+{
+ struct spdk_bdev *bdev = desc->bdev;
+ bool do_unregister = false;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
+ spdk_get_thread());
+
+ assert(desc->thread == spdk_get_thread());
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+
+ TAILQ_REMOVE(&bdev->internal.open_descs, desc, link);
+
+ desc->closed = true;
+
+ if (!desc->remove_scheduled) {
+ free(desc);
+ }
+
+ /* If no more descriptors, kill QoS channel */
+ if (bdev->internal.qos && TAILQ_EMPTY(&bdev->internal.open_descs)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n",
+ bdev->name, spdk_get_thread());
+
+ if (spdk_bdev_qos_destroy(bdev)) {
+ /* There isn't anything we can do to recover here. Just let the
+ * old QoS poller keep running. The QoS handling won't change
+ * cores when the user allocates a new channel, but it won't break. */
+ SPDK_ERRLOG("Unable to shut down QoS poller. It will continue running on the current thread.\n");
+ }
+ }
+
+ spdk_bdev_set_qd_sampling_period(bdev, 0);
+
+ if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
+ do_unregister = true;
+ }
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ if (do_unregister == true) {
+ spdk_bdev_unregister(bdev, bdev->internal.unregister_cb, bdev->internal.unregister_ctx);
+ }
+}
+
+int
+spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
+ struct spdk_bdev_module *module)
+{
+ if (bdev->internal.claim_module != NULL) {
+ SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
+ bdev->internal.claim_module->name);
+ return -EPERM;
+ }
+
+ if (desc && !desc->write) {
+ desc->write = true;
+ }
+
+ bdev->internal.claim_module = module;
+ return 0;
+}
+
+void
+spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
+{
+ assert(bdev->internal.claim_module != NULL);
+ bdev->internal.claim_module = NULL;
+}
+
+struct spdk_bdev *
+spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
+{
+ return desc->bdev;
+}
+
+void
+spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
+{
+ struct iovec *iovs;
+ int iovcnt;
+
+ if (bdev_io == NULL) {
+ return;
+ }
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ iovs = bdev_io->u.bdev.iovs;
+ iovcnt = bdev_io->u.bdev.iovcnt;
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ iovs = bdev_io->u.bdev.iovs;
+ iovcnt = bdev_io->u.bdev.iovcnt;
+ break;
+ default:
+ iovs = NULL;
+ iovcnt = 0;
+ break;
+ }
+
+ if (iovp) {
+ *iovp = iovs;
+ }
+ if (iovcntp) {
+ *iovcntp = iovcnt;
+ }
+}
+
+void
+spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
+{
+ if (spdk_bdev_module_list_find(bdev_module->name)) {
+ SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name);
+ assert(false);
+ }
+
+ if (bdev_module->async_init) {
+ bdev_module->internal.action_in_progress = 1;
+ }
+
+ /*
+ * Modules with examine callbacks must be initialized first, so they are
+ * ready to handle examine callbacks from later modules that will
+ * register physical bdevs.
+ */
+ if (bdev_module->examine_config != NULL || bdev_module->examine_disk != NULL) {
+ TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
+ } else {
+ TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
+ }
+}
+
+struct spdk_bdev_module *
+spdk_bdev_module_list_find(const char *name)
+{
+ struct spdk_bdev_module *bdev_module;
+
+ TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
+ if (strcmp(name, bdev_module->name) == 0) {
+ break;
+ }
+ }
+
+ return bdev_module;
+}
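+
+/* Usage sketch (hypothetical module; my_* names are made up): modules do not
+ * normally call spdk_bdev_module_list_add() directly - the registration
+ * macro in spdk/bdev_module.h invokes it from a constructor:
+ *
+ *	static struct spdk_bdev_module my_if = {
+ *		.name = "my_module",
+ *		.module_init = my_init,
+ *		.module_fini = my_fini,
+ *	};
+ *
+ *	SPDK_BDEV_MODULE_REGISTER(&my_if)
+ */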
+
+static void
+_spdk_bdev_write_zero_buffer_next(void *_bdev_io)
+{
+ struct spdk_bdev_io *bdev_io = _bdev_io;
+ uint64_t num_bytes, num_blocks;
+ int rc;
+
+ num_bytes = spdk_min(spdk_bdev_get_block_size(bdev_io->bdev) *
+ bdev_io->u.bdev.split_remaining_num_blocks,
+ ZERO_BUFFER_SIZE);
+ num_blocks = num_bytes / spdk_bdev_get_block_size(bdev_io->bdev);
+
+ rc = spdk_bdev_write_blocks(bdev_io->internal.desc,
+ spdk_io_channel_from_ctx(bdev_io->internal.ch),
+ g_bdev_mgr.zero_buffer,
+ bdev_io->u.bdev.split_current_offset_blocks, num_blocks,
+ _spdk_bdev_write_zero_buffer_done, bdev_io);
+ if (rc == 0) {
+ bdev_io->u.bdev.split_remaining_num_blocks -= num_blocks;
+ bdev_io->u.bdev.split_current_offset_blocks += num_blocks;
+ } else if (rc == -ENOMEM) {
+ _spdk_bdev_queue_io_wait_with_cb(bdev_io, _spdk_bdev_write_zero_buffer_next);
+ } else {
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
+ }
+}
+
+static void
+_spdk_bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *parent_io = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
+ return;
+ }
+
+ if (parent_io->u.bdev.split_remaining_num_blocks == 0) {
+ parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ parent_io->internal.cb(parent_io, true, parent_io->internal.caller_ctx);
+ return;
+ }
+
+ _spdk_bdev_write_zero_buffer_next(parent_io);
+}
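+
+/* Worked example for the split sequence above: assuming the ZERO_BUFFER_SIZE
+ * defined earlier in this file is 1 MiB and the block size is 512 bytes, a
+ * 4 MiB write_zeroes request is carried out as four sequential 2048-block
+ * writes of g_bdev_mgr.zero_buffer, each issued from the completion callback
+ * of the previous one.
+ */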
+
+struct set_qos_limit_ctx {
+ void (*cb_fn)(void *cb_arg, int status);
+ void *cb_arg;
+ struct spdk_bdev *bdev;
+};
+
+static void
+_spdk_bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
+{
+ pthread_mutex_lock(&ctx->bdev->internal.mutex);
+ ctx->bdev->internal.qos_mod_in_progress = false;
+ pthread_mutex_unlock(&ctx->bdev->internal.mutex);
+
+ ctx->cb_fn(ctx->cb_arg, status);
+ free(ctx);
+}
+
+static void
+_spdk_bdev_disable_qos_done(void *cb_arg)
+{
+ struct set_qos_limit_ctx *ctx = cb_arg;
+ struct spdk_bdev *bdev = ctx->bdev;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_qos *qos;
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ qos = bdev->internal.qos;
+ bdev->internal.qos = NULL;
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ while (!TAILQ_EMPTY(&qos->queued)) {
+ /* Send queued I/O back to their original thread for resubmission. */
+ bdev_io = TAILQ_FIRST(&qos->queued);
+ TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);
+
+ if (bdev_io->internal.io_submit_ch) {
+ /*
+ * Channel was changed when sending it to the QoS thread - change it back
+ * before sending it back to the original thread.
+ */
+ bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
+ bdev_io->internal.io_submit_ch = NULL;
+ }
+
+ spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->internal.ch->channel),
+ _spdk_bdev_io_submit, bdev_io);
+ }
+
+ spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
+ spdk_poller_unregister(&qos->poller);
+
+ free(qos);
+
+ _spdk_bdev_set_qos_limit_done(ctx, 0);
+}
+
+static void
+_spdk_bdev_disable_qos_msg_done(struct spdk_io_channel_iter *i, int status)
+{
+ void *io_device = spdk_io_channel_iter_get_io_device(i);
+ struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
+ struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+ struct spdk_thread *thread;
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ thread = bdev->internal.qos->thread;
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ spdk_thread_send_msg(thread, _spdk_bdev_disable_qos_done, ctx);
+}
+
+static void
+_spdk_bdev_disable_qos_msg(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
+
+ bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED;
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_spdk_bdev_update_qos_rate_limit_msg(void *cb_arg)
+{
+ struct set_qos_limit_ctx *ctx = cb_arg;
+ struct spdk_bdev *bdev = ctx->bdev;
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ spdk_bdev_qos_update_max_quota_per_timeslice(bdev->internal.qos);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+
+ _spdk_bdev_set_qos_limit_done(ctx, 0);
+}
+
+static void
+_spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i)
+{
+ void *io_device = spdk_io_channel_iter_get_io_device(i);
+ struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ _spdk_bdev_enable_qos(bdev, bdev_ch);
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
+
+ _spdk_bdev_set_qos_limit_done(ctx, status);
+}
+
+static void
+_spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
+{
+ int i;
+
+ assert(bdev->internal.qos != NULL);
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ bdev->internal.qos->rate_limits[i].limit = limits[i];
+
+ if (limits[i] == 0) {
+ bdev->internal.qos->rate_limits[i].limit =
+ SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
+ }
+ }
+ }
+}
+
+void
+spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
+ void (*cb_fn)(void *cb_arg, int status), void *cb_arg)
+{
+ struct set_qos_limit_ctx *ctx;
+ uint32_t limit_set_complement;
+ uint64_t min_limit_per_sec;
+ int i;
+ bool disable_rate_limit = true;
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
+ continue;
+ }
+
+ if (limits[i] > 0) {
+ disable_rate_limit = false;
+ }
+
+ if (_spdk_bdev_qos_is_iops_rate_limit(i) == true) {
+ min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
+ } else {
+ /* Change from megabyte to byte rate limit */
+ limits[i] = limits[i] * 1024 * 1024;
+ min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
+ }
+
+ limit_set_complement = limits[i] % min_limit_per_sec;
+ if (limit_set_complement) {
+ SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n",
+ limits[i], min_limit_per_sec);
+ limits[i] += min_limit_per_sec - limit_set_complement;
+ SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]);
+ }
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx == NULL) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ ctx->cb_fn = cb_fn;
+ ctx->cb_arg = cb_arg;
+ ctx->bdev = bdev;
+
+ pthread_mutex_lock(&bdev->internal.mutex);
+ if (bdev->internal.qos_mod_in_progress) {
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ free(ctx);
+ cb_fn(cb_arg, -EAGAIN);
+ return;
+ }
+ bdev->internal.qos_mod_in_progress = true;
+
+ if (disable_rate_limit == true && bdev->internal.qos) {
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED &&
+ (bdev->internal.qos->rate_limits[i].limit > 0 &&
+ bdev->internal.qos->rate_limits[i].limit !=
+ SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) {
+ disable_rate_limit = false;
+ break;
+ }
+ }
+ }
+
+ if (disable_rate_limit == false) {
+ if (bdev->internal.qos == NULL) {
+ /* Enabling */
+ bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
+ if (!bdev->internal.qos) {
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
+ free(ctx);
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ _spdk_bdev_set_qos_rate_limits(bdev, limits);
+
+ spdk_for_each_channel(__bdev_to_io_dev(bdev),
+ _spdk_bdev_enable_qos_msg, ctx,
+ _spdk_bdev_enable_qos_done);
+ } else {
+ /* Updating */
+ _spdk_bdev_set_qos_rate_limits(bdev, limits);
+
+ spdk_thread_send_msg(bdev->internal.qos->thread,
+ _spdk_bdev_update_qos_rate_limit_msg, ctx);
+ }
+ } else {
+ if (bdev->internal.qos != NULL) {
+ _spdk_bdev_set_qos_rate_limits(bdev, limits);
+
+ /* Disabling */
+ spdk_for_each_channel(__bdev_to_io_dev(bdev),
+ _spdk_bdev_disable_qos_msg, ctx,
+ _spdk_bdev_disable_qos_msg_done);
+ } else {
+ pthread_mutex_unlock(&bdev->internal.mutex);
+ _spdk_bdev_set_qos_limit_done(ctx, 0);
+ return;
+ }
+ }
+
+ pthread_mutex_unlock(&bdev->internal.mutex);
+}
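+
+/* Usage sketch (hypothetical caller; set_limits_done is made up): leave every
+ * type undefined except the one being changed, here a 10K read/write IOPS
+ * cap:
+ *
+ *	uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+ *	int i;
+ *
+ *	for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ *		limits[i] = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
+ *	}
+ *	limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000;
+ *	spdk_bdev_set_qos_rate_limits(bdev, limits, set_limits_done, NULL);
+ */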
+
+SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV)
+
+SPDK_TRACE_REGISTER_FN(bdev_trace)
+{
+ spdk_trace_register_owner(OWNER_BDEV, 'b');
+ spdk_trace_register_object(OBJECT_BDEV_IO, 'i');
+ spdk_trace_register_description("BDEV_IO_START", "", TRACE_BDEV_IO_START, OWNER_BDEV,
+ OBJECT_BDEV_IO, 1, 0, "type: ");
+ spdk_trace_register_description("BDEV_IO_DONE", "", TRACE_BDEV_IO_DONE, OWNER_BDEV,
+ OBJECT_BDEV_IO, 0, 0, "");
+}
diff --git a/src/spdk/lib/bdev/crypto/Makefile b/src/spdk/lib/bdev/crypto/Makefile
new file mode 100644
index 00000000..c3eb1b74
--- /dev/null
+++ b/src/spdk/lib/bdev/crypto/Makefile
@@ -0,0 +1,42 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += $(ENV_CFLAGS)
+
+C_SRCS = vbdev_crypto.c vbdev_crypto_rpc.c
+LIBNAME = vbdev_crypto
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto.c b/src/spdk/lib/bdev/crypto/vbdev_crypto.c
new file mode 100644
index 00000000..510e8496
--- /dev/null
+++ b/src/spdk/lib/bdev/crypto/vbdev_crypto.c
@@ -0,0 +1,1506 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "vbdev_crypto.h"
+
+#include "spdk/env.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/io_channel.h"
+#include "spdk/bdev_module.h"
+
+#include <rte_config.h>
+#include <rte_bus_vdev.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+
+/* To add support for new device types, follow the examples of the following...
+ * Note that the string names are defined by the DPDK PMD in question so be
+ * sure to use the exact names.
+ */
+#define MAX_NUM_DRV_TYPES 2
+#define AESNI_MB "crypto_aesni_mb"
+#define QAT "crypto_qat"
+const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
+
+/* Global list of available crypto devices. */
+struct vbdev_dev {
+ struct rte_cryptodev_info cdev_info; /* includes device friendly name */
+ uint8_t cdev_id; /* identifier for the device */
+ TAILQ_ENTRY(vbdev_dev) link;
+};
+static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
+
+/* Global list and lock for unique device/queue pair combos */
+struct device_qp {
+ struct vbdev_dev *device; /* ptr to crypto device */
+ uint8_t qp; /* queue pair for this node */
+ bool in_use; /* whether this node is in use or not */
+ TAILQ_ENTRY(device_qp) link;
+};
+static TAILQ_HEAD(, device_qp) g_device_qp = TAILQ_HEAD_INITIALIZER(g_device_qp);
+static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+/* Because we do one crypto operation per LBA (we use the LBA as the IV),
+ * resource usage scales with I/O size. To bound it, we tell the bdev layer
+ * that our max IO size is something reasonable. Units here are in bytes.
+ */
+#define CRYPTO_MAX_IO (64 * 1024)
+
+/* This controls how many ops will be dequeued from the crypto driver in one run
+ * of the poller. It is mainly a performance knob as it effectively determines how
+ * much work the poller has to do. However, even that can vary between crypto drivers:
+ * the AESNI_MB driver, for example, does all the crypto work on dequeue, whereas the
+ * QAT driver just dequeues what has already been completed.
+ */
+#define MAX_DEQUEUE_BURST_SIZE 64
+
+/* When enqueueing, we need to supply the crypto driver with an array of pointers to
+ * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO
+ * value in conjunction with the other defines to make sure we're not using crazy amounts
+ * of memory. All of these numbers can and probably should be adjusted based on the
+ * workload. By default we'll use the worst case (smallest) block size for the
+ * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B
+ * blocks would give us an enqueue array size of 128.
+ */
+#define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)
+
+/* The number of MBUFS we need must be a power of two and, to support other small IOs
+ * in addition to the limits mentioned above, we go to the next power of two. It is a
+ * big number because one mempool serves both source and destination mbufs. It may
+ * need to be bigger to support multiple crypto drivers at once.
+ */
+#define NUM_MBUFS 32768
+#define POOL_CACHE_SIZE 256
+#define NUM_SESSIONS NUM_MBUFS
+#define SESS_MEMPOOL_CACHE_SIZE 256
+
+/* This is the max number of IOs we can supply to any crypto device QP at one time.
+ * It can vary between drivers.
+ */
+#define CRYPTO_QP_DESCRIPTORS 2048
+
+/* Specific to AES_CBC. */
+#define AES_CBC_IV_LENGTH 16
+#define AES_CBC_KEY_LENGTH 16
+
+/* Common for supported devices. */
+#define IV_OFFSET (sizeof(struct rte_crypto_op) + \
+ sizeof(struct rte_crypto_sym_op))
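+
+/* Illustrative layout behind IV_OFFSET: the op pool below is created with
+ * enough private data per op to hold the IV, so each operation looks like
+ *
+ *	+----------------------+--------------------------+---------------+
+ *	| struct rte_crypto_op | struct rte_crypto_sym_op | IV (16 bytes) |
+ *	+----------------------+--------------------------+---------------+
+ *	^ op                                              ^ op + IV_OFFSET
+ *
+ * and the submit path fetches the IV pointer with
+ * rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET).
+ */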
+
+static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+static void vbdev_crypto_examine(struct spdk_bdev *bdev);
+static int vbdev_crypto_claim(struct spdk_bdev *bdev);
+
+/* List of crypto_bdev names and their base bdevs via configuration file.
+ * Used so we can parse the conf once at init and use this list in examine().
+ */
+struct bdev_names {
+ char *vbdev_name; /* name of the vbdev to create */
+ char *bdev_name; /* base bdev name */
+
+ /* Note: for dev/test we allow use of a key in the config file; for production
+ * use, you must use an RPC to specify the key for security reasons.
+ */
+ uint8_t *key; /* key per bdev */
+ char *drv_name; /* name of the crypto device driver */
+ TAILQ_ENTRY(bdev_names) link;
+};
+static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
+
+/* List of virtual bdevs and associated info for each. We keep the device friendly name here even
+ * though it's also in the device struct, because we use it early on.
+ */
+struct vbdev_crypto {
+ struct spdk_bdev *base_bdev; /* the thing we're attaching to */
+ struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */
+ struct spdk_bdev crypto_bdev; /* the crypto virtual bdev */
+ uint8_t *key; /* key per bdev */
+ char *drv_name; /* name of the crypto device driver */
+ TAILQ_ENTRY(vbdev_crypto) link;
+};
+static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
+
+/* Shared mempools between all devices on this system */
+static struct spdk_mempool *g_session_mp = NULL; /* session mempool */
+static struct spdk_mempool *g_mbuf_mp = NULL; /* mbuf mempool */
+static struct rte_mempool *g_crypto_op_mp = NULL; /* crypto operations, must be rte* mempool */
+
+/* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code.
+ * We store things in here that are needed on a per-thread basis, like the base_channel for this thread,
+ * and the poller for this thread.
+ */
+struct crypto_io_channel {
+ struct spdk_io_channel *base_ch; /* IO channel of base device */
+ struct spdk_poller *poller; /* completion poller */
+ struct device_qp *device_qp; /* unique device/qp combination for this channel */
+};
+
+/* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
+ * each IO for us.
+ */
+struct crypto_bdev_io {
+ int cryop_cnt_remaining; /* counter used when completing crypto ops */
+ struct crypto_io_channel *crypto_ch; /* need to store for crypto completion handling */
+ struct vbdev_crypto *crypto_bdev; /* the crypto node struct associated with this IO */
+ enum rte_crypto_cipher_operation crypto_op; /* the crypto control struct */
+ struct rte_crypto_sym_xform cipher_xform; /* crypto control struct for this IO */
+ struct spdk_bdev_io *orig_io; /* the original IO */
+ struct spdk_bdev_io *read_io; /* the read IO we issued */
+
+ /* Used for the single contiguous buffer that serves as the crypto destination target for writes */
+ uint64_t cry_num_blocks; /* num of blocks for the contiguous buffer */
+ uint64_t cry_offset_blocks; /* block offset on media */
+ struct iovec cry_iov; /* iov representing contig write buffer */
+};
+
+/* This is called from the module's init function. We set up all crypto devices early on, as we are
+ * unable to easily configure queue pairs dynamically after the drivers are up and running. So, here, we
+ * configure the max capabilities of each device and assign threads to queue pairs as channels are
+ * requested.
+ */
+static int
+vbdev_crypto_init_crypto_drivers(void)
+{
+ uint8_t cdev_count;
+ uint8_t cdrv_id, cdev_id, i, j;
+ int rc = 0;
+ struct vbdev_dev *device = NULL;
+ struct device_qp *dev_qp = NULL;
+ unsigned int max_sess_size = 0, sess_size;
+ uint16_t num_lcores = rte_lcore_count();
+
+ /* Only the first call, via RPC or module init, should init the crypto drivers. */
+ if (g_session_mp != NULL) {
+ return 0;
+ }
+
+ /* We always init AESNI_MB */
+ rc = rte_vdev_init(AESNI_MB, NULL);
+ if (rc == 0) {
+ SPDK_NOTICELOG("created virtual PMD %s\n", AESNI_MB);
+ } else {
+ SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
+ return -EINVAL;
+ }
+
+ /* If we have no crypto devices, there's no reason to continue. */
+ cdev_count = rte_cryptodev_count();
+ if (cdev_count == 0) {
+ return 0;
+ }
+
+ /*
+ * Create global mempools, shared by all devices regardless of type.
+ */
+
+ /* First determine the max session size; most pools are shared by all the devices,
+ * so we need to find the global max session size.
+ */
+ for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
+ sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
+ if (sess_size > max_sess_size) {
+ max_sess_size = sess_size;
+ }
+ }
+
+ g_session_mp = spdk_mempool_create("session_mp", NUM_SESSIONS * 2, max_sess_size,
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (g_session_mp == NULL) {
+ SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
+ return -ENOMEM;
+ }
+
+ g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (g_mbuf_mp == NULL) {
+ SPDK_ERRLOG("Cannot create mbuf pool\n");
+ rc = -ENOMEM;
+ goto error_create_mbuf;
+ }
+
+ g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ NUM_MBUFS,
+ POOL_CACHE_SIZE,
+ AES_CBC_IV_LENGTH,
+ rte_socket_id());
+ if (g_crypto_op_mp == NULL) {
+ SPDK_ERRLOG("Cannot create op pool\n");
+ rc = -ENOMEM;
+ goto error_create_op;
+ }
+
+ /*
+ * Now lets configure each device.
+ */
+ for (i = 0; i < cdev_count; i++) {
+ device = calloc(1, sizeof(struct vbdev_dev));
+ if (!device) {
+ rc = -ENOMEM;
+ goto error_create_device;
+ }
+
+ /* Get details about this device. */
+ rte_cryptodev_info_get(i, &device->cdev_info);
+ cdrv_id = device->cdev_info.driver_id;
+ cdev_id = device->cdev_id = i;
+
+ /* Before going any further, make sure we have enough resources for this
+ * device type to function. We need a unique queue pair per core across each
+ * device type to remain lockless....
+ */
+ if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
+ device->cdev_info.max_nb_queue_pairs) < num_lcores) {
+ SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
+ device->cdev_info.driver_name);
+ SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
+ rc = -EINVAL;
+ goto error_qp;
+ }
+
+ /* Setup queue pairs. */
+ struct rte_cryptodev_config conf = {
+ .nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
+ .socket_id = SPDK_ENV_SOCKET_ID_ANY
+ };
+
+ rc = rte_cryptodev_configure(cdev_id, &conf);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to configure cryptodev %u", cdev_id);
+ rc = -EINVAL;
+ goto error_dev_config;
+ }
+
+ struct rte_cryptodev_qp_conf qp_conf = {
+ .nb_descriptors = CRYPTO_QP_DESCRIPTORS
+ };
+
+ /* Pre-set up all potential qpairs now and assign them in the channel
+ * callback. If we were to create them there, we'd have to stop the
+ * entire device, affecting all other threads that might be using it,
+ * even on other queue pairs.
+ */
+ for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
+ rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY,
+ (struct rte_mempool *)g_session_mp);
+
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to setup queue pair %u on "
+ "cryptodev %u", j, cdev_id);
+ rc = -EINVAL;
+ goto error_qp_setup;
+ }
+ }
+
+ rc = rte_cryptodev_start(cdev_id);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to start device %u: error %d\n",
+ cdev_id, rc);
+ rc = -EINVAL;
+ goto error_device_start;
+ }
+
+ /* Add to our list of available crypto devices. */
+ TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
+
+ /* Build up list of device/qp combinations */
+ for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
+ dev_qp = calloc(1, sizeof(struct device_qp));
+ if (!dev_qp) {
+ rc = -ENOMEM;
+ goto error_create_devqp;
+ }
+ dev_qp->device = device;
+ dev_qp->qp = j;
+ dev_qp->in_use = false;
+ TAILQ_INSERT_TAIL(&g_device_qp, dev_qp, link);
+ }
+ }
+ return 0;
+
+ /* Error cleanup paths. */
+error_create_devqp:
+ while ((dev_qp = TAILQ_FIRST(&g_device_qp))) {
+ TAILQ_REMOVE(&g_device_qp, dev_qp, link);
+ free(dev_qp);
+ }
+error_device_start:
+error_qp_setup:
+error_dev_config:
+error_qp:
+ free(device);
+error_create_device:
+ rte_mempool_free(g_crypto_op_mp);
+error_create_op:
+ spdk_mempool_free(g_mbuf_mp);
+error_create_mbuf:
+ spdk_mempool_free(g_session_mp);
+ return rc;
+}
+
+/* Following an encrypt or decrypt we need to then either write the encrypted data or finish
+ * the read on decrypted data. Do that here.
+ */
+static void
+_crypto_operation_complete(struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
+ crypto_bdev);
+ struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
+ struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
+ struct spdk_bdev_io *free_me = io_ctx->read_io;
+ int rc = 0;
+
+ if (bdev_io->internal.status != SPDK_BDEV_IO_STATUS_FAILED) {
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+
+ /* Complete the original IO and then free the one that we created
+ * as a result of issuing an IO via submit_request.
+ */
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ spdk_bdev_free_io(free_me);
+
+ } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+
+ /* Write the encrypted data. */
+ rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
+ &io_ctx->cry_iov, 1, io_ctx->cry_offset_blocks,
+ io_ctx->cry_num_blocks, _complete_internal_write,
+ bdev_io);
+ } else {
+
+ /* Something really went haywire if this function got called with a type
+ * other than read or write.
+ */
+ rc = -1;
+ }
+ } else {
+ /* If the poller found that one of the crypto ops had failed as part of this
+ * bdev_io, it would have updated the internal status to indicate failure.
+ */
+ rc = -1;
+ }
+
+ if (rc != 0) {
+ SPDK_ERRLOG("ERROR on crypto operation completion!\n");
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+
+}
+
+/* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
+ * the device. Then we need to decide if what we've got so far (including previous poller
+ * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
+ * accordingly. This means either completing a read or issuing a new write.
+ */
+static int
+crypto_dev_poller(void *args)
+{
+ struct crypto_io_channel *crypto_ch = args;
+ uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
+ int i, num_dequeued_ops;
+ struct spdk_bdev_io *bdev_io = NULL;
+ struct crypto_bdev_io *io_ctx = NULL;
+ struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
+ struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
+ int num_mbufs = 0;
+
+ /* Each run of the poller will get just what the device has available
+ * at the moment we call it, we don't check again after draining the
+ * first batch.
+ */
+ num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
+ dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
+
+ /* Check if operation was processed successfully */
+ for (i = 0; i < num_dequeued_ops; i++) {
+
+ /* We don't know the order or association of the crypto ops wrt any
+ * particular bdev_io, so we need to look at each and determine if it's
+ * the last one for its bdev_io or not.
+ */
+ bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata;
+ assert(bdev_io != NULL);
+
+ if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
+ SPDK_ERRLOG("error with op %d status %u\n", i,
+ dequeued_ops[i]->status);
+ /* Update the bdev status to error, we'll still process the
+ * rest of the crypto ops for this bdev_io though so they
+ * aren't left hanging.
+ */
+ bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
+ assert(io_ctx->cryop_cnt_remaining > 0);
+
+ /* Return the associated src and dst mbufs by collecting them into
+ * an array that we can use the bulk API to free after the loop.
+ */
+ dequeued_ops[i]->sym->m_src->userdata = NULL;
+ mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
+ if (dequeued_ops[i]->sym->m_dst) {
+ mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
+ }
+
+ /* done encrypting, complete the bdev_io */
+ if (--io_ctx->cryop_cnt_remaining == 0) {
+
+ /* Complete the IO */
+ _crypto_operation_complete(bdev_io);
+
+ /* Return session */
+ rte_cryptodev_sym_session_clear(cdev_id, dequeued_ops[i]->sym->session);
+ rte_cryptodev_sym_session_free(dequeued_ops[i]->sym->session);
+ }
+ }
+
+ /* Now bulk free both mbufs and crypto operations. */
+ if (num_dequeued_ops > 0) {
+ rte_mempool_put_bulk(g_crypto_op_mp,
+ (void **)dequeued_ops,
+ num_dequeued_ops);
+ assert(num_mbufs > 0);
+ spdk_mempool_put_bulk(g_mbuf_mp,
+ (void **)mbufs_to_free,
+ num_mbufs);
+ }
+
+ return num_dequeued_ops;
+}
+
+/* We're either encrypting on the way down or decrypting on the way back. */
+static int
+_crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op)
+{
+ struct rte_cryptodev_sym_session *session;
+ uint16_t num_enqueued_ops = 0;
+ uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
+ struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
+ struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
+ uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
+ uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
+ uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
+ int rc;
+ uint32_t enqueued = 0;
+ uint32_t iov_index = 0;
+ uint32_t allocated = 0;
+ uint8_t *current_iov = NULL;
+ uint64_t total_remaining = 0;
+ uint64_t current_iov_remaining = 0;
+ int completed = 0;
+ int crypto_index = 0;
+ uint32_t en_offset = 0;
+ struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
+ struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
+ struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
+ int burst;
+
+ assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
+
+ /* Get the number of source mbufs that we need. These will always be 1:1 because we
+ * don't support chaining. The reason we don't is our decision to use the LBA as
+ * the IV: there can be no case where we'd need more than one mbuf per crypto op,
+ * as that would mean an op spanning more than one LBA.
+ */
+ rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
+ if (rc) {
+ SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
+ return -ENOMEM;
+ }
+
+ /* Get the same number of mbufs, but these buffers describe the encrypted data location (dst). */
+ if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+ rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
+ if (rc) {
+ SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
+ rc = -ENOMEM;
+ goto error_get_dst;
+ }
+ }
+
+ /* Allocate crypto operations. */
+ allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ crypto_ops, cryop_cnt);
+ if (allocated < cryop_cnt) {
+ SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
+ rc = -ENOMEM;
+ goto error_get_ops;
+ }
+
+ /* Get a session. */
+ session = rte_cryptodev_sym_session_create((struct rte_mempool *)g_session_mp);
+ if (NULL == session) {
+ SPDK_ERRLOG("ERROR trying to create crypto session!\n");
+ rc = -EINVAL;
+ goto error_session_create;
+ }
+
+ /* Init our session with the desired cipher options. */
+ io_ctx->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ io_ctx->cipher_xform.cipher.key.data = io_ctx->crypto_bdev->key;
+ io_ctx->cipher_xform.cipher.op = io_ctx->crypto_op = crypto_op;
+ io_ctx->cipher_xform.cipher.iv.offset = IV_OFFSET;
+ io_ctx->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ io_ctx->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
+ io_ctx->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
+
+ rc = rte_cryptodev_sym_session_init(cdev_id, session,
+ &io_ctx->cipher_xform,
+ (struct rte_mempool *)g_session_mp);
+ if (rc < 0) {
+ SPDK_ERRLOG("ERROR trying to init crypto session!\n");
+ rc = -EINVAL;
+ goto error_session_init;
+ }
+
+ /* For encryption, we need to prepare a single contiguous buffer as the encryption
+ * destination; we'll then pass that along for the write after encryption is done.
+ * This is done to avoid encrypting the provided write buffer, which may be
+ * undesirable in some use cases.
+ */
+ if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+ io_ctx->cry_iov.iov_len = total_length;
+ /* For now just allocate in the I/O path, not optimal but the current bdev API
+ * for getting a buffer from the pool won't work if the bdev_io passed in
+ * has a buffer, which ours always will. So, until we modify that API
+ * or better yet the current ZCOPY work lands, this is the best we can do.
+ */
+ io_ctx->cry_iov.iov_base = spdk_dma_malloc(total_length, 0x10, NULL);
+ if (!io_ctx->cry_iov.iov_base) {
+ SPDK_ERRLOG("ERROR trying to allocate write buffer for encryption!\n");
+ rc = -ENOMEM;
+ goto error_get_write_buffer;
+ }
+ io_ctx->cry_offset_blocks = bdev_io->u.bdev.offset_blocks;
+ io_ctx->cry_num_blocks = bdev_io->u.bdev.num_blocks;
+ }
+
+ /* This value is used in the completion callback to determine when the bdev_io is
+ * complete.
+ */
+ io_ctx->cryop_cnt_remaining = cryop_cnt;
+
+ /* As we don't support chaining because of a decision to use the LBA as the IV,
+ * construction of crypto operations is straightforward. We build the op, the mbuf
+ * and the dst_mbuf in our local arrays by looping through the length of the bdev IO
+ * and picking off LBA-sized blocks of memory from the IOVs as we walk through them.
+ * Each LBA-sized chunk of memory will correspond 1:1 to a crypto operation and a single
+ * mbuf per crypto operation.
+ */
+ total_remaining = total_length;
+ current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
+ current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
+ do {
+ uint8_t *iv_ptr;
+ uint64_t op_block_offset;
+
+ /* Set the mbuf element's address and length. Null out the next pointer. */
+ src_mbufs[crypto_index]->buf_addr = current_iov;
+ src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov);
+ src_mbufs[crypto_index]->data_len = crypto_len;
+ src_mbufs[crypto_index]->next = NULL;
+ /* Store context in every mbuf as we don't know anything about completion order */
+ src_mbufs[crypto_index]->userdata = bdev_io;
+
+ /* Set the IV - we use the LBA of the crypto_op */
+ iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
+ IV_OFFSET);
+ memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
+ op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
+ rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
+
+ /* Set the data to encrypt/decrypt length */
+ crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
+ crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
+
+ /* link the mbuf to the crypto op. */
+ crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
+ if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+ crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
+ } else {
+ crypto_ops[crypto_index]->sym->m_dst = NULL;
+ }
+
+ /* For encrypt, point the destination at the buffer we allocated and redirect the bdev_io
+ * that processes the write on completion to that same buffer. Setting up the
+ * destination is a little simpler because we know it is a single IOV.
+ */
+ if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+
+ /* Set the relevant destination en_mbuf elements. */
+ dst_mbufs[crypto_index]->buf_addr = io_ctx->cry_iov.iov_base + en_offset;
+ dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr);
+ dst_mbufs[crypto_index]->data_len = crypto_len;
+ crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
+ en_offset += crypto_len;
+ dst_mbufs[crypto_index]->next = NULL;
+ }
+
+ /* Attach the crypto session to the operation */
+ rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], session);
+ if (rc) {
+ rc = -EINVAL;
+ goto error_attach_session;
+ }
+
+ /* Subtract our running totals for the op in progress and the overall bdev io */
+ total_remaining -= crypto_len;
+ current_iov_remaining -= crypto_len;
+
+ /* move our current IOV pointer accordingly. */
+ current_iov += crypto_len;
+
+ /* move on to the next crypto operation */
+ crypto_index++;
+
+ /* If we're done with this IOV, move to the next one. */
+ if (current_iov_remaining == 0 && total_remaining > 0) {
+ iov_index++;
+ current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
+ current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
+ }
+ } while (total_remaining > 0);
+
+ /* Enqueue everything we've got but limit by the max number of descriptors we
+ * configured the crypto device for.
+ */
+ do {
+ burst = spdk_min((cryop_cnt - enqueued), CRYPTO_QP_DESCRIPTORS);
+ num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
+ &crypto_ops[enqueued],
+ burst);
+ enqueued += num_enqueued_ops;
+
+ /* Dequeue all inline if the device is full. We don't defer anything simply
+ * because of the complexity involved as we're building 1 or more crypto
+ * ops per IO. Dequeue will free up space for more enqueue.
+ */
+ if (enqueued < cryop_cnt) {
+
+ /* Dequeue everything; this may include ops that were already
+ * in the device before this submission.
+ */
+ do {
+ completed = crypto_dev_poller(crypto_ch);
+ } while (completed > 0);
+ }
+ } while (enqueued < cryop_cnt);
+
+ return rc;
+
+ /* Error cleanup paths. */
+error_attach_session:
+error_get_write_buffer:
+error_session_init:
+ rte_cryptodev_sym_session_clear(cdev_id, session);
+ rte_cryptodev_sym_session_free(session);
+error_session_create:
+ rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, cryop_cnt);
+ allocated = 0;
+error_get_ops:
+ if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+ spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
+ cryop_cnt);
+ }
+ if (allocated > 0) {
+ rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
+ allocated);
+ }
+error_get_dst:
+ spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
+ cryop_cnt);
+ return rc;
+}
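
The loop above derives each op's initialization vector from the logical block address. A minimal standalone sketch of that derivation, assuming a 16-byte AES-CBC IV as AES_CBC_IV_LENGTH implies (the helper name is illustrative, not part of the module):

    #include <stdint.h>
    #include <string.h>

    /* Build the IV for the op_index-th crypto op of an I/O starting at
     * offset_blocks: zero the IV, then copy the 64-bit LBA into its low bytes,
     * mirroring the memset()/rte_memcpy() pair in _crypto_operation(). */
    static void
    example_build_lba_iv(uint8_t iv[16], uint64_t offset_blocks, uint32_t op_index)
    {
        uint64_t lba = offset_blocks + op_index;

        memset(iv, 0, 16);
        memcpy(iv, &lba, sizeof(lba));
    }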
+
+/* Completion callback for IOs issued from this bdev other than read/write.
+ * Reads and writes have their own callbacks for readability.
+ */
+static void
+_complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *orig_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+ spdk_bdev_io_complete(orig_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+/* Completion callback for writes that were issued from this bdev. */
+static void
+_complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *orig_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+ struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
+
+ spdk_dma_free(orig_ctx->cry_iov.iov_base);
+ spdk_bdev_io_complete(orig_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+/* Completion callback for reads that were issued from this bdev. */
+static void
+_complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *orig_io = cb_arg;
+ struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
+
+ if (success) {
+
+ /* Save off this bdev_io so it can be freed after decryption. */
+ orig_ctx->read_io = bdev_io;
+
+ if (_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT)) {
+ SPDK_ERRLOG("ERROR decrypting");
+ spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
+ spdk_bdev_free_io(bdev_io);
+ }
+ } else {
+ SPDK_ERRLOG("ERROR on read prior to decrypting");
+ spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
+ spdk_bdev_free_io(bdev_io);
+ }
+}
+
+/* Callback for getting a buf from the bdev pool in the event that the caller passed
+ * in NULL. We need to own the buffer so it doesn't get freed by another vbdev module
+ * beneath us before we're done with it.
+ */
+static void
+crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
+ crypto_bdev);
+ struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks, _complete_internal_read,
+ bdev_io);
+ if (rc != 0) {
+ SPDK_ERRLOG("ERROR on bdev_io submission!\n");
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+/* Called when someone submits IO to this crypto vbdev. For IOs not relevant to crypto,
+ * we simply pass them on here via SPDK IO calls, which in turn allocate another bdev IO
+ * and call our cpl callback provided below along with the original bdev_io so that we can
+ * complete it once this IO completes. For crypto operations, we either encrypt first
+ * (writes) and then call back into bdev to submit, or we submit a read and then catch it
+ * on the way back for decryption.
+ */
+static void
+vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
+ crypto_bdev);
+ struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
+ struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
+ int rc = 0;
+
+ memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
+ io_ctx->crypto_bdev = crypto_bdev;
+ io_ctx->crypto_ch = crypto_ch;
+ io_ctx->orig_io = bdev_io;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ _complete_internal_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ _complete_internal_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
+ _complete_internal_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ default:
+ SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ if (rc != 0) {
+ SPDK_ERRLOG("ERROR on bdev_io submission!\n");
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
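
A caller-side sketch of how an I/O reaches this entry point: the crypto layer is transparent, so callers use the ordinary bdev API. The vbdev name and buffer below are hypothetical:

    #include "spdk/bdev.h"

    static void
    example_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
    {
        spdk_bdev_free_io(bdev_io);
    }

    static int
    example_read_through_crypto(void *buf)
    {
        struct spdk_bdev *bdev = spdk_bdev_get_by_name("crypto_malloc");
        struct spdk_bdev_desc *desc;
        struct spdk_io_channel *ch;
        int rc;

        if (!bdev) {
            return -ENODEV;
        }
        rc = spdk_bdev_open(bdev, false, NULL, NULL, &desc);
        if (rc) {
            return rc;
        }
        ch = spdk_bdev_get_io_channel(desc);
        /* Arrives in vbdev_crypto_submit_request() as SPDK_BDEV_IO_TYPE_READ;
         * the data is decrypted in _complete_internal_read() before
         * example_read_done() is invoked. */
        return spdk_bdev_read_blocks(desc, ch, buf, 0, 8, example_read_done, NULL);
    }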
+
+/* We'll just ask the base bdev and let it answer, except for the WRITE_ZEROES command,
+ * which we always claim not to support so that the bdev layer will actually send us
+ * real writes that we can encrypt.
+ */
+static bool
+vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ /* Force the bdev layer to issue actual writes of zeroes so we can
+ * encrypt them as regular writes.
+ */
+ default:
+ return false;
+ }
+}
+
+/* Called after we've unregistered following a hot remove callback.
+ * Our finish entry point will be called next.
+ */
+static int
+vbdev_crypto_destruct(void *ctx)
+{
+ struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
+
+ /* Unclaim the underlying bdev. */
+ spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
+
+ /* Close the underlying bdev. */
+ spdk_bdev_close(crypto_bdev->base_desc);
+
+ /* Done with this crypto_bdev. */
+ TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
+ free(crypto_bdev->drv_name);
+ free(crypto_bdev->key);
+ free(crypto_bdev->crypto_bdev.name);
+ free(crypto_bdev);
+ return 0;
+}
+
+/* We supplied this as an entry point for upper layers that want to communicate with this
+ * bdev. This is how they get a channel. We are passed the same context we provided when
+ * we created our crypto vbdev in examine() which, for this bdev, is the address of one of
+ * our context nodes. From here we'll ask the SPDK channel code to fill out our channel
+ * struct and we'll keep it in our crypto node.
+ */
+static struct spdk_io_channel *
+vbdev_crypto_get_io_channel(void *ctx)
+{
+ struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
+
+ /* The IO channel code will allocate a channel for us which consists of
+ * the SPDK channel structure plus the size of our crypto_io_channel struct
+ * that we passed in when we registered our IO device. It will then call
+ * our channel create callback to populate any elements that we need to
+ * update.
+ */
+ return spdk_get_io_channel(crypto_bdev);
+}
+
+/* This is the output for get_bdevs() for this vbdev */
+static int
+vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
+
+ spdk_json_write_name(w, "crypto");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
+ spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
+ spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
+ spdk_json_write_named_string(w, "key", crypto_bdev->key);
+ spdk_json_write_object_end(w);
+ return 0;
+}
+
+static int
+vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
+{
+ struct vbdev_crypto *crypto_bdev, *tmp;
+
+ TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "construct_crypto_bdev");
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
+ spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
+ spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
+ spdk_json_write_named_string(w, "key", crypto_bdev->key);
+ spdk_json_write_object_end(w);
+ spdk_json_write_object_end(w);
+ }
+ return 0;
+}
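
For reference, one element of the array that vbdev_crypto_config_json() emits would be shaped like this (values illustrative):

    {
      "method": "construct_crypto_bdev",
      "params": {
        "base_bdev_name": "Malloc0",
        "name": "crypto_malloc",
        "crypto_pmd": "crypto_aesni_mb",
        "key": "0123456789123456"
      }
    }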
+
+/* We provide this callback for the SPDK channel code to create a channel using
+ * the channel struct we provided in our module get_io_channel() entry point. Here
+ * we get and save off an underlying base channel of the device below us so that
+ * we can communicate with the base bdev on a per channel basis. We also register the
+ * poller used to complete crypto operations from the device.
+ */
+static int
+crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
+{
+ struct crypto_io_channel *crypto_ch = ctx_buf;
+ struct vbdev_crypto *crypto_bdev = io_device;
+ struct device_qp *device_qp;
+
+ crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
+ crypto_ch->poller = spdk_poller_register(crypto_dev_poller, crypto_ch, 0);
+ crypto_ch->device_qp = NULL;
+
+ pthread_mutex_lock(&g_device_qp_lock);
+ TAILQ_FOREACH(device_qp, &g_device_qp, link) {
+ if ((strcmp(device_qp->device->cdev_info.driver_name, crypto_bdev->drv_name) == 0) &&
+ (device_qp->in_use == false)) {
+ crypto_ch->device_qp = device_qp;
+ device_qp->in_use = true;
+ SPDK_NOTICELOG("Device queue pair assignment: ch %p device %p qpid %u %s\n",
+ crypto_ch, device_qp->device, crypto_ch->device_qp->qp, crypto_bdev->drv_name);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&g_device_qp_lock);
+ assert(crypto_ch->device_qp);
+ return 0;
+}
+
+/* We provide this callback for the SPDK channel code to destroy a channel
+ * created with our create callback. We just need to undo anything we did
+ * when we created.
+ */
+static void
+crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct crypto_io_channel *crypto_ch = ctx_buf;
+
+ pthread_mutex_lock(&g_device_qp_lock);
+ crypto_ch->device_qp->in_use = false;
+ pthread_mutex_unlock(&g_device_qp_lock);
+
+ spdk_poller_unregister(&crypto_ch->poller);
+ spdk_put_io_channel(crypto_ch->base_ch);
+}
+
+/* Create the association between the bdev name and the vbdev name and insert
+ * it on the global list. */
+static int
+vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
+ const char *crypto_pmd, const char *key)
+{
+ struct bdev_names *name;
+ int rc, j;
+ bool found = false;
+
+ name = calloc(1, sizeof(struct bdev_names));
+ if (!name) {
+ SPDK_ERRLOG("could not allocate bdev_names\n");
+ return -ENOMEM;
+ }
+
+ name->bdev_name = strdup(bdev_name);
+ if (!name->bdev_name) {
+ SPDK_ERRLOG("could not allocate name->bdev_name\n");
+ rc = -ENOMEM;
+ goto error_alloc_bname;
+ }
+
+ name->vbdev_name = strdup(vbdev_name);
+ if (!name->vbdev_name) {
+ SPDK_ERRLOG("could not allocate name->vbdev_name\n");
+ rc = -ENOMEM;
+ goto error_alloc_vname;
+ }
+
+ name->drv_name = strdup(crypto_pmd);
+ if (!name->drv_name) {
+ SPDK_ERRLOG("could not allocate name->drv_name\n");
+ rc = -ENOMEM;
+ goto error_alloc_dname;
+ }
+ for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
+ if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
+ rc = -EINVAL;
+ goto error_invalid_pmd;
+ }
+
+ name->key = strdup(key);
+ if (!name->key) {
+ SPDK_ERRLOG("could not allocate name->key\n");
+ rc = -ENOMEM;
+ goto error_alloc_key;
+ }
+ if (strlen(name->key) != AES_CBC_KEY_LENGTH) {
+ SPDK_ERRLOG("invalid AES_CBC key length\n");
+ rc = -EINVAL;
+ goto error_invalid_key;
+ }
+
+ TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
+
+ return 0;
+
+ /* Error cleanup paths. */
+error_invalid_key:
+error_alloc_key:
+error_invalid_pmd:
+ free(name->drv_name);
+error_alloc_dname:
+ free(name->vbdev_name);
+error_alloc_vname:
+ free(name->bdev_name);
+error_alloc_bname:
+ free(name);
+ return rc;
+}
+
+/* RPC entry point for crypto creation. */
+int
+create_crypto_disk(const char *bdev_name, const char *vbdev_name,
+ const char *crypto_pmd, const char *key)
+{
+ struct spdk_bdev *bdev = NULL;
+ struct vbdev_crypto *crypto_bdev, *tmp;
+ int rc = 0;
+
+ bdev = spdk_bdev_get_by_name(bdev_name);
+
+ rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key);
+ if (rc) {
+ return rc;
+ }
+
+ if (!bdev) {
+ return 0;
+ }
+
+ rc = vbdev_crypto_claim(bdev);
+ if (rc) {
+ return rc;
+ }
+
+ rc = vbdev_crypto_init_crypto_drivers();
+ if (rc) {
+ return rc;
+ }
+
+ TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
+ if (strcmp(crypto_bdev->base_bdev->name, bdev->name) == 0) {
+ rc = spdk_vbdev_register(&crypto_bdev->crypto_bdev,
+ &crypto_bdev->base_bdev, 1);
+ if (rc) {
+ SPDK_ERRLOG("could not register crypto_bdev\n");
+ spdk_bdev_close(crypto_bdev->base_desc);
+ TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
+ free(crypto_bdev->crypto_bdev.name);
+ free(crypto_bdev->key);
+ free(crypto_bdev);
+ }
+ break;
+ }
+ }
+
+ return rc;
+}
+
+/* Called at driver init time; parses the config file to prepare for examine calls,
+ * and also fully initializes the crypto drivers.
+ */
+static int
+vbdev_crypto_init(void)
+{
+ struct spdk_conf_section *sp = NULL;
+ const char *conf_bdev_name = NULL;
+ const char *conf_vbdev_name = NULL;
+ const char *crypto_pmd = NULL;
+ int i;
+ int rc = 0;
+ const char *key = NULL;
+
+ /* Fully configure both SW and HW drivers. */
+ rc = vbdev_crypto_init_crypto_drivers();
+ if (rc) {
+ SPDK_ERRLOG("Error setting up crypto devices\n");
+ return rc;
+ }
+
+ sp = spdk_conf_find_section(NULL, "crypto");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ for (i = 0; ; i++) {
+
+ if (!spdk_conf_section_get_nval(sp, "CRY", i)) {
+ break;
+ }
+
+ conf_bdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 0);
+ if (!conf_bdev_name) {
+ SPDK_ERRLOG("crypto configuration missing bdev name\n");
+ return -EINVAL;
+ }
+
+ conf_vbdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 1);
+ if (!conf_vbdev_name) {
+ SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n");
+ return -EINVAL;
+ }
+
+ key = spdk_conf_section_get_nmval(sp, "CRY", i, 2);
+ if (!key) {
+ SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n");
+ return -EINVAL;
+ }
+ SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n");
+
+ crypto_pmd = spdk_conf_section_get_nmval(sp, "CRY", i, 3);
+ if (!crypto_pmd) {
+ SPDK_ERRLOG("crypto configuration missing driver type\n");
+ return -EINVAL;
+ }
+
+ rc = vbdev_crypto_insert_name(conf_bdev_name, conf_vbdev_name,
+ crypto_pmd, key);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+
+ return rc;
+}
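
Per the nmval indices above, each CRY line names the base bdev, the vbdev, the key and the PMD, in that order. An illustrative section (the key must be exactly AES_CBC_KEY_LENGTH characters; the PMD name assumes the AESNI_MB driver):

    [crypto]
      CRY Malloc0 crypto_malloc 0123456789123456 crypto_aesni_mb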
+
+/* Called when the entire module is being torn down. */
+static void
+vbdev_crypto_finish(void)
+{
+ struct bdev_names *name;
+ struct vbdev_dev *device;
+ struct device_qp *dev_qp;
+
+ while ((name = TAILQ_FIRST(&g_bdev_names))) {
+ TAILQ_REMOVE(&g_bdev_names, name, link);
+ free(name->drv_name);
+ free(name->key);
+ free(name->bdev_name);
+ free(name->vbdev_name);
+ free(name);
+ }
+
+ while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
+ TAILQ_REMOVE(&g_vbdev_devs, device, link);
+ rte_cryptodev_stop(device->cdev_id);
+ free(device);
+ }
+
+ while ((dev_qp = TAILQ_FIRST(&g_device_qp))) {
+ TAILQ_REMOVE(&g_device_qp, dev_qp, link);
+ free(dev_qp);
+ }
+
+ rte_mempool_free(g_crypto_op_mp);
+ spdk_mempool_free(g_mbuf_mp);
+ spdk_mempool_free(g_session_mp);
+}
+
+/* During init we'll be asked how much memory we'd like passed to us
+ * in bdev_io structures as context. Here's where we specify how
+ * much context we want per IO.
+ */
+static int
+vbdev_crypto_get_ctx_size(void)
+{
+ return sizeof(struct crypto_bdev_io);
+}
+
+/* Called when SPDK wants to save the current config of this vbdev module to
+ * a file.
+ */
+static void
+vbdev_crypto_get_spdk_running_config(FILE *fp)
+{
+ struct bdev_names *names = NULL;
+ fprintf(fp, "\n[crypto]\n");
+ TAILQ_FOREACH(names, &g_bdev_names, link) {
+ fprintf(fp, " crypto %s %s ", names->bdev_name, names->vbdev_name);
+ fprintf(fp, "\n");
+ }
+
+ fprintf(fp, "\n");
+}
+
+/* Called when the underlying base bdev goes away. */
+static void
+vbdev_crypto_examine_hotremove_cb(void *ctx)
+{
+ struct vbdev_crypto *crypto_bdev, *tmp;
+ struct spdk_bdev *bdev_find = ctx;
+
+ TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
+ if (bdev_find == crypto_bdev->base_bdev) {
+ spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
+ }
+ }
+}
+
+static void
+vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* No config per bdev needed */
+}
+
+/* When we register our bdev this is how we specify our entry points. */
+static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
+ .destruct = vbdev_crypto_destruct,
+ .submit_request = vbdev_crypto_submit_request,
+ .io_type_supported = vbdev_crypto_io_type_supported,
+ .get_io_channel = vbdev_crypto_get_io_channel,
+ .dump_info_json = vbdev_crypto_dump_info_json,
+ .write_config_json = vbdev_crypto_write_config_json
+};
+
+static struct spdk_bdev_module crypto_if = {
+ .name = "crypto",
+ .module_init = vbdev_crypto_init,
+ .config_text = vbdev_crypto_get_spdk_running_config,
+ .get_ctx_size = vbdev_crypto_get_ctx_size,
+ .examine_config = vbdev_crypto_examine,
+ .module_fini = vbdev_crypto_finish,
+ .config_json = vbdev_crypto_config_json
+};
+
+SPDK_BDEV_MODULE_REGISTER(&crypto_if)
+
+static int
+vbdev_crypto_claim(struct spdk_bdev *bdev)
+{
+ struct bdev_names *name;
+ struct vbdev_crypto *vbdev;
+ int rc = 0;
+
+ /* Check our list of names from config versus this bdev and if
+ * there's a match, create the crypto_bdev & bdev accordingly.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->bdev_name, bdev->name) != 0) {
+ continue;
+ }
+
+ SPDK_NOTICELOG("Match on %s\n", bdev->name);
+ vbdev = calloc(1, sizeof(struct vbdev_crypto));
+ if (!vbdev) {
+ SPDK_ERRLOG("could not allocate crypto_bdev\n");
+ rc = -ENOMEM;
+ goto error_vbdev_alloc;
+ }
+
+ /* The base bdev that we're attaching to. */
+ vbdev->base_bdev = bdev;
+ vbdev->crypto_bdev.name = strdup(name->vbdev_name);
+ if (!vbdev->crypto_bdev.name) {
+ SPDK_ERRLOG("could not allocate crypto_bdev name\n");
+ rc = -ENOMEM;
+ goto error_bdev_name;
+ }
+
+ vbdev->key = strdup(name->key);
+ if (!vbdev->key) {
+ SPDK_ERRLOG("could not allocate crypto_bdev key\n");
+ rc = -ENOMEM;
+ goto error_alloc_key;
+ }
+
+ vbdev->drv_name = strdup(name->drv_name);
+ if (!vbdev->drv_name) {
+ SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
+ rc = -ENOMEM;
+ goto error_drv_name;
+ }
+
+ vbdev->crypto_bdev.product_name = "crypto";
+ vbdev->crypto_bdev.write_cache = bdev->write_cache;
+ vbdev->crypto_bdev.need_aligned_buffer = bdev->need_aligned_buffer;
+ /* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
+ * in units of blocks.
+ */
+ if (bdev->optimal_io_boundary > 0) {
+ vbdev->crypto_bdev.optimal_io_boundary =
+ spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
+ } else {
+ vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
+ }
+ vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
+ vbdev->crypto_bdev.blocklen = bdev->blocklen;
+ vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
+
+ /* This is the context that is passed to us when the bdev
+ * layer calls in so we'll save our crypto_bdev node here.
+ */
+ vbdev->crypto_bdev.ctxt = vbdev;
+ vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
+ vbdev->crypto_bdev.module = &crypto_if;
+ TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
+
+ spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
+ sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
+
+ rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb,
+ bdev, &vbdev->base_desc);
+ if (rc) {
+ SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
+ goto error_open;
+ }
+
+ rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
+ if (rc) {
+ SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
+ goto error_claim;
+ }
+
+ SPDK_NOTICELOG("registered crypto_bdev for: %s\n", name->vbdev_name);
+ }
+
+ return rc;
+
+ /* Error cleanup paths. */
+error_claim:
+ spdk_bdev_close(vbdev->base_desc);
+error_open:
+ TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
+ spdk_io_device_unregister(vbdev, NULL);
+ free(vbdev->drv_name);
+error_drv_name:
+ free(vbdev->key);
+error_alloc_key:
+ free(vbdev->crypto_bdev.name);
+error_bdev_name:
+ free(vbdev);
+error_vbdev_alloc:
+ return rc;
+}
+
+/* RPC entry for deleting a crypto vbdev. */
+void
+delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
+ void *cb_arg)
+{
+ struct bdev_names *name;
+
+ if (!bdev || bdev->module != &crypto_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ /* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
+ * vbdev does not get re-created if the same bdev is constructed at some other time,
+ * unless the underlying bdev was hot-removed.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->vbdev_name, bdev->name) == 0) {
+ TAILQ_REMOVE(&g_bdev_names, name, link);
+ free(name->bdev_name);
+ free(name->vbdev_name);
+ free(name->drv_name);
+ free(name->key);
+ free(name);
+ break;
+ }
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
+/* Because we registered this function as our examine callback when we registered
+ * this module, we'll get this call anytime a new bdev shows up.
+ * Here we need to decide if we care about it and if so what to do. We
+ * parsed the config file at init so we check the new bdev against the list
+ * we built up at that time and if the user configured us to attach to this
+ * bdev, here's where we do it.
+ */
+static void
+vbdev_crypto_examine(struct spdk_bdev *bdev)
+{
+ struct vbdev_crypto *crypto_bdev, *tmp;
+ int rc;
+
+ vbdev_crypto_claim(bdev);
+
+ TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
+ if (strcmp(crypto_bdev->base_bdev->name, bdev->name) == 0) {
+ rc = spdk_vbdev_register(&crypto_bdev->crypto_bdev,
+ &crypto_bdev->base_bdev, 1);
+ if (rc) {
+ SPDK_ERRLOG("could not register crypto_bdev\n");
+ spdk_bdev_close(crypto_bdev->base_desc);
+ TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
+ free(crypto_bdev->crypto_bdev.name);
+ free(crypto_bdev->key);
+ free(crypto_bdev);
+ }
+ break;
+ }
+ }
+
+ spdk_bdev_module_examine_done(&crypto_if);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_VBDEV_crypto)
diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto.h b/src/spdk/lib/bdev/crypto/vbdev_crypto.h
new file mode 100644
index 00000000..c8ef8d16
--- /dev/null
+++ b/src/spdk/lib/bdev/crypto/vbdev_crypto.h
@@ -0,0 +1,66 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_CRYPTO_H
+#define SPDK_VBDEV_CRYPTO_H
+
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_crypto_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create new crypto bdev.
+ *
+ * \param bdev_name Bdev on which the crypto vbdev will be created.
+ * \param vbdev_name Name of the new crypto vbdev.
+ * \param crypto_pmd Name of the crypto PMD to use.
+ * \param key Encryption key for the vbdev.
+ * \return 0 on success, other on failure.
+ */
+int create_crypto_disk(const char *bdev_name, const char *vbdev_name,
+ const char *crypto_pmd, const char *key);
+
+/**
+ * Delete crypto bdev.
+ *
+ * \param bdev Pointer to crypto bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_VBDEV_CRYPTO_H */
diff --git a/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c b/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c
new file mode 100644
index 00000000..cbf5a3b8
--- /dev/null
+++ b/src/spdk/lib/bdev/crypto/vbdev_crypto_rpc.c
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "vbdev_crypto.h"
+
+/* Structure to hold the parameters for this RPC method. */
+struct rpc_construct_crypto {
+ char *base_bdev_name;
+ char *name;
+ char *crypto_pmd;
+ char *key;
+};
+
+/* Free the allocated memory resource after the RPC handling. */
+static void
+free_rpc_construct_crypto(struct rpc_construct_crypto *r)
+{
+ free(r->base_bdev_name);
+ free(r->name);
+ free(r->crypto_pmd);
+ free(r->key);
+}
+
+/* Structure to decode the input parameters for this RPC method. */
+static const struct spdk_json_object_decoder rpc_construct_crypto_decoders[] = {
+ {"base_bdev_name", offsetof(struct rpc_construct_crypto, base_bdev_name), spdk_json_decode_string},
+ {"name", offsetof(struct rpc_construct_crypto, name), spdk_json_decode_string},
+ {"crypto_pmd", offsetof(struct rpc_construct_crypto, crypto_pmd), spdk_json_decode_string},
+ {"key", offsetof(struct rpc_construct_crypto, key), spdk_json_decode_string},
+};
+
+/* Decode the parameters for this RPC method and properly construct the crypto
+ * device. An error status is returned in the failure cases.
+ */
+static void
+spdk_rpc_construct_crypto_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_crypto req = {NULL};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_crypto_decoders,
+ SPDK_COUNTOF(rpc_construct_crypto_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_crypto, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ rc = create_crypto_disk(req.base_bdev_name, req.name,
+ req.crypto_pmd, req.key);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_crypto(&req);
+ return;
+ }
+
+ spdk_json_write_string(w, req.name);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_construct_crypto(&req);
+ return;
+
+invalid:
+ free_rpc_construct_crypto(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+}
+SPDK_RPC_REGISTER("construct_crypto_bdev", spdk_rpc_construct_crypto_bdev, SPDK_RPC_RUNTIME)
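
A request matching the decoders above might look as follows (values illustrative). On success the result is the new vbdev's name, per the spdk_json_write_string() call:

    {
      "jsonrpc": "2.0",
      "id": 1,
      "method": "construct_crypto_bdev",
      "params": {
        "base_bdev_name": "Malloc0",
        "name": "crypto_malloc",
        "crypto_pmd": "crypto_aesni_mb",
        "key": "0123456789123456"
      }
    }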
+
+struct rpc_delete_crypto {
+ char *name;
+};
+
+static void
+free_rpc_delete_crypto(struct rpc_delete_crypto *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_crypto_decoders[] = {
+ {"name", offsetof(struct rpc_delete_crypto, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_crypto_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_crypto_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_crypto req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_crypto_decoders,
+ SPDK_COUNTOF(rpc_delete_crypto_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_crypto_disk(bdev, _spdk_rpc_delete_crypto_bdev_cb, request);
+
+ free_rpc_delete_crypto(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_crypto(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_crypto_bdev", spdk_rpc_delete_crypto_bdev, SPDK_RPC_RUNTIME)
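
The matching delete request takes only the vbdev name and answers with a boolean, per _spdk_rpc_delete_crypto_bdev_cb() above (name illustrative):

    { "jsonrpc": "2.0", "id": 2, "method": "delete_crypto_bdev", "params": { "name": "crypto_malloc" } }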
diff --git a/src/spdk/lib/bdev/error/Makefile b/src/spdk/lib/bdev/error/Makefile
new file mode 100644
index 00000000..9dcee8bd
--- /dev/null
+++ b/src/spdk/lib/bdev/error/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = vbdev_error.c vbdev_error_rpc.c
+LIBNAME = vbdev_error
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/error/vbdev_error.c b/src/spdk/lib/bdev/error/vbdev_error.c
new file mode 100644
index 00000000..4bab9426
--- /dev/null
+++ b/src/spdk/lib/bdev/error/vbdev_error.c
@@ -0,0 +1,513 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is a module, for test purposes, that simulates error cases for a bdev.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/rpc.h"
+#include "spdk/conf.h"
+#include "spdk/util.h"
+#include "spdk/endian.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/string.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "vbdev_error.h"
+
+struct spdk_vbdev_error_config {
+ char *base_bdev;
+ TAILQ_ENTRY(spdk_vbdev_error_config) tailq;
+};
+
+static TAILQ_HEAD(, spdk_vbdev_error_config) g_error_config
+ = TAILQ_HEAD_INITIALIZER(g_error_config);
+
+struct vbdev_error_info {
+ bool enabled;
+ uint32_t error_type;
+ uint32_t error_num;
+};
+
+/* Context for each error bdev */
+struct error_disk {
+ struct spdk_bdev_part part;
+ struct vbdev_error_info error_vector[SPDK_BDEV_IO_TYPE_RESET];
+ TAILQ_HEAD(, spdk_bdev_io) pending_ios;
+};
+
+struct error_channel {
+ struct spdk_bdev_part_channel part_ch;
+};
+
+static pthread_mutex_t g_vbdev_error_mutex = PTHREAD_MUTEX_INITIALIZER;
+static SPDK_BDEV_PART_TAILQ g_error_disks = TAILQ_HEAD_INITIALIZER(g_error_disks);
+
+static int vbdev_error_init(void);
+static void vbdev_error_fini(void);
+
+static void vbdev_error_examine(struct spdk_bdev *bdev);
+static int vbdev_error_config_json(struct spdk_json_write_ctx *w);
+
+static int vbdev_error_config_add(const char *base_bdev_name);
+static int vbdev_error_config_remove(const char *base_bdev_name);
+
+static struct spdk_bdev_module error_if = {
+ .name = "error",
+ .module_init = vbdev_error_init,
+ .module_fini = vbdev_error_fini,
+ .examine_config = vbdev_error_examine,
+ .config_json = vbdev_error_config_json,
+
+};
+
+SPDK_BDEV_MODULE_REGISTER(&error_if)
+
+int
+spdk_vbdev_inject_error(char *name, uint32_t io_type, uint32_t error_type, uint32_t error_num)
+{
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_part *part;
+ struct error_disk *error_disk = NULL;
+ uint32_t i;
+
+ pthread_mutex_lock(&g_vbdev_error_mutex);
+ bdev = spdk_bdev_get_by_name(name);
+ if (!bdev) {
+ SPDK_ERRLOG("Could not find ErrorInjection bdev %s\n", name);
+ pthread_mutex_unlock(&g_vbdev_error_mutex);
+ return -1;
+ }
+
+ TAILQ_FOREACH(part, &g_error_disks, tailq) {
+ if (bdev == spdk_bdev_part_get_bdev(part)) {
+ error_disk = (struct error_disk *)part;
+ break;
+ }
+ }
+
+ if (error_disk == NULL) {
+ SPDK_ERRLOG("Could not find ErrorInjection bdev %s\n", name);
+ pthread_mutex_unlock(&g_vbdev_error_mutex);
+ return -1;
+ }
+
+ if (0xffffffff == io_type) {
+ for (i = 0; i < SPDK_COUNTOF(error_disk->error_vector); i++) {
+ error_disk->error_vector[i].enabled = true;
+ error_disk->error_vector[i].error_type = error_type;
+ error_disk->error_vector[i].error_num = error_num;
+ }
+ } else if (0 == io_type) {
+ for (i = 0; i < SPDK_COUNTOF(error_disk->error_vector); i++) {
+ error_disk->error_vector[i].enabled = false;
+ error_disk->error_vector[i].error_num = 0;
+ }
+ } else {
+ error_disk->error_vector[io_type].enabled = true;
+ error_disk->error_vector[io_type].error_type = error_type;
+ error_disk->error_vector[io_type].error_num = error_num;
+ }
+ pthread_mutex_unlock(&g_vbdev_error_mutex);
+ return 0;
+}
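
A usage sketch for the hook above; the bdev name follows the EE_<base> pattern established by _spdk_vbdev_error_create() below, and the counts are illustrative:

    /* Fail the next 5 reads on the error bdev built over Malloc0. */
    spdk_vbdev_inject_error("EE_Malloc0", SPDK_BDEV_IO_TYPE_READ, VBDEV_IO_FAILURE, 5);

    /* An io_type of 0 clears every armed error, per the branch above. */
    spdk_vbdev_inject_error("EE_Malloc0", 0, 0, 0);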
+
+static void
+vbdev_error_reset(struct error_disk *error_disk, struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev_io *pending_io, *tmp;
+
+ TAILQ_FOREACH_SAFE(pending_io, &error_disk->pending_ios, module_link, tmp) {
+ TAILQ_REMOVE(&error_disk->pending_ios, pending_io, module_link);
+ spdk_bdev_io_complete(pending_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+}
+
+static uint32_t
+vbdev_error_get_error_type(struct error_disk *error_disk, uint32_t io_type)
+{
+ if (error_disk->error_vector[io_type].enabled &&
+ error_disk->error_vector[io_type].error_num) {
+ return error_disk->error_vector[io_type].error_type;
+ }
+ return 0;
+}
+
+static void
+vbdev_error_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct error_channel *ch = spdk_io_channel_get_ctx(_ch);
+ struct error_disk *error_disk = bdev_io->bdev->ctxt;
+ uint32_t error_type;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ vbdev_error_reset(error_disk, bdev_io);
+ return;
+ default:
+ SPDK_ERRLOG("Error Injection: unknown I/O type %d\n", bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ error_type = vbdev_error_get_error_type(error_disk, bdev_io->type);
+ if (error_type == 0) {
+ int rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
+
+ if (rc) {
+ SPDK_ERRLOG("bdev_error: submit request failed, rc=%d\n", rc);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ return;
+ } else if (error_type == VBDEV_IO_FAILURE) {
+ error_disk->error_vector[bdev_io->type].error_num--;
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ } else if (error_type == VBDEV_IO_PENDING) {
+ TAILQ_INSERT_TAIL(&error_disk->pending_ios, bdev_io, module_link);
+ error_disk->error_vector[bdev_io->type].error_num--;
+ }
+}
+
+static int
+vbdev_error_destruct(void *ctx)
+{
+ struct error_disk *error_disk = ctx;
+ struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(&error_disk->part);
+ int rc;
+
+ rc = vbdev_error_config_remove(base_bdev->name);
+ if (rc != 0) {
+ SPDK_ERRLOG("vbdev_error_config_remove() failed\n");
+ }
+
+ return spdk_bdev_part_free(&error_disk->part);
+}
+
+static int
+vbdev_error_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct error_disk *error_disk = ctx;
+ struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(&error_disk->part);
+
+ spdk_json_write_name(w, "error_disk");
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "base_bdev");
+ spdk_json_write_string(w, base_bdev->name);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+vbdev_error_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* No config per bdev. */
+}
+
+
+static struct spdk_bdev_fn_table vbdev_error_fn_table = {
+ .destruct = vbdev_error_destruct,
+ .submit_request = vbdev_error_submit_request,
+ .dump_info_json = vbdev_error_dump_info_json,
+ .write_config_json = vbdev_error_write_config_json
+};
+
+static void
+spdk_vbdev_error_base_bdev_hotremove_cb(void *_base_bdev)
+{
+ spdk_bdev_part_base_hotremove(_base_bdev, &g_error_disks);
+}
+
+static int
+_spdk_vbdev_error_create(struct spdk_bdev *base_bdev)
+{
+ struct spdk_bdev_part_base *base = NULL;
+ struct error_disk *disk = NULL;
+ char *name;
+ int rc;
+
+ base = spdk_bdev_part_base_construct(base_bdev,
+ spdk_vbdev_error_base_bdev_hotremove_cb,
+ &error_if, &vbdev_error_fn_table, &g_error_disks,
+ NULL, NULL, sizeof(struct error_channel),
+ NULL, NULL);
+ if (!base) {
+ SPDK_ERRLOG("could not construct part base for bdev %s\n", spdk_bdev_get_name(base_bdev));
+ return -ENOMEM;
+ }
+
+ disk = calloc(1, sizeof(*disk));
+ if (!disk) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ spdk_bdev_part_base_free(base);
+ return -ENOMEM;
+ }
+
+ name = spdk_sprintf_alloc("EE_%s", spdk_bdev_get_name(base_bdev));
+ if (!name) {
+ SPDK_ERRLOG("name allocation failure\n");
+ spdk_bdev_part_base_free(base);
+ free(disk);
+ return -ENOMEM;
+ }
+
+ rc = spdk_bdev_part_construct(&disk->part, base, name, 0, base_bdev->blockcnt,
+ "Error Injection Disk");
+ free(name);
+ if (rc) {
+ SPDK_ERRLOG("could not construct part for bdev %s\n", spdk_bdev_get_name(base_bdev));
+ /* Note: "name" was already freed above. */
+ spdk_bdev_part_base_free(base);
+ free(disk);
+ return rc;
+ }
+
+ TAILQ_INIT(&disk->pending_ios);
+
+ return 0;
+}
+
+int
+spdk_vbdev_error_create(const char *base_bdev_name)
+{
+ int rc;
+ struct spdk_bdev *base_bdev;
+
+ rc = vbdev_error_config_add(base_bdev_name);
+ if (rc != 0) {
+ SPDK_ERRLOG("Adding config for ErrorInjection bdev %s failed (rc=%d)\n",
+ base_bdev_name, rc);
+ return rc;
+ }
+
+ base_bdev = spdk_bdev_get_by_name(base_bdev_name);
+ if (!base_bdev) {
+ return 0;
+ }
+
+ rc = _spdk_vbdev_error_create(base_bdev);
+ if (rc != 0) {
+ vbdev_error_config_remove(base_bdev_name);
+ SPDK_ERRLOG("Could not create ErrorInjection bdev %s (rc=%d)\n",
+ base_bdev_name, rc);
+ }
+
+ return rc;
+}
+
+void
+spdk_vbdev_error_delete(struct spdk_bdev *vbdev, spdk_delete_error_complete cb_fn, void *cb_arg)
+{
+ if (!vbdev || vbdev->module != &error_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(vbdev, cb_fn, cb_arg);
+}
+
+static void
+vbdev_error_clear_config(void)
+{
+ struct spdk_vbdev_error_config *cfg;
+
+ while ((cfg = TAILQ_FIRST(&g_error_config))) {
+ TAILQ_REMOVE(&g_error_config, cfg, tailq);
+ free(cfg->base_bdev);
+ free(cfg);
+ }
+}
+
+static struct spdk_vbdev_error_config *
+vbdev_error_config_find_by_base_name(const char *base_bdev_name)
+{
+ struct spdk_vbdev_error_config *cfg;
+
+ TAILQ_FOREACH(cfg, &g_error_config, tailq) {
+ if (strcmp(cfg->base_bdev, base_bdev_name) == 0) {
+ return cfg;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+vbdev_error_config_add(const char *base_bdev_name)
+{
+ struct spdk_vbdev_error_config *cfg;
+
+ cfg = vbdev_error_config_find_by_base_name(base_bdev_name);
+ if (cfg) {
+ SPDK_ERRLOG("vbdev_error_config for bdev %s already exists\n",
+ base_bdev_name);
+ return -EEXIST;
+ }
+
+ cfg = calloc(1, sizeof(*cfg));
+ if (!cfg) {
+ SPDK_ERRLOG("calloc() failed for vbdev_error_config\n");
+ return -ENOMEM;
+ }
+
+ cfg->base_bdev = strdup(base_bdev_name);
+ if (!cfg->base_bdev) {
+ free(cfg);
+ SPDK_ERRLOG("strdup() failed for base_bdev_name\n");
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&g_error_config, cfg, tailq);
+
+ return 0;
+}
+
+static int
+vbdev_error_config_remove(const char *base_bdev_name)
+{
+ struct spdk_vbdev_error_config *cfg;
+
+ cfg = vbdev_error_config_find_by_base_name(base_bdev_name);
+ if (!cfg) {
+ return -ENOENT;
+ }
+
+ TAILQ_REMOVE(&g_error_config, cfg, tailq);
+ free(cfg->base_bdev);
+ free(cfg);
+ return 0;
+}
+
+static int
+vbdev_error_init(void)
+{
+ struct spdk_conf_section *sp;
+ struct spdk_vbdev_error_config *cfg;
+ const char *base_bdev_name;
+ int i, rc;
+
+ sp = spdk_conf_find_section(NULL, "BdevError");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ for (i = 0; ; i++) {
+ if (!spdk_conf_section_get_nval(sp, "BdevError", i)) {
+ break;
+ }
+
+ base_bdev_name = spdk_conf_section_get_nmval(sp, "BdevError", i, 0);
+ if (!base_bdev_name) {
+ SPDK_ERRLOG("ErrorInjection configuration missing bdev name\n");
+ rc = -EINVAL;
+ goto error;
+ }
+
+ cfg = calloc(1, sizeof(*cfg));
+ if (!cfg) {
+ SPDK_ERRLOG("calloc() failed for vbdev_error_config\n");
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ cfg->base_bdev = strdup(base_bdev_name);
+ if (!cfg->base_bdev) {
+ free(cfg);
+ SPDK_ERRLOG("strdup() failed for bdev name\n");
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ TAILQ_INSERT_TAIL(&g_error_config, cfg, tailq);
+ }
+
+ return 0;
+
+error:
+ vbdev_error_clear_config();
+ return rc;
+}
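
The config section this parser consumes is a list of base bdev names, one per BdevError line (name illustrative):

    [BdevError]
      BdevError Malloc0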
+
+static void
+vbdev_error_fini(void)
+{
+ vbdev_error_clear_config();
+}
+
+static void
+vbdev_error_examine(struct spdk_bdev *bdev)
+{
+ struct spdk_vbdev_error_config *cfg;
+ int rc;
+
+ cfg = vbdev_error_config_find_by_base_name(bdev->name);
+ if (cfg != NULL) {
+ rc = _spdk_vbdev_error_create(bdev);
+ if (rc != 0) {
+ SPDK_ERRLOG("could not create error vbdev for bdev %s at examine\n",
+ bdev->name);
+ }
+ }
+
+ spdk_bdev_module_examine_done(&error_if);
+}
+
+static int
+vbdev_error_config_json(struct spdk_json_write_ctx *w)
+{
+ struct spdk_vbdev_error_config *cfg;
+
+ TAILQ_FOREACH(cfg, &g_error_config, tailq) {
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_error_bdev");
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "base_name", cfg->base_bdev);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ }
+
+ return 0;
+}
diff --git a/src/spdk/lib/bdev/error/vbdev_error.h b/src/spdk/lib/bdev/error/vbdev_error.h
new file mode 100644
index 00000000..4ff1ac19
--- /dev/null
+++ b/src/spdk/lib/bdev/error/vbdev_error.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_ERROR_H
+#define SPDK_VBDEV_ERROR_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+
+enum vbdev_error_type {
+ VBDEV_IO_FAILURE = 1,
+ VBDEV_IO_PENDING,
+};
+
+typedef void (*spdk_delete_error_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create a vbdev on the base bdev to inject errors into it.
+ *
+ * \param base_bdev_name Name of the base bdev.
+ * \return 0 on success or negative on failure.
+ */
+int spdk_vbdev_error_create(const char *base_bdev_name);
+
+/**
+ * Delete vbdev used to inject errors.
+ *
+ * \param vbdev Pointer to the error vbdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Arguments to pass to cb_fn.
+ */
+void spdk_vbdev_error_delete(struct spdk_bdev *vbdev, spdk_delete_error_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Inject errors into the ErrorInjection bdev. Users can specify which IO type the
+ * error applies to, what type of error is injected, and how many errors are injected.
+ *
+ * \param name Name of the ErrorInjection bdev into which errors are injected.
+ * \param io_type IO type into which error is injected.
+ * \param error_type Type of the error to inject.
+ * \param error_num Count of injected errors.
+ */
+int spdk_vbdev_inject_error(char *name, uint32_t io_type, uint32_t error_type,
+ uint32_t error_num);
+
+#endif // SPDK_VBDEV_ERROR_H
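
A caller-side lifecycle sketch for this header; the bdev name and completion callback are hypothetical:

    /* Create the error vbdev; it registers under the name "EE_Malloc0". */
    if (spdk_vbdev_error_create("Malloc0") == 0) {
        /* Hold the next 3 writes pending. */
        spdk_vbdev_inject_error("EE_Malloc0", SPDK_BDEV_IO_TYPE_WRITE,
                                VBDEV_IO_PENDING, 3);
    }

    /* Later, tear it down (delete_done is caller-supplied). */
    spdk_vbdev_error_delete(spdk_bdev_get_by_name("EE_Malloc0"), delete_done, NULL);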
diff --git a/src/spdk/lib/bdev/error/vbdev_error_rpc.c b/src/spdk/lib/bdev/error/vbdev_error_rpc.c
new file mode 100644
index 00000000..8d95fd09
--- /dev/null
+++ b/src/spdk/lib/bdev/error/vbdev_error_rpc.c
@@ -0,0 +1,258 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk_internal/log.h"
+#include "vbdev_error.h"
+
+#define ERROR_BDEV_IO_TYPE_INVALID (SPDK_BDEV_IO_TYPE_RESET + 1)
+#define ERROR_BDEV_ERROR_TYPE_INVALID (VBDEV_IO_PENDING + 1)
+
+static uint32_t
+spdk_rpc_error_bdev_io_type_parse(char *name)
+{
+ if (strcmp(name, "read") == 0) {
+ return SPDK_BDEV_IO_TYPE_READ;
+ } else if (strcmp(name, "write") == 0) {
+ return SPDK_BDEV_IO_TYPE_WRITE;
+ } else if (strcmp(name, "flush") == 0) {
+ return SPDK_BDEV_IO_TYPE_FLUSH;
+ } else if (strcmp(name, "unmap") == 0) {
+ return SPDK_BDEV_IO_TYPE_UNMAP;
+ } else if (strcmp(name, "all") == 0) {
+ return 0xffffffff;
+ } else if (strcmp(name, "clear") == 0) {
+ return 0;
+ }
+ return ERROR_BDEV_IO_TYPE_INVALID;
+}
+
+static uint32_t
+spdk_rpc_error_bdev_error_type_parse(char *name)
+{
+ if (strcmp(name, "failure") == 0) {
+ return VBDEV_IO_FAILURE;
+ } else if (strcmp(name, "pending") == 0) {
+ return VBDEV_IO_PENDING;
+ }
+ return ERROR_BDEV_ERROR_TYPE_INVALID;
+}
+
+struct rpc_construct_error_bdev {
+ char *base_name;
+};
+
+static void
+free_rpc_construct_error_bdev(struct rpc_construct_error_bdev *req)
+{
+ free(req->base_name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_error_bdev_decoders[] = {
+ {"base_name", offsetof(struct rpc_construct_error_bdev, base_name), spdk_json_decode_string},
+};
+
+static void
+spdk_rpc_construct_error_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_error_bdev req = {};
+ struct spdk_json_write_ctx *w;
+
+ if (spdk_json_decode_object(params, rpc_construct_error_bdev_decoders,
+ SPDK_COUNTOF(rpc_construct_error_bdev_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (spdk_vbdev_error_create(req.base_name)) {
+ SPDK_ERRLOG("Could not create ErrorInjection bdev %s\n", req.base_name);
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_error_bdev(&req);
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+ free_rpc_construct_error_bdev(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_construct_error_bdev(&req);
+}
+SPDK_RPC_REGISTER("construct_error_bdev", spdk_rpc_construct_error_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_error {
+ char *name;
+};
+
+static void
+free_rpc_delete_error(struct rpc_delete_error *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_error_decoders[] = {
+ {"name", offsetof(struct rpc_delete_error, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_error_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_error_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_error req = {NULL};
+ struct spdk_bdev *vbdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_error_decoders,
+ SPDK_COUNTOF(rpc_delete_error_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ vbdev = spdk_bdev_get_by_name(req.name);
+ if (vbdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ spdk_vbdev_error_delete(vbdev, _spdk_rpc_delete_error_bdev_cb, request);
+
+ free_rpc_delete_error(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_error(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_error_bdev", spdk_rpc_delete_error_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_error_information {
+ char *name;
+ char *io_type;
+ char *error_type;
+ uint32_t num;
+};
+
+static const struct spdk_json_object_decoder rpc_error_information_decoders[] = {
+ {"name", offsetof(struct rpc_error_information, name), spdk_json_decode_string},
+ {"io_type", offsetof(struct rpc_error_information, io_type), spdk_json_decode_string},
+ {"error_type", offsetof(struct rpc_error_information, error_type), spdk_json_decode_string},
+ {"num", offsetof(struct rpc_error_information, num), spdk_json_decode_uint32, true},
+};
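+
+/*
+ * Example "bdev_inject_error" params (hypothetical bdev name); "num" is
+ * optional and defaults to 0 when omitted:
+ *
+ *     {"name": "EE_Malloc0", "io_type": "read", "error_type": "failure", "num": 5}
+ */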
+
+static void
+free_rpc_error_information(struct rpc_error_information *p)
+{
+ free(p->name);
+ free(p->io_type);
+ free(p->error_type);
+}
+
+static void
+spdk_rpc_bdev_inject_error(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_error_information req = {};
+ struct spdk_json_write_ctx *w;
+ uint32_t io_type;
+ uint32_t error_type;
+ int ret;
+
+ if (spdk_json_decode_object(params, rpc_error_information_decoders,
+ SPDK_COUNTOF(rpc_error_information_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ io_type = spdk_rpc_error_bdev_io_type_parse(req.io_type);
+ if (io_type == ERROR_BDEV_IO_TYPE_INVALID) {
+ goto invalid;
+ }
+
+ error_type = spdk_rpc_error_bdev_error_type_parse(req.error_type);
+ if (error_type == ERROR_BDEV_ERROR_TYPE_INVALID) {
+ goto invalid;
+ }
+
+ ret = spdk_vbdev_inject_error(req.name, io_type, error_type, req.num);
+ if (ret) {
+ goto invalid;
+ }
+
+ free_rpc_error_information(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_error_information(&req);
+}
+SPDK_RPC_REGISTER("bdev_inject_error", spdk_rpc_bdev_inject_error, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/gpt/Makefile b/src/spdk/lib/bdev/gpt/Makefile
new file mode 100644
index 00000000..6806c647
--- /dev/null
+++ b/src/spdk/lib/bdev/gpt/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = gpt.c vbdev_gpt.c
+LIBNAME = vbdev_gpt
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/gpt/gpt.c b/src/spdk/lib/bdev/gpt/gpt.c
new file mode 100644
index 00000000..0e830cdd
--- /dev/null
+++ b/src/spdk/lib/bdev/gpt/gpt.c
@@ -0,0 +1,239 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "gpt.h"
+
+#include "spdk/crc32.h"
+#include "spdk/endian.h"
+#include "spdk/event.h"
+
+#include "spdk_internal/log.h"
+
+#define GPT_PRIMARY_PARTITION_TABLE_LBA 0x1
+#define PRIMARY_PARTITION_NUMBER 4
+#define GPT_PROTECTIVE_MBR 1
+#define SPDK_MAX_NUM_PARTITION_ENTRIES 128
+
+static int
+spdk_gpt_read_partitions(struct spdk_gpt *gpt)
+{
+ uint32_t total_partition_size, num_partition_entries, partition_entry_size;
+ uint64_t partition_start_lba;
+ struct spdk_gpt_header *head = gpt->header;
+ uint32_t crc32;
+
+ num_partition_entries = from_le32(&head->num_partition_entries);
+ if (num_partition_entries > SPDK_MAX_NUM_PARTITION_ENTRIES) {
+ SPDK_ERRLOG("Num_partition_entries=%u which exceeds max=%u\n",
+ num_partition_entries, SPDK_MAX_NUM_PARTITION_ENTRIES);
+ return -1;
+ }
+
+ partition_entry_size = from_le32(&head->size_of_partition_entry);
+ if (partition_entry_size != sizeof(struct spdk_gpt_partition_entry)) {
+ SPDK_ERRLOG("Partition_entry_size(%x) != expected(%lx)\n",
+ partition_entry_size, sizeof(struct spdk_gpt_partition_entry));
+ return -1;
+ }
+
+ total_partition_size = num_partition_entries * partition_entry_size;
+ partition_start_lba = from_le64(&head->partition_entry_lba);
+ if ((total_partition_size + partition_start_lba * gpt->sector_size) > SPDK_GPT_BUFFER_SIZE) {
+ SPDK_ERRLOG("Buffer size is not enough\n");
+ return -1;
+ }
+
+ gpt->partitions = (struct spdk_gpt_partition_entry *)(gpt->buf +
+ partition_start_lba * gpt->sector_size);
+
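+ /* IEEE CRC-32: seed with ~0, then apply the final XOR before comparing. */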
+ crc32 = spdk_crc32_ieee_update(gpt->partitions, total_partition_size, ~0);
+ crc32 ^= ~0;
+
+ if (crc32 != from_le32(&head->partition_entry_array_crc32)) {
+ SPDK_ERRLOG("GPT partition entry array crc32 did not match\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+spdk_gpt_lba_range_check(struct spdk_gpt_header *head, uint64_t lba_end)
+{
+ uint64_t usable_lba_start, usable_lba_end;
+
+ usable_lba_start = from_le64(&head->first_usable_lba);
+ usable_lba_end = from_le64(&head->last_usable_lba);
+
+ if (usable_lba_end < usable_lba_start) {
+ SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") < usable_lba_start(%" PRIu64 ")\n",
+ usable_lba_end, usable_lba_start);
+ return -1;
+ }
+
+ if (usable_lba_end > lba_end) {
+ SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") > lba_end(%" PRIu64 ")\n",
+ usable_lba_end, lba_end);
+ return -1;
+ }
+
+ if ((usable_lba_start < GPT_PRIMARY_PARTITION_TABLE_LBA) &&
+ (GPT_PRIMARY_PARTITION_TABLE_LBA < usable_lba_end)) {
+ SPDK_ERRLOG("Head lba is not in the usable range\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+spdk_gpt_read_header(struct spdk_gpt *gpt)
+{
+ uint32_t head_size;
+ uint32_t new_crc, original_crc;
+ struct spdk_gpt_header *head;
+
+ head = (struct spdk_gpt_header *)(gpt->buf + GPT_PRIMARY_PARTITION_TABLE_LBA * gpt->sector_size);
+ head_size = from_le32(&head->header_size);
+ if (head_size < sizeof(*head) || head_size > gpt->sector_size) {
+ SPDK_ERRLOG("head_size=%u\n", head_size);
+ return -1;
+ }
+
+ original_crc = from_le32(&head->header_crc32);
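+ /* Per the GPT spec, the header crc32 is computed with the crc32 field zeroed. */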
+ head->header_crc32 = 0;
+ new_crc = spdk_crc32_ieee_update(head, from_le32(&head->header_size), ~0);
+ new_crc ^= ~0;
+ /* restore header crc32 */
+ to_le32(&head->header_crc32, original_crc);
+
+ if (new_crc != original_crc) {
+ SPDK_ERRLOG("head crc32 does not match, provided=%u, caculated=%u\n",
+ original_crc, new_crc);
+ return -1;
+ }
+
+ if (memcmp(SPDK_GPT_SIGNATURE, head->gpt_signature,
+ sizeof(head->gpt_signature))) {
+ SPDK_ERRLOG("signature did not match\n");
+ return -1;
+ }
+
+ if (spdk_gpt_lba_range_check(head, gpt->lba_end)) {
+ SPDK_ERRLOG("lba range check error\n");
+ return -1;
+ }
+
+ gpt->header = head;
+ return 0;
+}
+
+static int
+spdk_gpt_check_mbr(struct spdk_gpt *gpt)
+{
+ int i, primary_partition = 0;
+ uint32_t total_lba_size = 0, ret = 0, expected_start_lba;
+ struct spdk_mbr *mbr;
+
+ mbr = (struct spdk_mbr *)gpt->buf;
+ if (from_le16(&mbr->mbr_signature) != SPDK_MBR_SIGNATURE) {
+ SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Signature mismatch, provided=%x,"
+ "expected=%x\n", from_le16(&mbr->disk_signature),
+ SPDK_MBR_SIGNATURE);
+ return -1;
+ }
+
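+ /*
+ * A protective MBR carries a single 0xEE (GPT protective) partition that
+ * starts at LBA 1 and nominally covers the rest of the disk.
+ */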
+ for (i = 0; i < PRIMARY_PARTITION_NUMBER; i++) {
+ if (mbr->partitions[i].os_type == SPDK_MBR_OS_TYPE_GPT_PROTECTIVE) {
+ primary_partition = i;
+ ret = GPT_PROTECTIVE_MBR;
+ break;
+ }
+ }
+
+ if (ret == GPT_PROTECTIVE_MBR) {
+ expected_start_lba = GPT_PRIMARY_PARTITION_TABLE_LBA;
+ if (from_le32(&mbr->partitions[primary_partition].start_lba) != expected_start_lba) {
+ SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "start lba mismatch, provided=%u, expected=%u\n",
+ from_le32(&mbr->partitions[primary_partition].start_lba),
+ expected_start_lba);
+ return -1;
+ }
+
+ total_lba_size = from_le32(&mbr->partitions[primary_partition].size_lba);
+ if ((total_lba_size != ((uint32_t) gpt->total_sectors - 1)) &&
+ (total_lba_size != 0xFFFFFFFF)) {
+ SPDK_ERRLOG("GPT Primary MBR size does not equal: (record_size %u != actual_size %u)!\n",
+ total_lba_size, (uint32_t) gpt->total_sectors - 1);
+ return -1;
+ }
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Currently only support GPT Protective MBR format\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+spdk_gpt_parse(struct spdk_gpt *gpt)
+{
+ int rc;
+
+ if (!gpt || !gpt->buf) {
+ SPDK_ERRLOG("Gpt and the related buffer should not be NULL\n");
+ return -1;
+ }
+
+ rc = spdk_gpt_check_mbr(gpt);
+ if (rc) {
+ SPDK_DEBUGLOG(SPDK_LOG_GPT_PARSE, "Failed to detect gpt in MBR\n");
+ return rc;
+ }
+
+ rc = spdk_gpt_read_header(gpt);
+ if (rc) {
+ SPDK_ERRLOG("Failed to read gpt header\n");
+ return rc;
+ }
+
+ rc = spdk_gpt_read_partitions(gpt);
+ if (rc) {
+ SPDK_ERRLOG("Failed to read gpt partitions\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("gpt_parse", SPDK_LOG_GPT_PARSE)
diff --git a/src/spdk/lib/bdev/gpt/gpt.h b/src/spdk/lib/bdev/gpt/gpt.h
new file mode 100644
index 00000000..923bdc1c
--- /dev/null
+++ b/src/spdk/lib/bdev/gpt/gpt.h
@@ -0,0 +1,62 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * GPT internal Interface
+ */
+
+#ifndef SPDK_INTERNAL_GPT_H
+#define SPDK_INTERNAL_GPT_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/gpt_spec.h"
+
+#define SPDK_GPT_PART_TYPE_GUID SPDK_GPT_GUID(0x7c5222bd, 0x8f5d, 0x4087, 0x9c00, 0xbf9843c7b58c)
+#define SPDK_GPT_BUFFER_SIZE 32768 /* 32KB */
+#define SPDK_GPT_GUID_EQUAL(x,y) (memcmp(x, y, sizeof(struct spdk_gpt_guid)) == 0)
+
+struct spdk_gpt {
+ unsigned char *buf;
+ uint64_t buf_size;
+ uint64_t lba_start;
+ uint64_t lba_end;
+ uint64_t total_sectors;
+ uint32_t sector_size;
+ struct spdk_gpt_header *header;
+ struct spdk_gpt_partition_entry *partitions;
+};
+
+int spdk_gpt_parse(struct spdk_gpt *gpt);
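+
+/*
+ * Usage sketch (geometry values are hypothetical). The caller reads at least
+ * SPDK_GPT_BUFFER_SIZE bytes from the start of the disk into buf, fills in
+ * the geometry, and then calls spdk_gpt_parse():
+ *
+ *     struct spdk_gpt gpt = {};
+ *
+ *     gpt.buf = buf;
+ *     gpt.buf_size = SPDK_GPT_BUFFER_SIZE;
+ *     gpt.sector_size = 512;
+ *     gpt.total_sectors = num_blocks;
+ *     gpt.lba_start = 0;
+ *     gpt.lba_end = num_blocks - 1;
+ *
+ *     if (spdk_gpt_parse(&gpt) == 0) {
+ *             ... gpt.header and gpt.partitions now point into buf ...
+ *     }
+ */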
+
+#endif /* SPDK_INTERNAL_GPT_H */
diff --git a/src/spdk/lib/bdev/gpt/vbdev_gpt.c b/src/spdk/lib/bdev/gpt/vbdev_gpt.c
new file mode 100644
index 00000000..751af0ea
--- /dev/null
+++ b/src/spdk/lib/bdev/gpt/vbdev_gpt.c
@@ -0,0 +1,463 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This driver reads a GPT partition table from a bdev and exposes a virtual block device for
+ * each partition.
+ */
+
+#include "gpt.h"
+
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/thread.h"
+#include "spdk/rpc.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+static int vbdev_gpt_init(void);
+static void vbdev_gpt_examine(struct spdk_bdev *bdev);
+static int vbdev_gpt_get_ctx_size(void);
+
+static struct spdk_bdev_module gpt_if = {
+ .name = "gpt",
+ .module_init = vbdev_gpt_init,
+ .get_ctx_size = vbdev_gpt_get_ctx_size,
+ .examine_disk = vbdev_gpt_examine,
+};
+SPDK_BDEV_MODULE_REGISTER(&gpt_if)
+
+/* Base block device gpt context */
+struct gpt_base {
+ struct spdk_gpt gpt;
+ struct spdk_bdev_part_base *part_base;
+
+ /* This channel is only used for reading the partition table. */
+ struct spdk_io_channel *ch;
+};
+
+/* Context for each gpt virtual bdev */
+struct gpt_disk {
+ struct spdk_bdev_part part;
+ uint32_t partition_index;
+};
+
+struct gpt_channel {
+ struct spdk_bdev_part_channel part_ch;
+};
+
+struct gpt_io {
+ struct spdk_io_channel *ch;
+ struct spdk_bdev_io *bdev_io;
+
+ /* for bdev_io_wait */
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+static SPDK_BDEV_PART_TAILQ g_gpt_disks = TAILQ_HEAD_INITIALIZER(g_gpt_disks);
+
+static bool g_gpt_disabled;
+
+static void
+spdk_gpt_base_free(void *ctx)
+{
+ struct gpt_base *gpt_base = ctx;
+
+ spdk_dma_free(gpt_base->gpt.buf);
+ free(gpt_base);
+}
+
+static void
+spdk_gpt_base_bdev_hotremove_cb(void *_base_bdev)
+{
+ spdk_bdev_part_base_hotremove(_base_bdev, &g_gpt_disks);
+}
+
+static int vbdev_gpt_destruct(void *ctx);
+static void vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io);
+static int vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w);
+
+static struct spdk_bdev_fn_table vbdev_gpt_fn_table = {
+ .destruct = vbdev_gpt_destruct,
+ .submit_request = vbdev_gpt_submit_request,
+ .dump_info_json = vbdev_gpt_dump_info_json,
+};
+
+static struct gpt_base *
+spdk_gpt_base_bdev_init(struct spdk_bdev *bdev)
+{
+ struct gpt_base *gpt_base;
+ struct spdk_gpt *gpt;
+
+ gpt_base = calloc(1, sizeof(*gpt_base));
+ if (!gpt_base) {
+ SPDK_ERRLOG("Cannot alloc memory for gpt_base pointer\n");
+ return NULL;
+ }
+
+ gpt_base->part_base = spdk_bdev_part_base_construct(bdev,
+ spdk_gpt_base_bdev_hotremove_cb,
+ &gpt_if, &vbdev_gpt_fn_table,
+ &g_gpt_disks, spdk_gpt_base_free, gpt_base,
+ sizeof(struct gpt_channel), NULL, NULL);
+ if (!gpt_base->part_base) {
+ free(gpt_base);
+ SPDK_ERRLOG("cannot construct gpt_base");
+ return NULL;
+ }
+
+ gpt = &gpt_base->gpt;
+ gpt->buf_size = spdk_max(SPDK_GPT_BUFFER_SIZE, bdev->blocklen);
+ gpt->buf = spdk_dma_zmalloc(gpt->buf_size, spdk_bdev_get_buf_align(bdev), NULL);
+ if (!gpt->buf) {
+ SPDK_ERRLOG("Cannot alloc buf\n");
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ return NULL;
+ }
+
+ gpt->sector_size = bdev->blocklen;
+ gpt->total_sectors = bdev->blockcnt;
+ gpt->lba_start = 0;
+ gpt->lba_end = gpt->total_sectors - 1;
+
+ return gpt_base;
+}
+
+static int
+vbdev_gpt_destruct(void *ctx)
+{
+ struct gpt_disk *gpt_disk = ctx;
+
+ return spdk_bdev_part_free(&gpt_disk->part);
+}
+
+static void
+vbdev_gpt_resubmit_request(void *arg)
+{
+ struct gpt_io *io = (struct gpt_io *)arg;
+
+ vbdev_gpt_submit_request(io->ch, io->bdev_io);
+}
+
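+/*
+ * On -ENOMEM from the base bdev, park the I/O on the bdev's io_wait queue;
+ * the bdev layer invokes vbdev_gpt_resubmit_request once resources free up.
+ */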
+static void
+vbdev_gpt_queue_io(struct gpt_io *io)
+{
+ int rc;
+
+ io->bdev_io_wait.bdev = io->bdev_io->bdev;
+ io->bdev_io_wait.cb_fn = vbdev_gpt_resubmit_request;
+ io->bdev_io_wait.cb_arg = io;
+
+ rc = spdk_bdev_queue_io_wait(io->bdev_io->bdev,
+ io->ch, &io->bdev_io_wait);
+ if (rc != 0) {
+ SPDK_ERRLOG("Queue io failed in vbdev_gpt_queue_io, rc=%d.\n", rc);
+ spdk_bdev_io_complete(io->bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static void
+vbdev_gpt_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct gpt_channel *ch = spdk_io_channel_get_ctx(_ch);
+ struct gpt_io *io = (struct gpt_io *)bdev_io->driver_ctx;
+ int rc;
+
+ rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
+ if (rc) {
+ if (rc == -ENOMEM) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "gpt: no memory, queue io\n");
+ io->ch = _ch;
+ io->bdev_io = bdev_io;
+ vbdev_gpt_queue_io(io);
+ } else {
+ SPDK_ERRLOG("gpt: error on bdev_io submission, rc=%d.\n", rc);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static void
+write_guid(struct spdk_json_write_ctx *w, const struct spdk_gpt_guid *guid)
+{
+ spdk_json_write_string_fmt(w, "%08x-%04x-%04x-%04x-%04x%08x",
+ from_le32(&guid->raw[0]),
+ from_le16(&guid->raw[4]),
+ from_le16(&guid->raw[6]),
+ from_be16(&guid->raw[8]),
+ from_be16(&guid->raw[10]),
+ from_be32(&guid->raw[12]));
+}
+
+static void
+write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *str, size_t max_len)
+{
+ size_t len;
+ const uint16_t *p;
+
+ for (len = 0, p = str; len < max_len && *p; p++) {
+ len++;
+ }
+
+ spdk_json_write_string_utf16le_raw(w, str, len);
+}
+
+static int
+vbdev_gpt_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct gpt_disk *gpt_disk = SPDK_CONTAINEROF(ctx, struct gpt_disk, part);
+ struct spdk_bdev_part_base *base_bdev = spdk_bdev_part_get_base(&gpt_disk->part);
+ struct gpt_base *gpt_base = spdk_bdev_part_base_get_ctx(base_bdev);
+ struct spdk_bdev *part_base_bdev = spdk_bdev_part_base_get_bdev(base_bdev);
+ struct spdk_gpt *gpt = &gpt_base->gpt;
+ struct spdk_gpt_partition_entry *gpt_entry = &gpt->partitions[gpt_disk->partition_index];
+ uint64_t offset_blocks = spdk_bdev_part_get_offset_blocks(&gpt_disk->part);
+
+ spdk_json_write_name(w, "gpt");
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "base_bdev");
+ spdk_json_write_string(w, spdk_bdev_get_name(part_base_bdev));
+
+ spdk_json_write_name(w, "offset_blocks");
+ spdk_json_write_uint64(w, offset_blocks);
+
+ spdk_json_write_name(w, "partition_type_guid");
+ write_guid(w, &gpt_entry->part_type_guid);
+
+ spdk_json_write_name(w, "unique_partition_guid");
+ write_guid(w, &gpt_entry->unique_partition_guid);
+
+ spdk_json_write_name(w, "partition_name");
+ write_string_utf16le(w, gpt_entry->partition_name, SPDK_COUNTOF(gpt_entry->partition_name));
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static int
+vbdev_gpt_create_bdevs(struct gpt_base *gpt_base)
+{
+ uint32_t num_partition_entries;
+ uint64_t i, head_lba_start, head_lba_end;
+ uint32_t num_partitions;
+ struct spdk_gpt_partition_entry *p;
+ struct gpt_disk *d;
+ struct spdk_gpt *gpt;
+ char *name;
+ struct spdk_bdev *base_bdev;
+ int rc;
+
+ gpt = &gpt_base->gpt;
+ num_partition_entries = from_le32(&gpt->header->num_partition_entries);
+ head_lba_start = from_le64(&gpt->header->first_usable_lba);
+ head_lba_end = from_le64(&gpt->header->last_usable_lba);
+ num_partitions = 0;
+
+ for (i = 0; i < num_partition_entries; i++) {
+ p = &gpt->partitions[i];
+ uint64_t lba_start = from_le64(&p->starting_lba);
+ uint64_t lba_end = from_le64(&p->ending_lba);
+
+ if (!SPDK_GPT_GUID_EQUAL(&gpt->partitions[i].part_type_guid,
+ &SPDK_GPT_PART_TYPE_GUID) ||
+ lba_start == 0) {
+ continue;
+ }
+ if (lba_start < head_lba_start || lba_end > head_lba_end) {
+ continue;
+ }
+
+ d = calloc(1, sizeof(*d));
+ if (!d) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ return -1;
+ }
+
+ /* indices start at 1 instead of 0 to match the existing style */
+ base_bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
+ name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1);
+ if (!name) {
+ SPDK_ERRLOG("name allocation failure\n");
+ free(d);
+ return -1;
+ }
+
+ rc = spdk_bdev_part_construct(&d->part, gpt_base->part_base, name,
+ lba_start, lba_end - lba_start, "GPT Disk");
+ free(name);
+ if (rc) {
+ SPDK_ERRLOG("could not construct bdev part\n");
+ /* 'name' was duplicated by spdk_bdev_part_construct, so our copy was already freed above */
+ free(d);
+ return -1;
+ }
+ num_partitions++;
+ d->partition_index = i;
+ }
+
+ return num_partitions;
+}
+
+static void
+spdk_gpt_bdev_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
+{
+ struct gpt_base *gpt_base = (struct gpt_base *)arg;
+ struct spdk_bdev *bdev = spdk_bdev_part_base_get_bdev(gpt_base->part_base);
+ int rc, num_partitions = 0;
+
+ spdk_bdev_free_io(bdev_io);
+ spdk_put_io_channel(gpt_base->ch);
+ gpt_base->ch = NULL;
+
+ if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
+ SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
+ spdk_bdev_get_name(bdev), status);
+ goto end;
+ }
+
+ rc = spdk_gpt_parse(&gpt_base->gpt);
+ if (rc) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "Failed to parse gpt\n");
+ goto end;
+ }
+
+ num_partitions = vbdev_gpt_create_bdevs(gpt_base);
+ if (num_partitions < 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_GPT, "Failed to split dev=%s by gpt table\n",
+ spdk_bdev_get_name(bdev));
+ }
+
+end:
+ /*
+ * Notify the generic bdev layer that the actions related to the original examine
+ * callback are now completed.
+ */
+ spdk_bdev_module_examine_done(&gpt_if);
+
+ /*
+ * vbdev_gpt_create_bdevs returns the number of partition bdevs it created,
+ * or a negative value on error.
+ */
+ if (num_partitions <= 0) {
+ /* If no gpt_disk instances were created, free the base context */
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ }
+}
+
+static int
+vbdev_gpt_read_gpt(struct spdk_bdev *bdev)
+{
+ struct gpt_base *gpt_base;
+ struct spdk_bdev_desc *part_base_desc;
+ int rc;
+
+ gpt_base = spdk_gpt_base_bdev_init(bdev);
+ if (!gpt_base) {
+ SPDK_ERRLOG("Cannot allocated gpt_base\n");
+ return -1;
+ }
+
+ part_base_desc = spdk_bdev_part_base_get_desc(gpt_base->part_base);
+ gpt_base->ch = spdk_bdev_get_io_channel(part_base_desc);
+ if (gpt_base->ch == NULL) {
+ SPDK_ERRLOG("Failed to get an io_channel.\n");
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ return -1;
+ }
+
+ rc = spdk_bdev_read(part_base_desc, gpt_base->ch, gpt_base->gpt.buf, 0,
+ gpt_base->gpt.buf_size, spdk_gpt_bdev_complete, gpt_base);
+ if (rc < 0) {
+ spdk_put_io_channel(gpt_base->ch);
+ spdk_bdev_part_base_free(gpt_base->part_base);
+ SPDK_ERRLOG("Failed to send bdev_io command\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+vbdev_gpt_init(void)
+{
+ struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Gpt");
+
+ if (sp && spdk_conf_section_get_boolval(sp, "Disable", false)) {
+ /* Disable Gpt probe */
+ g_gpt_disabled = true;
+ }
+
+ return 0;
+}
+
+static int
+vbdev_gpt_get_ctx_size(void)
+{
+ return sizeof(struct gpt_io);
+}
+
+static void
+vbdev_gpt_examine(struct spdk_bdev *bdev)
+{
+ int rc;
+
+ /* A bdev with fewer than 2 blocks cannot have a GPT. Block 0 has
+ * the MBR and block 1 has the GPT header.
+ */
+ if (g_gpt_disabled || spdk_bdev_get_num_blocks(bdev) < 2) {
+ spdk_bdev_module_examine_done(&gpt_if);
+ return;
+ }
+
+ if (spdk_bdev_get_block_size(bdev) % 512 != 0) {
+ SPDK_ERRLOG("GPT module does not support block size %" PRIu32 " for bdev %s\n",
+ spdk_bdev_get_block_size(bdev), spdk_bdev_get_name(bdev));
+ spdk_bdev_module_examine_done(&gpt_if);
+ return;
+ }
+
+ rc = vbdev_gpt_read_gpt(bdev);
+ if (rc) {
+ spdk_bdev_module_examine_done(&gpt_if);
+ SPDK_ERRLOG("Failed to read info from bdev %s\n", spdk_bdev_get_name(bdev));
+ }
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_gpt", SPDK_LOG_VBDEV_GPT)
diff --git a/src/spdk/lib/bdev/iscsi/Makefile b/src/spdk/lib/bdev/iscsi/Makefile
new file mode 100644
index 00000000..4a38886d
--- /dev/null
+++ b/src/spdk/lib/bdev/iscsi/Makefile
@@ -0,0 +1,46 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
+# CentOS 7 libiscsi package has functions declared inline but not
+# defined in the header file. Not aware of any way to disable
+# this warning so just make sure the warning isn't treated as
+# an error.
+CFLAGS += -Wno-error
+C_SRCS = bdev_iscsi.c bdev_iscsi_rpc.c
+LIBNAME = bdev_iscsi
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi.c b/src/spdk/lib/bdev/iscsi/bdev_iscsi.c
new file mode 100644
index 00000000..528337f5
--- /dev/null
+++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi.c
@@ -0,0 +1,875 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/env.h"
+#include "spdk/fd.h"
+#include "spdk/thread.h"
+#include "spdk/json.h"
+#include "spdk/util.h"
+#include "spdk/rpc.h"
+#include "spdk/string.h"
+#include "spdk/iscsi_spec.h"
+
+#include "spdk_internal/log.h"
+#include "spdk/bdev_module.h"
+
+#include "iscsi/iscsi.h"
+#include "iscsi/scsi-lowlevel.h"
+
+#include "bdev_iscsi.h"
+
+struct bdev_iscsi_lun;
+
+#define BDEV_ISCSI_CONNECTION_POLL_US 500 /* 0.5 ms */
+#define BDEV_ISCSI_NO_MASTER_CH_POLL_US 10000 /* 10ms */
+
+#define DEFAULT_INITIATOR_NAME "iqn.2016-06.io.spdk:init"
+
+static int bdev_iscsi_initialize(void);
+static TAILQ_HEAD(, bdev_iscsi_conn_req) g_iscsi_conn_req = TAILQ_HEAD_INITIALIZER(
+ g_iscsi_conn_req);
+static struct spdk_poller *g_conn_poller = NULL;
+
+struct bdev_iscsi_io {
+ struct spdk_thread *submit_td;
+ enum spdk_bdev_io_status status;
+ int scsi_status;
+ enum spdk_scsi_sense sk;
+ uint8_t asc;
+ uint8_t ascq;
+};
+
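+/*
+ * A libiscsi context is not thread safe, so all I/O for a LUN is funneled
+ * to the "master" thread, i.e. the thread that opened the LUN's first
+ * io_channel (see bdev_iscsi_submit_request below).
+ */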
+struct bdev_iscsi_lun {
+ struct spdk_bdev bdev;
+ struct iscsi_context *context;
+ char *initiator_iqn;
+ char *url;
+ pthread_mutex_t mutex;
+ uint32_t ch_count;
+ struct bdev_iscsi_io_channel *master_ch;
+ struct spdk_thread *master_td;
+ struct spdk_poller *no_master_ch_poller;
+ struct spdk_thread *no_master_ch_poller_td;
+ bool unmap_supported;
+};
+
+struct bdev_iscsi_io_channel {
+ struct spdk_poller *poller;
+ struct bdev_iscsi_lun *lun;
+};
+
+struct bdev_iscsi_conn_req {
+ char *url;
+ char *bdev_name;
+ char *initiator_iqn;
+ struct iscsi_context *context;
+ spdk_bdev_iscsi_create_cb create_cb;
+ void *create_cb_arg;
+ bool unmap_supported;
+ TAILQ_ENTRY(bdev_iscsi_conn_req) link;
+};
+
+static void
+complete_conn_req(struct bdev_iscsi_conn_req *req, struct spdk_bdev *bdev,
+ int status)
+{
+ TAILQ_REMOVE(&g_iscsi_conn_req, req, link);
+ req->create_cb(req->create_cb_arg, bdev, status);
+ if (status) {
+ /* if the request failed and no iscsi lun was
+ * created then we could not hand over this
+ * memory and have to free it manually now.
+ */
+ iscsi_destroy_context(req->context);
+ free(req->initiator_iqn);
+ free(req->bdev_name);
+ free(req->url);
+ }
+ free(req);
+}
+
+static int
+bdev_iscsi_get_ctx_size(void)
+{
+ return sizeof(struct bdev_iscsi_io);
+}
+
+static void
+_iscsi_free_lun(void *arg)
+{
+ struct bdev_iscsi_lun *lun = arg;
+
+ assert(lun != NULL);
+ iscsi_destroy_context(lun->context);
+ pthread_mutex_destroy(&lun->mutex);
+ free(lun->bdev.name);
+ free(lun->url);
+ free(lun->initiator_iqn);
+
+ spdk_bdev_destruct_done(&lun->bdev, 0);
+ free(lun);
+}
+
+static void
+bdev_iscsi_finish(void)
+{
+ struct bdev_iscsi_conn_req *req;
+
+ while (!TAILQ_EMPTY(&g_iscsi_conn_req)) {
+ req = TAILQ_FIRST(&g_iscsi_conn_req);
+ complete_conn_req(req, NULL, -EINTR);
+ }
+
+ if (g_conn_poller) {
+ spdk_poller_unregister(&g_conn_poller);
+ }
+}
+
+static struct spdk_bdev_module g_iscsi_bdev_module = {
+ .name = "iscsi",
+ .module_init = bdev_iscsi_initialize,
+ .module_fini = bdev_iscsi_finish,
+ .get_ctx_size = bdev_iscsi_get_ctx_size,
+ .async_init = true,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&g_iscsi_bdev_module);
+
+static void
+_bdev_iscsi_io_complete(void *_iscsi_io)
+{
+ struct bdev_iscsi_io *iscsi_io = _iscsi_io;
+
+ if (iscsi_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
+ spdk_bdev_io_complete_scsi_status(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->scsi_status,
+ iscsi_io->sk, iscsi_io->asc, iscsi_io->ascq);
+ } else {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(iscsi_io), iscsi_io->status);
+ }
+}
+
+static void
+bdev_iscsi_io_complete(struct bdev_iscsi_io *iscsi_io, enum spdk_bdev_io_status status)
+{
+ iscsi_io->status = status;
+ if (iscsi_io->submit_td != NULL) {
+ spdk_thread_send_msg(iscsi_io->submit_td, _bdev_iscsi_io_complete, iscsi_io);
+ } else {
+ _bdev_iscsi_io_complete(iscsi_io);
+ }
+}
+
+/* Common call back function for read/write/flush command */
+static void
+bdev_iscsi_command_cb(struct iscsi_context *context, int status, void *_task, void *_iscsi_io)
+{
+ struct scsi_task *task = _task;
+ struct bdev_iscsi_io *iscsi_io = _iscsi_io;
+
+ iscsi_io->scsi_status = status;
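+ /* libiscsi packs ASC in the upper byte and ASCQ in the lower byte of sense.ascq. */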
+ iscsi_io->sk = (uint8_t)task->sense.key;
+ iscsi_io->asc = (task->sense.ascq >> 8) & 0xFF;
+ iscsi_io->ascq = task->sense.ascq & 0xFF;
+
+ scsi_free_scsi_task(task);
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+}
+
+static void
+bdev_iscsi_readv(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
+ struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
+{
+ struct scsi_task *task;
+
+ SPDK_DEBUGLOG(SPDK_LOG_ISCSI_INIT, "read %d iovs size %lu to lba: %#lx\n",
+ iovcnt, nbytes, lba);
+
+ task = iscsi_read16_task(lun->context, 0, lba, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0,
+ bdev_iscsi_command_cb, iscsi_io);
+ if (task == NULL) {
+ SPDK_ERRLOG("failed to get read16_task\n");
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+ scsi_task_set_iov_in(task, (struct scsi_iovec *)iov, iovcnt);
+#else
+ int i;
+ for (i = 0; i < iovcnt; i++) {
+ scsi_task_add_data_in_buffer(task, iov[i].iov_len, iov[i].iov_base);
+ }
+#endif
+}
+
+static void
+bdev_iscsi_writev(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
+ struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t lba)
+{
+ struct scsi_task *task;
+
+ SPDK_DEBUGLOG(SPDK_LOG_ISCSI_INIT, "write %d iovs size %lu to lba: %#lx\n",
+ iovcnt, nbytes, lba);
+
+ task = iscsi_write16_task(lun->context, 0, lba, NULL, nbytes, lun->bdev.blocklen, 0, 0, 0, 0, 0,
+ bdev_iscsi_command_cb, iscsi_io);
+ if (task == NULL) {
+ SPDK_ERRLOG("failed to get write16_task\n");
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+ scsi_task_set_iov_out(task, (struct scsi_iovec *)iov, iovcnt);
+#else
+ int i;
+ for (i = 0; i < iovcnt; i++) {
+ scsi_task_add_data_out_buffer(task, iov[i].iov_len, iov[i].iov_base);
+ }
+#endif
+}
+
+static void
+bdev_iscsi_destruct_cb(void *ctx)
+{
+ struct bdev_iscsi_lun *lun = ctx;
+
+ spdk_poller_unregister(&lun->no_master_ch_poller);
+ spdk_io_device_unregister(lun, _iscsi_free_lun);
+}
+
+static int
+bdev_iscsi_destruct(void *ctx)
+{
+ struct bdev_iscsi_lun *lun = ctx;
+
+ assert(lun->no_master_ch_poller_td);
+ spdk_thread_send_msg(lun->no_master_ch_poller_td, bdev_iscsi_destruct_cb, lun);
+ return 1;
+}
+
+static void
+bdev_iscsi_flush(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io, uint32_t num_blocks,
+ int immed, uint64_t lba)
+{
+ struct scsi_task *task;
+
+ task = iscsi_synchronizecache16_task(lun->context, 0, lba,
+ num_blocks, 0, immed, bdev_iscsi_command_cb, iscsi_io);
+ if (task == NULL) {
+ SPDK_ERRLOG("failed to get sync16_task\n");
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+}
+
+static void
+bdev_iscsi_unmap(struct bdev_iscsi_lun *lun, struct bdev_iscsi_io *iscsi_io,
+ uint64_t lba, uint64_t num_blocks)
+{
+ struct scsi_task *task;
+ struct unmap_list list[1];
+
+ list[0].lba = lba;
+ list[0].num = num_blocks;
+ task = iscsi_unmap_task(lun->context, 0, 0, 0, list, 1,
+ bdev_iscsi_command_cb, iscsi_io);
+ if (task == NULL) {
+ SPDK_ERRLOG("failed to get unmap_task\n");
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+}
+
+static void
+bdev_iscsi_reset_cb(struct iscsi_context *context __attribute__((unused)), int status,
+ void *command_data, void *private_data)
+{
+ uint32_t tmf_response;
+ struct bdev_iscsi_io *iscsi_io = private_data;
+
+ tmf_response = *(uint32_t *)command_data;
+ if (tmf_response == ISCSI_TASK_FUNC_RESP_COMPLETE) {
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ } else {
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static void
+_bdev_iscsi_reset(void *_bdev_io)
+{
+ int rc;
+ struct spdk_bdev_io *bdev_io = _bdev_io;
+ struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
+ struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
+ struct iscsi_context *context = lun->context;
+
+ rc = iscsi_task_mgmt_lun_reset_async(context, 0,
+ bdev_iscsi_reset_cb, iscsi_io);
+ if (rc != 0) {
+ SPDK_ERRLOG("failed to do iscsi reset\n");
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+}
+
+static void
+bdev_iscsi_reset(struct spdk_bdev_io *bdev_io)
+{
+ struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
+ spdk_thread_send_msg(lun->master_td, _bdev_iscsi_reset, bdev_io);
+}
+
+static int
+bdev_iscsi_poll_lun(struct bdev_iscsi_lun *lun)
+{
+ struct pollfd pfd = {};
+
+ pfd.fd = iscsi_get_fd(lun->context);
+ pfd.events = iscsi_which_events(lun->context);
+
+ if (poll(&pfd, 1, 0) < 0) {
+ SPDK_ERRLOG("poll failed\n");
+ return -1;
+ }
+
+ if (pfd.revents != 0) {
+ if (iscsi_service(lun->context, pfd.revents) < 0) {
+ SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(lun->context));
+ }
+ }
+
+ return -1;
+}
+
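+/*
+ * When no io_channels are open there is no per-channel poller, so this
+ * slower poller (registered at LUN creation) keeps the libiscsi context
+ * serviced in the meantime.
+ */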
+static int
+bdev_iscsi_no_master_ch_poll(void *arg)
+{
+ struct bdev_iscsi_lun *lun = arg;
+ int rc = 0;
+
+ if (pthread_mutex_trylock(&lun->mutex)) {
+ /* Don't care about the error code here. */
+ return -1;
+ }
+
+ if (lun->ch_count == 0) {
+ rc = bdev_iscsi_poll_lun(arg);
+ }
+
+ pthread_mutex_unlock(&lun->mutex);
+ return rc;
+}
+
+static int
+bdev_iscsi_poll(void *arg)
+{
+ struct bdev_iscsi_io_channel *ch = arg;
+
+ return bdev_iscsi_poll_lun(ch->lun);
+}
+
+static void bdev_iscsi_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ bdev_iscsi_readv((struct bdev_iscsi_lun *)bdev_io->bdev->ctxt,
+ (struct bdev_iscsi_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks);
+}
+
+static void _bdev_iscsi_submit_request(void *_bdev_io)
+{
+ struct spdk_bdev_io *bdev_io = _bdev_io;
+ struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
+ struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_iscsi_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_iscsi_writev(lun, iscsi_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ bdev_iscsi_flush(lun, iscsi_io,
+ bdev_io->u.bdev.num_blocks,
+ ISCSI_IMMEDIATE_DATA_NO,
+ bdev_io->u.bdev.offset_blocks);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ bdev_iscsi_reset(bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ bdev_iscsi_unmap(lun, iscsi_io,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+ break;
+ default:
+ bdev_iscsi_io_complete(iscsi_io, SPDK_BDEV_IO_STATUS_FAILED);
+ break;
+ }
+}
+
+static void bdev_iscsi_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_thread *submit_td = spdk_io_channel_get_thread(_ch);
+ struct bdev_iscsi_io *iscsi_io = (struct bdev_iscsi_io *)bdev_io->driver_ctx;
+ struct bdev_iscsi_lun *lun = (struct bdev_iscsi_lun *)bdev_io->bdev->ctxt;
+
+ if (lun->master_td != submit_td) {
+ iscsi_io->submit_td = submit_td;
+ spdk_thread_send_msg(lun->master_td, _bdev_iscsi_submit_request, bdev_io);
+ return;
+ } else {
+ iscsi_io->submit_td = NULL;
+ }
+
+ _bdev_iscsi_submit_request(bdev_io);
+}
+
+static bool
+bdev_iscsi_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct bdev_iscsi_lun *lun = ctx;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ return lun->unmap_supported;
+ default:
+ return false;
+ }
+}
+
+static int
+bdev_iscsi_create_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_iscsi_io_channel *ch = ctx_buf;
+ struct bdev_iscsi_lun *lun = io_device;
+
+ pthread_mutex_lock(&lun->mutex);
+ if (lun->ch_count == 0) {
+ assert(lun->master_ch == NULL);
+ assert(lun->master_td == NULL);
+ lun->master_ch = ch;
+ lun->master_td = spdk_get_thread();
+ ch->poller = spdk_poller_register(bdev_iscsi_poll, ch, 0);
+ ch->lun = lun;
+ }
+ lun->ch_count++;
+ pthread_mutex_unlock(&lun->mutex);
+
+ return 0;
+}
+
+static void
+bdev_iscsi_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_iscsi_io_channel *io_channel = ctx_buf;
+ struct bdev_iscsi_lun *lun = io_device;
+
+ pthread_mutex_lock(&lun->mutex);
+ lun->ch_count--;
+ if (lun->ch_count == 0) {
+ assert(lun->master_ch != NULL);
+ assert(lun->master_td != NULL);
+ assert(lun->master_td == spdk_get_thread());
+
+ lun->master_ch = NULL;
+ lun->master_td = NULL;
+ spdk_poller_unregister(&io_channel->poller);
+ }
+ pthread_mutex_unlock(&lun->mutex);
+}
+
+static struct spdk_io_channel *
+bdev_iscsi_get_io_channel(void *ctx)
+{
+ struct bdev_iscsi_lun *lun = ctx;
+
+ return spdk_get_io_channel(lun);
+}
+
+static int
+bdev_iscsi_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct bdev_iscsi_lun *lun = ctx;
+
+ spdk_json_write_name(w, "iscsi");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_name(w, "initiator_name");
+ spdk_json_write_string(w, lun->initiator_iqn);
+ spdk_json_write_name(w, "url");
+ spdk_json_write_string(w, lun->url);
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+bdev_iscsi_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct bdev_iscsi_lun *lun = bdev->ctxt;
+
+ pthread_mutex_lock(&lun->mutex);
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_iscsi_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_string(w, "initiator_iqn", lun->initiator_iqn);
+ spdk_json_write_named_string(w, "url", lun->url);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ pthread_mutex_unlock(&lun->mutex);
+}
+
+static const struct spdk_bdev_fn_table iscsi_fn_table = {
+ .destruct = bdev_iscsi_destruct,
+ .submit_request = bdev_iscsi_submit_request,
+ .io_type_supported = bdev_iscsi_io_type_supported,
+ .get_io_channel = bdev_iscsi_get_io_channel,
+ .dump_info_json = bdev_iscsi_dump_info_json,
+ .write_config_json = bdev_iscsi_write_config_json,
+};
+
+static int
+create_iscsi_lun(struct iscsi_context *context, char *url, char *initiator_iqn, char *name,
+ uint64_t num_blocks, uint32_t block_size, struct spdk_bdev **bdev, bool unmap_supported)
+{
+ struct bdev_iscsi_lun *lun;
+ int rc;
+
+ lun = calloc(sizeof(*lun), 1);
+ if (!lun) {
+ SPDK_ERRLOG("Unable to allocate enough memory for iscsi backend\n");
+ return -ENOMEM;
+ }
+
+ lun->context = context;
+ lun->url = url;
+ lun->initiator_iqn = initiator_iqn;
+
+ pthread_mutex_init(&lun->mutex, NULL);
+
+ lun->bdev.name = name;
+ lun->bdev.product_name = "iSCSI LUN";
+ lun->bdev.module = &g_iscsi_bdev_module;
+ lun->bdev.blocklen = block_size;
+ lun->bdev.blockcnt = num_blocks;
+ lun->bdev.ctxt = lun;
+ lun->unmap_supported = unmap_supported;
+
+ lun->bdev.fn_table = &iscsi_fn_table;
+
+ spdk_io_device_register(lun, bdev_iscsi_create_cb, bdev_iscsi_destroy_cb,
+ sizeof(struct bdev_iscsi_io_channel),
+ name);
+ rc = spdk_bdev_register(&lun->bdev);
+ if (rc) {
+ spdk_io_device_unregister(lun, NULL);
+ pthread_mutex_destroy(&lun->mutex);
+ free(lun);
+ return rc;
+ }
+
+ lun->no_master_ch_poller_td = spdk_get_thread();
+ lun->no_master_ch_poller = spdk_poller_register(bdev_iscsi_no_master_ch_poll, lun,
+ BDEV_ISCSI_NO_MASTER_CH_POLL_US);
+
+ *bdev = &lun->bdev;
+ return 0;
+}
+
+static void
+iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
+ void *command_data, void *private_data)
+{
+ struct bdev_iscsi_conn_req *req = private_data;
+ struct scsi_readcapacity16 *readcap16;
+ struct spdk_bdev *bdev = NULL;
+ struct scsi_task *task = command_data;
+
+ if (status != SPDK_SCSI_STATUS_GOOD) {
+ SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(iscsi));
+ goto ret;
+ }
+
+ readcap16 = scsi_datain_unmarshall(task);
+ if (!readcap16) {
+ status = -ENOMEM;
+ goto ret;
+ }
+
+ status = create_iscsi_lun(req->context, req->url, req->initiator_iqn, req->bdev_name,
+ readcap16->returned_lba + 1, readcap16->block_length, &bdev, req->unmap_supported);
+ if (status) {
+ SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status);
+ }
+
+ret:
+ scsi_free_scsi_task(task);
+ complete_conn_req(req, bdev, status);
+}
+
+static void
+bdev_iscsi_inquiry_cb(struct iscsi_context *context, int status, void *_task, void *private_data)
+{
+ struct scsi_task *task = _task;
+ struct scsi_inquiry_logical_block_provisioning *lbp_inq = NULL;
+ struct bdev_iscsi_conn_req *req = private_data;
+
+ if (status == SPDK_SCSI_STATUS_GOOD) {
+ lbp_inq = scsi_datain_unmarshall(task);
+ if (lbp_inq != NULL && lbp_inq->lbpu) {
+ req->unmap_supported = true;
+ }
+ }
+
+ task = iscsi_readcapacity16_task(context, 0, iscsi_readcapacity16_cb, req);
+ if (task) {
+ return;
+ }
+
+ SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
+ complete_conn_req(req, NULL, status);
+}
+
+static void
+iscsi_connect_cb(struct iscsi_context *iscsi, int status,
+ void *command_data, void *private_data)
+{
+ struct bdev_iscsi_conn_req *req = private_data;
+ struct scsi_task *task;
+
+ if (status != SPDK_SCSI_STATUS_GOOD) {
+ goto ret;
+ }
+
+ task = iscsi_inquiry_task(iscsi, 0, 1,
+ SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
+ 255, bdev_iscsi_inquiry_cb, req);
+ if (task) {
+ return;
+ }
+
+ret:
+ SPDK_ERRLOG("iSCSI error: %s\n", iscsi_get_error(req->context));
+ complete_conn_req(req, NULL, status);
+}
+
+static int
+iscsi_bdev_conn_poll(void *arg)
+{
+ struct bdev_iscsi_conn_req *req, *tmp;
+ struct pollfd pfd;
+ struct iscsi_context *context;
+
+ TAILQ_FOREACH_SAFE(req, &g_iscsi_conn_req, link, tmp) {
+ context = req->context;
+ pfd.fd = iscsi_get_fd(context);
+ pfd.events = iscsi_which_events(context);
+ pfd.revents = 0;
+ if (poll(&pfd, 1, 0) < 0) {
+ SPDK_ERRLOG("poll failed\n");
+ return -1;
+ }
+
+ if (pfd.revents != 0) {
+ if (iscsi_service(context, pfd.revents) < 0) {
+ SPDK_ERRLOG("iscsi_service failed: %s\n", iscsi_get_error(context));
+ }
+ }
+ }
+
+ return -1;
+}
+
+int
+create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
+ spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg)
+{
+ struct bdev_iscsi_conn_req *req;
+ struct iscsi_url *iscsi_url = NULL;
+ int rc;
+
+ if (!bdev_name || !url || !initiator_iqn || strlen(initiator_iqn) == 0 || !cb_fn) {
+ return -EINVAL;
+ }
+
+ req = calloc(1, sizeof(struct bdev_iscsi_conn_req));
+ if (!req) {
+ SPDK_ERRLOG("Cannot allocate pointer of struct bdev_iscsi_conn_req\n");
+ return -ENOMEM;
+ }
+
+ req->bdev_name = strdup(bdev_name);
+ req->url = strdup(url);
+ req->initiator_iqn = strdup(initiator_iqn);
+ req->context = iscsi_create_context(initiator_iqn);
+ if (!req->bdev_name || !req->url || !req->initiator_iqn || !req->context) {
+ SPDK_ERRLOG("Out of memory\n");
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ req->create_cb = cb_fn;
+ req->create_cb_arg = cb_arg;
+
+ iscsi_url = iscsi_parse_full_url(req->context, url);
+ if (iscsi_url == NULL) {
+ SPDK_ERRLOG("could not parse URL: %s\n", iscsi_get_error(req->context));
+ rc = -EINVAL;
+ goto err;
+ }
+
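+ /* Each setup step runs only if every previous step succeeded. */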
+ rc = iscsi_set_session_type(req->context, ISCSI_SESSION_NORMAL);
+ rc = rc ? rc : iscsi_set_header_digest(req->context, ISCSI_HEADER_DIGEST_NONE);
+ rc = rc ? rc : iscsi_set_targetname(req->context, iscsi_url->target);
+ rc = rc ? rc : iscsi_full_connect_async(req->context, iscsi_url->portal, iscsi_url->lun,
+ iscsi_connect_cb, req);
+ if (rc == 0 && iscsi_url->user[0] != '\0') {
+ rc = iscsi_set_initiator_username_pwd(req->context, iscsi_url->user, iscsi_url->passwd);
+ }
+
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to connect provided URL=%s: %s\n", url, iscsi_get_error(req->context));
+ goto err;
+ }
+
+ iscsi_destroy_url(iscsi_url);
+ TAILQ_INSERT_TAIL(&g_iscsi_conn_req, req, link);
+ if (!g_conn_poller) {
+ g_conn_poller = spdk_poller_register(iscsi_bdev_conn_poll, NULL, BDEV_ISCSI_CONNECTION_POLL_US);
+ }
+
+ return 0;
+
+err:
+ /* iscsi_destroy_url() is not NULL-proof */
+ if (iscsi_url) {
+ iscsi_destroy_url(iscsi_url);
+ }
+
+ if (req->context) {
+ iscsi_destroy_context(req->context);
+ }
+
+ free(req->initiator_iqn);
+ free(req->bdev_name);
+ free(req->url);
+ free(req);
+ return rc;
+}
+
+void
+delete_iscsi_disk(struct spdk_bdev *bdev, spdk_delete_iscsi_complete cb_fn, void *cb_arg)
+{
+ if (!bdev || bdev->module != &g_iscsi_bdev_module) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
+static void
+bdev_iscsi_initialize_cb(void *cb_arg, struct spdk_bdev *bdev, int status)
+{
+ if (TAILQ_EMPTY(&g_iscsi_conn_req)) {
+ spdk_bdev_module_init_done(&g_iscsi_bdev_module);
+ }
+}
+
+static int
+bdev_iscsi_initialize(void)
+{
+ struct spdk_conf_section *sp;
+
+ const char *url, *bdev_name, *initiator_iqn;
+ int i, rc;
+
+ sp = spdk_conf_find_section(NULL, "iSCSI_Initiator");
+ if (sp == NULL) {
+ spdk_bdev_module_init_done(&g_iscsi_bdev_module);
+ return 0;
+ }
+
+ initiator_iqn = spdk_conf_section_get_val(sp, "initiator_name");
+ if (!initiator_iqn) {
+ initiator_iqn = DEFAULT_INITIATOR_NAME;
+ }
+
+ rc = 0;
+ for (i = 0; (url = spdk_conf_section_get_nmval(sp, "URL", i, 0)) != NULL; i++) {
+ bdev_name = spdk_conf_section_get_nmval(sp, "URL", i, 1);
+ if (bdev_name == NULL) {
+ SPDK_ERRLOG("no bdev name specified for URL %s\n", url);
+ rc = -EINVAL;
+ break;
+ }
+
+ rc = create_iscsi_disk(bdev_name, url, initiator_iqn, bdev_iscsi_initialize_cb, NULL);
+ if (rc) {
+ break;
+ }
+ }
+
+ if (i == 0) {
+ spdk_bdev_module_init_done(&g_iscsi_bdev_module);
+ }
+
+ return rc;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("iscsi_init", SPDK_LOG_ISCSI_INIT)
diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi.h b/src/spdk/lib/bdev/iscsi/bdev_iscsi.h
new file mode 100644
index 00000000..b1d22fa8
--- /dev/null
+++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi.h
@@ -0,0 +1,75 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_ISCSI_H
+#define SPDK_BDEV_ISCSI_H
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_iscsi_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * SPDK bdev iSCSI callback type.
+ *
+ * \param cb_arg Completion callback custom arguments
+ * \param bdev created bdev
+ * \param status operation status. Zero on success.
+ */
+typedef void (*spdk_bdev_iscsi_create_cb)(void *cb_arg, struct spdk_bdev *bdev, int status);
+
+/**
+ * Create new iSCSI bdev.
+ *
+ * \warning An iSCSI URL may carry a login and password. Be careful: they
+ * will show up in the configuration dump.
+ *
+ * \param bdev_name name for the new bdev.
+ * \param url iSCSI URL string.
+ * \param initiator_iqn IQN we identify ourselves to the target as.
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ * \return 0 on success or a negative error code. On success, a bdev with
+ * the provided name is created.
+ */
+int create_iscsi_disk(const char *bdev_name, const char *url, const char *initiator_iqn,
+ spdk_bdev_iscsi_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Delete iSCSI bdev.
+ *
+ * \param bdev Pointer to iSCSI bdev.
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void delete_iscsi_disk(struct spdk_bdev *bdev, spdk_delete_iscsi_complete cb_fn, void *cb_arg);
+
+#endif /* SPDK_BDEV_ISCSI_H */
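A minimal usage sketch for the API declared above (editorial addition, not part of the patch): create an iSCSI bdev, then delete it by name. It assumes the calls run on an SPDK app thread after subsystem initialization; the bdev name, URL and IQN are placeholders.

    #include "bdev_iscsi.h"
    #include "spdk/bdev.h"

    static void
    create_done(void *cb_arg, struct spdk_bdev *bdev, int status)
    {
        /* status == 0: the bdev is registered under the requested name */
    }

    static void
    delete_done(void *cb_arg, int bdeverrno)
    {
        /* bdeverrno == 0: the bdev was unregistered */
    }

    static void
    iscsi_bdev_example(void)
    {
        int rc;

        rc = create_iscsi_disk("iSCSI0",
                               "iscsi://127.0.0.1/iqn.2016-06.io.spdk:disk1/0",
                               "iqn.2016-06.io.spdk:init", create_done, NULL);
        if (rc != 0) {
            return; /* e.g. -EINVAL for bad arguments, -ENOMEM */
        }

        /* ... later, once create_done has reported success ... */
        delete_iscsi_disk(spdk_bdev_get_by_name("iSCSI0"), delete_done, NULL);
    }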
diff --git a/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c b/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c
new file mode 100644
index 00000000..3682b612
--- /dev/null
+++ b/src/spdk/lib/bdev/iscsi/bdev_iscsi_rpc.c
@@ -0,0 +1,173 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_iscsi.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+
+#include "spdk_internal/log.h"
+
+struct rpc_construct_iscsi_bdev {
+ char *name;
+ char *initiator_iqn;
+ char *url;
+};
+
+static const struct spdk_json_object_decoder rpc_construct_iscsi_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_construct_iscsi_bdev, name), spdk_json_decode_string},
+ {"initiator_iqn", offsetof(struct rpc_construct_iscsi_bdev, initiator_iqn), spdk_json_decode_string},
+ {"url", offsetof(struct rpc_construct_iscsi_bdev, url), spdk_json_decode_string},
+};
+
+static void
+free_rpc_construct_iscsi_bdev(struct rpc_construct_iscsi_bdev *req)
+{
+ free(req->name);
+ free(req->initiator_iqn);
+ free(req->url);
+}
+
+static void
+construct_iscsi_bdev_cb(void *cb_arg, struct spdk_bdev *bdev, int status)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ if (status > 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "iSCSI error (%d).", status);
+ } else if (status < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-status));
+ } else {
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ }
+}
+
+static void
+spdk_rpc_construct_iscsi_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_iscsi_bdev req = {};
+ int rc = 0;
+
+ if (spdk_json_decode_object(params, rpc_construct_iscsi_bdev_decoders,
+ SPDK_COUNTOF(rpc_construct_iscsi_bdev_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = create_iscsi_disk(req.name, req.url, req.initiator_iqn, construct_iscsi_bdev_cb, request);
+ if (rc) {
+ goto invalid;
+ }
+
+ free_rpc_construct_iscsi_bdev(&req);
+ return;
+
+invalid:
+ free_rpc_construct_iscsi_bdev(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("construct_iscsi_bdev", spdk_rpc_construct_iscsi_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_iscsi {
+ char *name;
+};
+
+static void
+free_rpc_delete_iscsi(struct rpc_delete_iscsi *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_iscsi_decoders[] = {
+ {"name", offsetof(struct rpc_delete_iscsi, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_iscsi_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_iscsi_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_iscsi req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_iscsi_decoders,
+ SPDK_COUNTOF(rpc_delete_iscsi_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_iscsi_disk(bdev, _spdk_rpc_delete_iscsi_bdev_cb, request);
+
+ free_rpc_delete_iscsi(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_iscsi(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_iscsi_bdev", spdk_rpc_delete_iscsi_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/lvol/Makefile b/src/spdk/lib/bdev/lvol/Makefile
new file mode 100644
index 00000000..569b14cf
--- /dev/null
+++ b/src/spdk/lib/bdev/lvol/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = vbdev_lvol.c vbdev_lvol_rpc.c
+LIBNAME = vbdev_lvol
+LOCAL_SYS_LIBS = -luuid
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol.c b/src/spdk/lib/bdev/lvol/vbdev_lvol.c
new file mode 100644
index 00000000..74df81e4
--- /dev/null
+++ b/src/spdk/lib/bdev/lvol/vbdev_lvol.c
@@ -0,0 +1,1321 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/blob_bdev.h"
+#include "spdk/rpc.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk/string.h"
+#include "spdk/uuid.h"
+
+#include "vbdev_lvol.h"
+
+static TAILQ_HEAD(, lvol_store_bdev) g_spdk_lvol_pairs = TAILQ_HEAD_INITIALIZER(
+ g_spdk_lvol_pairs);
+
+static int vbdev_lvs_init(void);
+static int vbdev_lvs_get_ctx_size(void);
+static void vbdev_lvs_examine(struct spdk_bdev *bdev);
+
+static struct spdk_bdev_module g_lvol_if = {
+ .name = "lvol",
+ .module_init = vbdev_lvs_init,
+ .examine_disk = vbdev_lvs_examine,
+ .get_ctx_size = vbdev_lvs_get_ctx_size,
+
+};
+
+SPDK_BDEV_MODULE_REGISTER(&g_lvol_if)
+
+struct lvol_store_bdev *
+vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig)
+{
+ struct spdk_lvol_store *lvs = NULL;
+ struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first();
+
+ while (lvs_bdev != NULL) {
+ lvs = lvs_bdev->lvs;
+ if (lvs == lvs_orig) {
+ if (lvs_bdev->req != NULL) {
+ /* We do not allow access to lvs that are being destroyed */
+ return NULL;
+ } else {
+ return lvs_bdev;
+ }
+ }
+ lvs_bdev = vbdev_lvol_store_next(lvs_bdev);
+ }
+
+ return NULL;
+}
+
+static int
+_vbdev_lvol_change_bdev_alias(struct spdk_lvol *lvol, const char *new_lvol_name)
+{
+ struct spdk_bdev_alias *tmp;
+ char *old_alias;
+ char *alias;
+ int rc;
+ int alias_number = 0;
+
+ /* A bdev representing an lvol has exactly one alias. The lvs name was
+ * changed earlier, so iterate the alias list to fetch the current alias
+ * and verify that it is the only one. */
+
+ TAILQ_FOREACH(tmp, &lvol->bdev->aliases, tailq) {
+ if (++alias_number > 1) {
+ SPDK_ERRLOG("There is more than 1 alias in bdev %s\n", lvol->bdev->name);
+ return -EINVAL;
+ }
+
+ old_alias = tmp->alias;
+ }
+
+ if (alias_number == 0) {
+ SPDK_ERRLOG("There are no aliases in bdev %s\n", lvol->bdev->name);
+ return -EINVAL;
+ }
+
+ alias = spdk_sprintf_alloc("%s/%s", lvol->lvol_store->name, new_lvol_name);
+ if (alias == NULL) {
+ SPDK_ERRLOG("Cannot alloc memory for alias\n");
+ return -ENOMEM;
+ }
+
+ rc = spdk_bdev_alias_add(lvol->bdev, alias);
+ if (rc != 0) {
+ SPDK_ERRLOG("cannot add alias '%s'\n", alias);
+ free(alias);
+ return rc;
+ }
+ free(alias);
+
+ rc = spdk_bdev_alias_del(lvol->bdev, old_alias);
+ if (rc != 0) {
+ SPDK_ERRLOG("cannot remove alias '%s'\n", old_alias);
+ return rc;
+ }
+
+ return 0;
+}
+
+static struct lvol_store_bdev *
+vbdev_get_lvs_bdev_by_bdev(struct spdk_bdev *bdev_orig)
+{
+ struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first();
+
+ while (lvs_bdev != NULL) {
+ if (lvs_bdev->bdev == bdev_orig) {
+ if (lvs_bdev->req != NULL) {
+ /* We do not allow access to lvs that are being destroyed */
+ return NULL;
+ } else {
+ return lvs_bdev;
+ }
+ }
+ lvs_bdev = vbdev_lvol_store_next(lvs_bdev);
+ }
+
+ return NULL;
+}
+
+static void
+vbdev_lvs_hotremove_cb(void *ctx)
+{
+ struct spdk_bdev *bdev = ctx;
+ struct lvol_store_bdev *lvs_bdev;
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_bdev(bdev);
+ if (lvs_bdev != NULL) {
+ vbdev_lvs_unload(lvs_bdev->lvs, NULL, NULL);
+ }
+}
+
+static void
+_vbdev_lvs_create_cb(void *cb_arg, struct spdk_lvol_store *lvs, int lvserrno)
+{
+ struct spdk_lvs_with_handle_req *req = cb_arg;
+ struct lvol_store_bdev *lvs_bdev;
+ struct spdk_bdev *bdev = req->base_bdev;
+ struct spdk_bs_dev *bs_dev = req->bs_dev;
+
+ if (lvserrno != 0) {
+ assert(lvs == NULL);
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Cannot create lvol store bdev\n");
+ goto end;
+ }
+
+ lvserrno = spdk_bs_bdev_claim(bs_dev, &g_lvol_if);
+ if (lvserrno != 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store base bdev already claimed by another bdev\n");
+ req->bs_dev->destroy(req->bs_dev);
+ goto end;
+ }
+
+ assert(lvs != NULL);
+
+ lvs_bdev = calloc(1, sizeof(*lvs_bdev));
+ if (!lvs_bdev) {
+ lvserrno = -ENOMEM;
+ goto end;
+ }
+ lvs_bdev->lvs = lvs;
+ lvs_bdev->bdev = bdev;
+ lvs_bdev->req = NULL;
+
+ TAILQ_INSERT_TAIL(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores);
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store bdev inserted\n");
+
+end:
+ req->cb_fn(req->cb_arg, lvs, lvserrno);
+ free(req);
+
+ return;
+}
+
+int
+vbdev_lvs_create(struct spdk_bdev *base_bdev, const char *name, uint32_t cluster_sz,
+ spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg)
+{
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_lvs_with_handle_req *lvs_req;
+ struct spdk_lvs_opts opts;
+ int rc;
+ int len;
+
+ if (base_bdev == NULL) {
+ SPDK_ERRLOG("Bdev does not exist\n");
+ return -ENODEV;
+ }
+
+ spdk_lvs_opts_init(&opts);
+ if (cluster_sz != 0) {
+ opts.cluster_sz = cluster_sz;
+ }
+
+ if (name == NULL) {
+ SPDK_ERRLOG("missing name param\n");
+ return -EINVAL;
+ }
+
+ len = strnlen(name, SPDK_LVS_NAME_MAX);
+
+ if (len == 0 || len == SPDK_LVS_NAME_MAX) {
+ SPDK_ERRLOG("name must be between 1 and %d characters\n", SPDK_LVS_NAME_MAX - 1);
+ return -EINVAL;
+ }
+ snprintf(opts.name, sizeof(opts.name), "%s", name);
+
+ lvs_req = calloc(1, sizeof(*lvs_req));
+ if (!lvs_req) {
+ SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n");
+ return -ENOMEM;
+ }
+
+ bs_dev = spdk_bdev_create_bs_dev(base_bdev, vbdev_lvs_hotremove_cb, base_bdev);
+ if (!bs_dev) {
+ SPDK_ERRLOG("Cannot create blobstore device\n");
+ free(lvs_req);
+ return -ENODEV;
+ }
+
+ lvs_req->bs_dev = bs_dev;
+ lvs_req->base_bdev = base_bdev;
+ lvs_req->cb_fn = cb_fn;
+ lvs_req->cb_arg = cb_arg;
+
+ rc = spdk_lvs_init(bs_dev, &opts, _vbdev_lvs_create_cb, lvs_req);
+ if (rc < 0) {
+ free(lvs_req);
+ bs_dev->destroy(bs_dev);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void
+_vbdev_lvs_rename_cb(void *cb_arg, int lvserrno)
+{
+ struct spdk_lvs_req *req = cb_arg;
+ struct spdk_lvol *tmp;
+
+ if (lvserrno != 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store rename failed\n");
+ } else {
+ TAILQ_FOREACH(tmp, &req->lvol_store->lvols, link) {
+ /* We have to pass current lvol name, since only lvs name changed */
+ _vbdev_lvol_change_bdev_alias(tmp, tmp->name);
+ }
+ }
+
+ req->cb_fn(req->cb_arg, lvserrno);
+ free(req);
+}
+
+void
+vbdev_lvs_rename(struct spdk_lvol_store *lvs, const char *new_lvs_name,
+ spdk_lvs_op_complete cb_fn, void *cb_arg)
+{
+ struct lvol_store_bdev *lvs_bdev;
+
+ struct spdk_lvs_req *req;
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs);
+ if (!lvs_bdev) {
+ SPDK_ERRLOG("No such lvol store found\n");
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n");
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+ req->lvol_store = lvs;
+
+ spdk_lvs_rename(lvs, new_lvs_name, _vbdev_lvs_rename_cb, req);
+}
+
+static void
+_vbdev_lvs_remove_cb(void *cb_arg, int lvserrno)
+{
+ struct lvol_store_bdev *lvs_bdev = cb_arg;
+ struct spdk_lvs_req *req = lvs_bdev->req;
+
+ if (lvserrno != 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Could not remove lvol store bdev\n");
+ } else {
+ TAILQ_REMOVE(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores);
+ free(lvs_bdev);
+ }
+
+ if (req->cb_fn != NULL) {
+ req->cb_fn(req->cb_arg, lvserrno);
+ }
+ free(req);
+}
+
+static void
+_vbdev_lvs_remove_lvol_cb(void *cb_arg, int lvolerrno)
+{
+ struct lvol_store_bdev *lvs_bdev = cb_arg;
+ struct spdk_lvol_store *lvs = lvs_bdev->lvs;
+ struct spdk_lvol *lvol;
+
+ if (lvolerrno != 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_LVOL, "Lvol removed with errno %d\n", lvolerrno);
+ }
+
+ if (TAILQ_EMPTY(&lvs->lvols)) {
+ spdk_lvs_destroy(lvs, _vbdev_lvs_remove_cb, lvs_bdev);
+ return;
+ }
+
+ lvol = TAILQ_FIRST(&lvs->lvols);
+ while (lvol != NULL) {
+ if (spdk_lvol_deletable(lvol)) {
+ vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev);
+ return;
+ }
+ lvol = TAILQ_NEXT(lvol, link);
+ }
+
+ /* If no lvol is deletable, there is a circular dependency. */
+ SPDK_ERRLOG("Lvols remain in lvs, but none can be deleted\n");
+ assert(false);
+}
+
+static void
+_vbdev_lvs_remove_bdev_unregistered_cb(void *cb_arg, int bdeverrno)
+{
+ struct lvol_store_bdev *lvs_bdev = cb_arg;
+ struct spdk_lvol_store *lvs = lvs_bdev->lvs;
+ struct spdk_lvol *lvol, *tmp;
+
+ if (bdeverrno != 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_LVOL, "Lvol unregistered with errno %d\n", bdeverrno);
+ }
+
+ TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) {
+ if (lvol->ref_count != 0) {
+ /* An lvol is still open, don't unload whole lvol store. */
+ return;
+ }
+ }
+ spdk_lvs_unload(lvs, _vbdev_lvs_remove_cb, lvs_bdev);
+}
+
+static void
+_vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg,
+ bool destroy)
+{
+ struct spdk_lvs_req *req;
+ struct lvol_store_bdev *lvs_bdev;
+ struct spdk_lvol *lvol, *tmp;
+ bool all_lvols_closed = true;
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs);
+ if (!lvs_bdev) {
+ SPDK_ERRLOG("No such lvol store found\n");
+ if (cb_fn != NULL) {
+ cb_fn(cb_arg, -ENODEV);
+ }
+ return;
+ }
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n");
+ if (cb_fn != NULL) {
+ cb_fn(cb_arg, -ENOMEM);
+ }
+ return;
+ }
+
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+ lvs_bdev->req = req;
+
+ TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) {
+ if (lvol->ref_count != 0) {
+ all_lvols_closed = false;
+ }
+ }
+
+ if (all_lvols_closed == true) {
+ if (destroy) {
+ spdk_lvs_destroy(lvs, _vbdev_lvs_remove_cb, lvs_bdev);
+ } else {
+ spdk_lvs_unload(lvs, _vbdev_lvs_remove_cb, lvs_bdev);
+ }
+ } else {
+ lvs->destruct = destroy;
+ if (destroy) {
+ _vbdev_lvs_remove_lvol_cb(lvs_bdev, 0);
+ } else {
+ TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) {
+ spdk_bdev_unregister(lvol->bdev, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev);
+ }
+ }
+ }
+}
+
+void
+vbdev_lvs_unload(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg)
+{
+ _vbdev_lvs_remove(lvs, cb_fn, cb_arg, false);
+}
+
+void
+vbdev_lvs_destruct(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg)
+{
+ _vbdev_lvs_remove(lvs, cb_fn, cb_arg, true);
+}
+
+struct lvol_store_bdev *
+vbdev_lvol_store_first(void)
+{
+ struct lvol_store_bdev *lvs_bdev;
+
+ lvs_bdev = TAILQ_FIRST(&g_spdk_lvol_pairs);
+ if (lvs_bdev) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Starting lvolstore iteration at %p\n", lvs_bdev->lvs);
+ }
+
+ return lvs_bdev;
+}
+
+struct lvol_store_bdev *
+vbdev_lvol_store_next(struct lvol_store_bdev *prev)
+{
+ struct lvol_store_bdev *lvs_bdev;
+
+ if (prev == NULL) {
+ SPDK_ERRLOG("prev argument cannot be NULL\n");
+ return NULL;
+ }
+
+ lvs_bdev = TAILQ_NEXT(prev, lvol_stores);
+ if (lvs_bdev) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Continuing lvolstore iteration at %p\n", lvs_bdev->lvs);
+ }
+
+ return lvs_bdev;
+}
+
+static struct spdk_lvol_store *
+_vbdev_get_lvol_store_by_uuid(const struct spdk_uuid *uuid)
+{
+ struct spdk_lvol_store *lvs = NULL;
+ struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first();
+
+ while (lvs_bdev != NULL) {
+ lvs = lvs_bdev->lvs;
+ if (spdk_uuid_compare(&lvs->uuid, uuid) == 0) {
+ return lvs;
+ }
+ lvs_bdev = vbdev_lvol_store_next(lvs_bdev);
+ }
+ return NULL;
+}
+
+struct spdk_lvol_store *
+vbdev_get_lvol_store_by_uuid(const char *uuid_str)
+{
+ struct spdk_uuid uuid;
+
+ if (spdk_uuid_parse(&uuid, uuid_str)) {
+ return NULL;
+ }
+
+ return _vbdev_get_lvol_store_by_uuid(&uuid);
+}
+
+struct spdk_lvol_store *
+vbdev_get_lvol_store_by_name(const char *name)
+{
+ struct spdk_lvol_store *lvs = NULL;
+ struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first();
+
+ while (lvs_bdev != NULL) {
+ lvs = lvs_bdev->lvs;
+ if (strncmp(lvs->name, name, sizeof(lvs->name)) == 0) {
+ return lvs;
+ }
+ lvs_bdev = vbdev_lvol_store_next(lvs_bdev);
+ }
+ return NULL;
+}
+
+struct vbdev_lvol_destroy_ctx {
+ struct spdk_lvol *lvol;
+ spdk_lvol_op_complete cb_fn;
+ void *cb_arg;
+};
+
+static void
+_vbdev_lvol_unregister_cb(void *ctx, int lvolerrno)
+{
+ struct spdk_bdev *bdev = ctx;
+
+ spdk_bdev_destruct_done(bdev, lvolerrno);
+ free(bdev);
+}
+
+static int
+vbdev_lvol_unregister(void *ctx)
+{
+ struct spdk_lvol *lvol = ctx;
+
+ assert(lvol != NULL);
+
+ spdk_bdev_alias_del_all(lvol->bdev);
+ spdk_lvol_close(lvol, _vbdev_lvol_unregister_cb, lvol->bdev);
+
+ /* return 1 to indicate we have an operation that must finish asynchronously before the
+ * lvol is closed
+ */
+ return 1;
+}
+
+static void
+_vbdev_lvol_destroy_cb(void *cb_arg, int bdeverrno)
+{
+ struct vbdev_lvol_destroy_ctx *ctx = cb_arg;
+ struct spdk_lvol *lvol = ctx->lvol;
+
+ if (bdeverrno < 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Could not unregister bdev during lvol (%s) destroy\n",
+ lvol->unique_id);
+ ctx->cb_fn(ctx->cb_arg, bdeverrno);
+ free(ctx);
+ return;
+ }
+
+ spdk_lvol_destroy(lvol, ctx->cb_fn, ctx->cb_arg);
+ free(ctx);
+}
+
+void
+vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
+{
+ struct vbdev_lvol_destroy_ctx *ctx;
+
+ assert(lvol != NULL);
+ assert(cb_fn != NULL);
+
+ /* Check if it is possible to delete lvol */
+ if (spdk_lvol_deletable(lvol) == false) {
+ /* throw an error */
+ SPDK_ERRLOG("Cannot delete lvol\n");
+ cb_fn(cb_arg, -EPERM);
+ return;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ ctx->lvol = lvol;
+ ctx->cb_fn = cb_fn;
+ ctx->cb_arg = cb_arg;
+
+ spdk_bdev_unregister(lvol->bdev, _vbdev_lvol_destroy_cb, ctx);
+}
+
+static char *
+vbdev_lvol_find_name(struct spdk_lvol *lvol, spdk_blob_id blob_id)
+{
+ struct spdk_lvol_store *lvs;
+ struct spdk_lvol *_lvol;
+
+ assert(lvol != NULL);
+
+ lvs = lvol->lvol_store;
+
+ assert(lvs);
+
+ TAILQ_FOREACH(_lvol, &lvs->lvols, link) {
+ if (_lvol->blob_id == blob_id) {
+ return _lvol->name;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+vbdev_lvol_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct spdk_lvol *lvol = ctx;
+ struct lvol_store_bdev *lvs_bdev;
+ struct spdk_bdev *bdev;
+ struct spdk_blob *blob;
+ char lvol_store_uuid[SPDK_UUID_STRING_LEN];
+ spdk_blob_id *ids = NULL;
+ size_t count, i;
+ char *name;
+ int rc = 0;
+
+ spdk_json_write_name(w, "lvol");
+ spdk_json_write_object_begin(w);
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
+ if (!lvs_bdev) {
+ SPDK_ERRLOG("No such lvol store found\n");
+ rc = -ENODEV;
+ goto end;
+ }
+
+ bdev = lvs_bdev->bdev;
+
+ spdk_uuid_fmt_lower(lvol_store_uuid, sizeof(lvol_store_uuid), &lvol->lvol_store->uuid);
+ spdk_json_write_name(w, "lvol_store_uuid");
+ spdk_json_write_string(w, lvol_store_uuid);
+
+ spdk_json_write_name(w, "base_bdev");
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+
+ blob = lvol->blob;
+
+ spdk_json_write_name(w, "thin_provision");
+ spdk_json_write_bool(w, spdk_blob_is_thin_provisioned(blob));
+
+ spdk_json_write_name(w, "snapshot");
+ spdk_json_write_bool(w, spdk_blob_is_snapshot(blob));
+
+ spdk_json_write_name(w, "clone");
+ spdk_json_write_bool(w, spdk_blob_is_clone(blob));
+
+ if (spdk_blob_is_clone(blob)) {
+ spdk_blob_id snapshotid = spdk_blob_get_parent_snapshot(lvol->lvol_store->blobstore, lvol->blob_id);
+ if (snapshotid != SPDK_BLOBID_INVALID) {
+ name = vbdev_lvol_find_name(lvol, snapshotid);
+ if (name != NULL) {
+ spdk_json_write_name(w, "base_snapshot");
+ spdk_json_write_string(w, name);
+ } else {
+ SPDK_ERRLOG("Cannot obtain snapshots name\n");
+ }
+ }
+ }
+
+ if (spdk_blob_is_snapshot(blob)) {
+ /* Get the number of clones */
+ rc = spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count);
+ if (rc == -ENOMEM && count > 0) {
+ ids = malloc(sizeof(spdk_blob_id) * count);
+ if (ids == NULL) {
+ SPDK_ERRLOG("Cannot allocate memory\n");
+ rc = -ENOMEM;
+ goto end;
+ }
+
+ rc = spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, ids, &count);
+ if (rc == 0) {
+ spdk_json_write_name(w, "clones");
+ spdk_json_write_array_begin(w);
+ for (i = 0; i < count; i++) {
+ name = vbdev_lvol_find_name(lvol, ids[i]);
+ if (name != NULL) {
+ spdk_json_write_string(w, name);
+ } else {
+ SPDK_ERRLOG("Cannot obtain clone name\n");
+ }
+
+ }
+ spdk_json_write_array_end(w);
+ }
+ free(ids);
+ }
+
+ }
+
+end:
+ spdk_json_write_object_end(w);
+
+ return rc;
+}
+
+static void
+vbdev_lvol_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* Nothing to dump as lvol configuration is saved on physical device. */
+}
+
+static struct spdk_io_channel *
+vbdev_lvol_get_io_channel(void *ctx)
+{
+ struct spdk_lvol *lvol = ctx;
+
+ return spdk_lvol_get_io_channel(lvol);
+}
+
+static bool
+vbdev_lvol_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct spdk_lvol *lvol = ctx;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return !spdk_blob_is_read_only(lvol->blob);
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_READ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void
+lvol_op_comp(void *cb_arg, int bserrno)
+{
+ struct lvol_task *task = cb_arg;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);
+
+ if (bserrno != 0) {
+ if (bserrno == -ENOMEM) {
+ task->status = SPDK_BDEV_IO_STATUS_NOMEM;
+ } else {
+ task->status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+ }
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Vbdev processing callback on device %s with type %d\n",
+ bdev_io->bdev->name, bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, task->status);
+}
+
+static void
+lvol_unmap(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ uint64_t start_page, num_pages;
+ struct spdk_blob *blob = lvol->blob;
+ struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx;
+
+ start_page = bdev_io->u.bdev.offset_blocks;
+ num_pages = bdev_io->u.bdev.num_blocks;
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL,
+ "Vbdev doing unmap at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page,
+ num_pages, bdev_io->bdev->name);
+ spdk_blob_io_unmap(blob, ch, start_page, num_pages, lvol_op_comp, task);
+}
+
+static void
+lvol_write_zeroes(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ uint64_t start_page, num_pages;
+ struct spdk_blob *blob = lvol->blob;
+ struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx;
+
+ start_page = bdev_io->u.bdev.offset_blocks;
+ num_pages = bdev_io->u.bdev.num_blocks;
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL,
+ "Vbdev doing write zeros at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page,
+ num_pages, bdev_io->bdev->name);
+ spdk_blob_io_write_zeroes(blob, ch, start_page, num_pages, lvol_op_comp, task);
+}
+
+static void
+lvol_read(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ uint64_t start_page, num_pages;
+ struct spdk_lvol *lvol = bdev_io->bdev->ctxt;
+ struct spdk_blob *blob = lvol->blob;
+ struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx;
+
+ start_page = bdev_io->u.bdev.offset_blocks;
+ num_pages = bdev_io->u.bdev.num_blocks;
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL,
+ "Vbdev doing read at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page,
+ num_pages, bdev_io->bdev->name);
+ spdk_blob_io_readv(blob, ch, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, start_page,
+ num_pages, lvol_op_comp, task);
+}
+
+static void
+lvol_write(struct spdk_lvol *lvol, struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ uint64_t start_page, num_pages;
+ struct spdk_blob *blob = lvol->blob;
+ struct lvol_task *task = (struct lvol_task *)bdev_io->driver_ctx;
+
+ start_page = bdev_io->u.bdev.offset_blocks;
+ num_pages = bdev_io->u.bdev.num_blocks;
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL,
+ "Vbdev doing write at offset %" PRIu64 " using %" PRIu64 " pages on device %s\n", start_page,
+ num_pages, bdev_io->bdev->name);
+ spdk_blob_io_writev(blob, ch, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, start_page,
+ num_pages, lvol_op_comp, task);
+}
+
+static int
+lvol_reset(struct spdk_bdev_io *bdev_io)
+{
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+
+ return 0;
+}
+
+static void
+vbdev_lvol_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_lvol *lvol = bdev_io->bdev->ctxt;
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Vbdev request type %d submitted\n", bdev_io->type);
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, lvol_read,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ lvol_write(lvol, ch, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ lvol_reset(bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ lvol_unmap(lvol, ch, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ lvol_write_zeroes(lvol, ch, bdev_io);
+ break;
+ default:
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "lvol: unsupported I/O type %d\n", bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+ return;
+}
+
+static struct spdk_bdev_fn_table vbdev_lvol_fn_table = {
+ .destruct = vbdev_lvol_unregister,
+ .io_type_supported = vbdev_lvol_io_type_supported,
+ .submit_request = vbdev_lvol_submit_request,
+ .get_io_channel = vbdev_lvol_get_io_channel,
+ .dump_info_json = vbdev_lvol_dump_info_json,
+ .write_config_json = vbdev_lvol_write_config_json,
+};
+
+static void
+_spdk_lvol_destroy_cb(void *cb_arg, int bdeverrno)
+{
+}
+
+static void
+_create_lvol_disk_destroy_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_lvol *lvol = cb_arg;
+
+ if (bdeverrno < 0) {
+ SPDK_ERRLOG("Could not unregister bdev for lvol %s\n",
+ lvol->unique_id);
+ return;
+ }
+
+ spdk_lvol_destroy(lvol, _spdk_lvol_destroy_cb, NULL);
+}
+
+static void
+_create_lvol_disk_unload_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_lvol *lvol = cb_arg;
+
+ if (bdeverrno < 0) {
+ SPDK_ERRLOG("Could not unregister bdev for lvol %s\n",
+ lvol->unique_id);
+ return;
+ }
+
+ TAILQ_REMOVE(&lvol->lvol_store->lvols, lvol, link);
+ free(lvol->unique_id);
+ free(lvol);
+}
+
+static int
+_create_lvol_disk(struct spdk_lvol *lvol, bool destroy)
+{
+ struct spdk_bdev *bdev;
+ struct lvol_store_bdev *lvs_bdev;
+ uint64_t total_size;
+ char *alias;
+ int rc;
+
+ if (!lvol->unique_id) {
+ return -EINVAL;
+ }
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
+ if (lvs_bdev == NULL) {
+ SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id);
+ return -ENODEV;
+ }
+
+ bdev = calloc(1, sizeof(struct spdk_bdev));
+ if (!bdev) {
+ SPDK_ERRLOG("Cannot alloc memory for lvol bdev\n");
+ return -ENOMEM;
+ }
+
+ bdev->name = lvol->unique_id;
+ bdev->product_name = "Logical Volume";
+ bdev->blocklen = spdk_bs_get_io_unit_size(lvol->lvol_store->blobstore);
+ total_size = spdk_blob_get_num_clusters(lvol->blob) *
+ spdk_bs_get_cluster_size(lvol->lvol_store->blobstore);
+ assert((total_size % bdev->blocklen) == 0);
+ bdev->blockcnt = total_size / bdev->blocklen;
+ bdev->uuid = lvol->uuid;
+ bdev->need_aligned_buffer = lvs_bdev->bdev->need_aligned_buffer;
+ bdev->split_on_optimal_io_boundary = true;
+ bdev->optimal_io_boundary = spdk_bs_get_cluster_size(lvol->lvol_store->blobstore) / bdev->blocklen;
+
+ bdev->ctxt = lvol;
+ bdev->fn_table = &vbdev_lvol_fn_table;
+ bdev->module = &g_lvol_if;
+
+ rc = spdk_vbdev_register(bdev, &lvs_bdev->bdev, 1);
+ if (rc) {
+ free(bdev);
+ return rc;
+ }
+ lvol->bdev = bdev;
+
+ alias = spdk_sprintf_alloc("%s/%s", lvs_bdev->lvs->name, lvol->name);
+ if (alias == NULL) {
+ SPDK_ERRLOG("Cannot alloc memory for alias\n");
+ spdk_bdev_unregister(lvol->bdev, (destroy ? _create_lvol_disk_destroy_cb :
+ _create_lvol_disk_unload_cb), lvol);
+ return -ENOMEM;
+ }
+
+ rc = spdk_bdev_alias_add(bdev, alias);
+ if (rc != 0) {
+ SPDK_ERRLOG("Cannot add alias to lvol bdev\n");
+ spdk_bdev_unregister(lvol->bdev, (destroy ? _create_lvol_disk_destroy_cb :
+ _create_lvol_disk_unload_cb), lvol);
+ }
+ free(alias);
+
+ return rc;
+}
+
+static void
+_vbdev_lvol_create_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
+{
+ struct spdk_lvol_with_handle_req *req = cb_arg;
+
+ if (lvolerrno < 0) {
+ goto end;
+ }
+
+ lvolerrno = _create_lvol_disk(lvol, true);
+
+end:
+ req->cb_fn(req->cb_arg, lvol, lvolerrno);
+ free(req);
+}
+
+int
+vbdev_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz,
+ bool thin_provision, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg)
+{
+ struct spdk_lvol_with_handle_req *req;
+ int rc;
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ return -ENOMEM;
+ }
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+
+ rc = spdk_lvol_create(lvs, name, sz, thin_provision, _vbdev_lvol_create_cb, req);
+ if (rc != 0) {
+ free(req);
+ }
+
+ return rc;
+}
+
+void
+vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg)
+{
+ struct spdk_lvol_with_handle_req *req;
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ cb_fn(cb_arg, NULL, -ENOMEM);
+ return;
+ }
+
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+
+ spdk_lvol_create_snapshot(lvol, snapshot_name, _vbdev_lvol_create_cb, req);
+}
+
+void
+vbdev_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg)
+{
+ struct spdk_lvol_with_handle_req *req;
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ cb_fn(cb_arg, NULL, -ENOMEM);
+ return;
+ }
+
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+
+ spdk_lvol_create_clone(lvol, clone_name, _vbdev_lvol_create_cb, req);
+}
+
+static void
+_vbdev_lvol_rename_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_lvol_req *req = cb_arg;
+
+ if (lvolerrno != 0) {
+ SPDK_ERRLOG("Renaming lvol failed\n");
+ }
+
+ req->cb_fn(req->cb_arg, lvolerrno);
+ free(req);
+}
+
+void
+vbdev_lvol_rename(struct spdk_lvol *lvol, const char *new_lvol_name,
+ spdk_lvol_op_complete cb_fn, void *cb_arg)
+{
+ struct spdk_lvol_req *req;
+ int rc;
+
+ rc = _vbdev_lvol_change_bdev_alias(lvol, new_lvol_name);
+ if (rc != 0) {
+ SPDK_ERRLOG("renaming lvol to '%s' does not succeed\n", new_lvol_name);
+ cb_fn(cb_arg, rc);
+ return;
+ }
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+
+ spdk_lvol_rename(lvol, new_lvol_name, _vbdev_lvol_rename_cb, req);
+}
+
+static void
+_vbdev_lvol_resize_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_lvol_req *req = cb_arg;
+ struct spdk_lvol *lvol = req->lvol;
+ uint64_t total_size;
+
+ /* change bdev size */
+ if (lvolerrno != 0) {
+ SPDK_ERRLOG("CB function for bdev lvol %s receive error no: %d.\n", lvol->name, lvolerrno);
+ goto finish;
+ }
+
+ total_size = spdk_blob_get_num_clusters(lvol->blob) *
+ spdk_bs_get_cluster_size(lvol->lvol_store->blobstore);
+ assert((total_size % lvol->bdev->blocklen) == 0);
+
+ lvolerrno = spdk_bdev_notify_blockcnt_change(lvol->bdev, total_size / lvol->bdev->blocklen);
+ if (lvolerrno != 0) {
+ SPDK_ERRLOG("Could not change num blocks for bdev lvol %s with error no: %d.\n",
+ lvol->name, lvolerrno);
+ }
+
+finish:
+ req->cb_fn(req->cb_arg, lvolerrno);
+ free(req);
+}
+
+void
+vbdev_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, void *cb_arg)
+{
+ struct spdk_lvol_req *req;
+
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ cb_fn(cb_arg, -EINVAL);
+ return;
+ }
+
+ assert(lvol->bdev != NULL);
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ cb_fn(cb_arg, -ENOMEM);
+ return;
+ }
+
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+ req->sz = sz;
+ req->lvol = lvol;
+
+ spdk_lvol_resize(req->lvol, req->sz, _vbdev_lvol_resize_cb, req);
+}
+
+static int
+vbdev_lvs_init(void)
+{
+ return 0;
+}
+
+static int
+vbdev_lvs_get_ctx_size(void)
+{
+ return sizeof(struct lvol_task);
+}
+
+static void
+_vbdev_lvs_examine_failed(void *cb_arg, int lvserrno)
+{
+ spdk_bdev_module_examine_done(&g_lvol_if);
+}
+
+static void
+_vbdev_lvol_examine_close_cb(struct spdk_lvol_store *lvs)
+{
+ if (lvs->lvols_opened >= lvs->lvol_count) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvols finished\n");
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ }
+}
+
+static void
+_vbdev_lvs_examine_finish(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
+{
+ struct spdk_lvol_store *lvs = cb_arg;
+
+ if (lvolerrno != 0) {
+ SPDK_ERRLOG("Error opening lvol %s\n", lvol->unique_id);
+ TAILQ_REMOVE(&lvs->lvols, lvol, link);
+ lvs->lvol_count--;
+ free(lvol->unique_id);
+ free(lvol);
+ goto end;
+ }
+
+ if (_create_lvol_disk(lvol, false)) {
+ SPDK_ERRLOG("Cannot create bdev for lvol %s\n", lvol->unique_id);
+ lvs->lvol_count--;
+ _vbdev_lvol_examine_close_cb(lvs);
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvol %s failed\n", lvol->unique_id);
+ return;
+ }
+
+ lvs->lvols_opened++;
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvol %s succeeded\n", lvol->unique_id);
+
+end:
+
+ if (lvs->lvols_opened >= lvs->lvol_count) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Opening lvols finished\n");
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ }
+}
+
+static void
+_vbdev_lvs_examine_cb(void *arg, struct spdk_lvol_store *lvol_store, int lvserrno)
+{
+ struct lvol_store_bdev *lvs_bdev;
+ struct spdk_lvs_with_handle_req *req = (struct spdk_lvs_with_handle_req *)arg;
+ struct spdk_lvol *lvol, *tmp;
+
+ if (lvserrno == -EEXIST) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL,
+ "Name for lvolstore on device %s conflicts with name for already loaded lvs\n",
+ req->base_bdev->name);
+ /* On error blobstore destroys bs_dev itself */
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ goto end;
+ } else if (lvserrno != 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store not found on %s\n", req->base_bdev->name);
+ /* On error blobstore destroys bs_dev itself */
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ goto end;
+ }
+
+ lvserrno = spdk_bs_bdev_claim(lvol_store->bs_dev, &g_lvol_if);
+ if (lvserrno != 0) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store base bdev already claimed by another bdev\n");
+ spdk_lvs_unload(lvol_store, _vbdev_lvs_examine_failed, NULL);
+ goto end;
+ }
+
+ lvs_bdev = calloc(1, sizeof(*lvs_bdev));
+ if (!lvs_bdev) {
+ SPDK_ERRLOG("Cannot alloc memory for lvs_bdev\n");
+ spdk_lvs_unload(lvol_store, _vbdev_lvs_examine_failed, NULL);
+ goto end;
+ }
+
+ lvs_bdev->lvs = lvol_store;
+ lvs_bdev->bdev = req->base_bdev;
+
+ TAILQ_INSERT_TAIL(&g_spdk_lvol_pairs, lvs_bdev, lvol_stores);
+
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store found on %s - begin parsing\n",
+ req->base_bdev->name);
+
+ lvol_store->lvols_opened = 0;
+
+ if (TAILQ_EMPTY(&lvol_store->lvols)) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Lvol store examination done\n");
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ } else {
+ /* Open all lvols */
+ TAILQ_FOREACH_SAFE(lvol, &lvol_store->lvols, link, tmp) {
+ spdk_lvol_open(lvol, _vbdev_lvs_examine_finish, lvol_store);
+ }
+ }
+
+end:
+ free(req);
+}
+
+static void
+vbdev_lvs_examine(struct spdk_bdev *bdev)
+{
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_lvs_with_handle_req *req;
+
+ req = calloc(1, sizeof(*req));
+ if (req == NULL) {
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ SPDK_ERRLOG("Cannot alloc memory for vbdev lvol store request pointer\n");
+ return;
+ }
+
+ bs_dev = spdk_bdev_create_bs_dev(bdev, vbdev_lvs_hotremove_cb, bdev);
+ if (!bs_dev) {
+ SPDK_INFOLOG(SPDK_LOG_VBDEV_LVOL, "Cannot create bs dev on %s\n", bdev->name);
+ spdk_bdev_module_examine_done(&g_lvol_if);
+ free(req);
+ return;
+ }
+
+ req->base_bdev = bdev;
+
+ spdk_lvs_load(bs_dev, _vbdev_lvs_examine_cb, req);
+}
+
+struct spdk_lvol *
+vbdev_lvol_get_from_bdev(struct spdk_bdev *bdev)
+{
+ if (!bdev || bdev->module != &g_lvol_if) {
+ return NULL;
+ }
+
+ if (bdev->ctxt == NULL) {
+ SPDK_ERRLOG("No lvol ctx assigned to bdev %s\n", bdev->name);
+ return NULL;
+ }
+
+ return (struct spdk_lvol *)bdev->ctxt;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_lvol", SPDK_LOG_VBDEV_LVOL);
diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol.h b/src/spdk/lib/bdev/lvol/vbdev_lvol.h
new file mode 100644
index 00000000..93991d08
--- /dev/null
+++ b/src/spdk/lib/bdev/lvol/vbdev_lvol.h
@@ -0,0 +1,120 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_LVOL_H
+#define SPDK_VBDEV_LVOL_H
+
+#include "spdk/lvol.h"
+#include "spdk/bdev_module.h"
+
+#include "spdk_internal/lvolstore.h"
+
+struct lvol_store_bdev {
+ struct spdk_lvol_store *lvs;
+ struct spdk_bdev *bdev;
+ struct spdk_lvs_req *req;
+
+ TAILQ_ENTRY(lvol_store_bdev) lvol_stores;
+};
+
+int vbdev_lvs_create(struct spdk_bdev *base_bdev, const char *name, uint32_t cluster_sz,
+ spdk_lvs_op_with_handle_complete cb_fn, void *cb_arg);
+void vbdev_lvs_destruct(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg);
+void vbdev_lvs_unload(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void *cb_arg);
+
+int vbdev_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz,
+ bool thin_provisioned, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+void vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+void vbdev_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name,
+ spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg);
+
+/**
+ * \brief Change the size of an lvol. Returns no value; the result is
+ * delivered through the completion callback.
+ * \param lvol Handle to lvol
+ * \param sz New size of the lvol
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void vbdev_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn,
+ void *cb_arg);
+
+void vbdev_lvol_rename(struct spdk_lvol *lvol, const char *new_lvol_name,
+ spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * Destroy a logical volume
+ * \param lvol Handle to lvol
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
+
+/**
+ * \brief Renames given lvolstore.
+ *
+ * \param lvs Pointer to lvolstore
+ * \param new_name New name of lvs
+ * \param cb_fn Completion callback
+ * \param cb_arg Completion callback custom arguments
+ */
+void vbdev_lvs_rename(struct spdk_lvol_store *lvs, const char *new_lvs_name,
+ spdk_lvs_op_complete cb_fn, void *cb_arg);
+
+/**
+ * \brief Search for an lvolstore handle by UUID
+ * \param uuid_str UUID of lvolstore
+ * \return Handle to spdk_lvol_store or NULL if not found.
+ */
+struct spdk_lvol_store *vbdev_get_lvol_store_by_uuid(const char *uuid_str);
+
+/**
+ * \brief Search for an lvolstore handle by name
+ * \param name name of lvolstore
+ * \return Handle to spdk_lvol_store or NULL if not found.
+ */
+struct spdk_lvol_store *vbdev_get_lvol_store_by_name(const char *name);
+
+/**
+ * \brief Search for the lvol_store_bdev pairing of a given lvolstore
+ * \param lvs handle to lvolstore
+ * \return Handle to lvol_store_bdev or NULL if not found.
+ */
+struct lvol_store_bdev *vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs);
+
+struct spdk_lvol *vbdev_lvol_get_from_bdev(struct spdk_bdev *bdev);
+
+#endif /* SPDK_VBDEV_LVOL_H */
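A minimal sketch tying the declarations above together (editorial addition, not part of the patch): create an lvolstore on a base bdev, then a thin-provisioned lvol inside it. The names and the 1 GiB size are placeholders; a cluster_sz of 0 keeps the default chosen by spdk_lvs_opts_init().

    #include "vbdev_lvol.h"

    static void
    lvol_created(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
    {
        /* on success, a bdev with alias "lvs0/lvol0" is registered */
    }

    static void
    lvs_created(void *cb_arg, struct spdk_lvol_store *lvs, int lvserrno)
    {
        if (lvserrno == 0) {
            vbdev_lvol_create(lvs, "lvol0", 1024ULL * 1024 * 1024, true,
                              lvol_created, NULL);
        }
    }

    static void
    lvol_example(struct spdk_bdev *base_bdev)
    {
        /* base_bdev must be an unclaimed bdev, e.g. looked up with
         * spdk_bdev_get_by_name() */
        vbdev_lvs_create(base_bdev, "lvs0", 0, lvs_created, NULL);
    }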
diff --git a/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c b/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c
new file mode 100644
index 00000000..30f67f35
--- /dev/null
+++ b/src/spdk/lib/bdev/lvol/vbdev_lvol_rpc.c
@@ -0,0 +1,1089 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/bdev.h"
+#include "spdk/util.h"
+#include "vbdev_lvol.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+SPDK_LOG_REGISTER_COMPONENT("lvolrpc", SPDK_LOG_LVOL_RPC)
+
+struct rpc_construct_lvol_store {
+ char *lvs_name;
+ char *bdev_name;
+ uint32_t cluster_sz;
+};
+
+static int
+vbdev_get_lvol_store_by_uuid_xor_name(const char *uuid, const char *lvs_name,
+ struct spdk_lvol_store **lvs)
+{
+ if (uuid == NULL && lvs_name == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "neither lvs UUID nor lvs name specified\n");
+ return -EINVAL;
+ } else if (uuid && lvs_name) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "both lvs UUID '%s' and lvs name '%s' specified\n", uuid,
+ lvs_name);
+ return -EINVAL;
+ } else if (uuid) {
+ *lvs = vbdev_get_lvol_store_by_uuid(uuid);
+
+ if (*lvs == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "blobstore with UUID '%s' not found\n", uuid);
+ return -ENODEV;
+ }
+ } else if (lvs_name) {
+ *lvs = vbdev_get_lvol_store_by_name(lvs_name);
+
+ if (*lvs == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "blobstore with name '%s' not found\n", lvs_name);
+ return -ENODEV;
+ }
+ }
+ return 0;
+}
+
+static void
+free_rpc_construct_lvol_store(struct rpc_construct_lvol_store *req)
+{
+ free(req->bdev_name);
+ free(req->lvs_name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_lvol_store_decoders[] = {
+ {"bdev_name", offsetof(struct rpc_construct_lvol_store, bdev_name), spdk_json_decode_string},
+ {"cluster_sz", offsetof(struct rpc_construct_lvol_store, cluster_sz), spdk_json_decode_uint32, true},
+ {"lvs_name", offsetof(struct rpc_construct_lvol_store, lvs_name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_lvol_store_construct_cb(void *cb_arg, struct spdk_lvol_store *lvol_store, int lvserrno)
+{
+ struct spdk_json_write_ctx *w;
+ char lvol_store_uuid[SPDK_UUID_STRING_LEN];
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvserrno != 0) {
+ goto invalid;
+ }
+
+ spdk_uuid_fmt_lower(lvol_store_uuid, sizeof(lvol_store_uuid), &lvol_store->uuid);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, lvol_store_uuid);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvserrno));
+}
+
+static void
+spdk_rpc_construct_lvol_store(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_lvol_store req = {};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_lvol_store_decoders,
+ SPDK_COUNTOF(rpc_construct_lvol_store_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ if (req.bdev_name == NULL) {
+ SPDK_ERRLOG("missing bdev_name param\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ if (req.lvs_name == NULL) {
+ SPDK_ERRLOG("missing lvs_name param\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+ bdev = spdk_bdev_get_by_name(req.bdev_name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.bdev_name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ rc = vbdev_lvs_create(bdev, req.lvs_name, req.cluster_sz, _spdk_rpc_lvol_store_construct_cb,
+ request);
+ if (rc < 0) {
+ goto invalid;
+ }
+ free_rpc_construct_lvol_store(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_construct_lvol_store(&req);
+}
+SPDK_RPC_REGISTER("construct_lvol_store", spdk_rpc_construct_lvol_store, SPDK_RPC_RUNTIME)
+
+struct rpc_rename_lvol_store {
+ char *old_name;
+ char *new_name;
+};
+
+static void
+free_rpc_rename_lvol_store(struct rpc_rename_lvol_store *req)
+{
+ free(req->old_name);
+ free(req->new_name);
+}
+
+static const struct spdk_json_object_decoder rpc_rename_lvol_store_decoders[] = {
+ {"old_name", offsetof(struct rpc_rename_lvol_store, old_name), spdk_json_decode_string},
+ {"new_name", offsetof(struct rpc_rename_lvol_store, new_name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_rename_lvol_store_cb(void *cb_arg, int lvserrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvserrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvserrno));
+}
+
+static void
+spdk_rpc_rename_lvol_store(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_rename_lvol_store req = {};
+ struct spdk_lvol_store *lvs;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_rename_lvol_store_decoders,
+ SPDK_COUNTOF(rpc_rename_lvol_store_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ lvs = vbdev_get_lvol_store_by_name(req.old_name);
+ if (lvs == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "no lvs existing for given name\n");
+ rc = -ENOENT;
+ goto invalid;
+ }
+
+ vbdev_lvs_rename(lvs, req.new_name, _spdk_rpc_rename_lvol_store_cb, request);
+
+ free_rpc_rename_lvol_store(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_rename_lvol_store(&req);
+}
+SPDK_RPC_REGISTER("rename_lvol_store", spdk_rpc_rename_lvol_store, SPDK_RPC_RUNTIME)
+
+struct rpc_destroy_lvol_store {
+ char *uuid;
+ char *lvs_name;
+};
+
+static void
+free_rpc_destroy_lvol_store(struct rpc_destroy_lvol_store *req)
+{
+ free(req->uuid);
+ free(req->lvs_name);
+}
+
+static const struct spdk_json_object_decoder rpc_destroy_lvol_store_decoders[] = {
+ {"uuid", offsetof(struct rpc_destroy_lvol_store, uuid), spdk_json_decode_string, true},
+ {"lvs_name", offsetof(struct rpc_destroy_lvol_store, lvs_name), spdk_json_decode_string, true},
+};
+
+static void
+_spdk_rpc_lvol_store_destroy_cb(void *cb_arg, int lvserrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvserrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvserrno));
+}
+
+static void
+spdk_rpc_destroy_lvol_store(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_destroy_lvol_store req = {};
+ struct spdk_lvol_store *lvs = NULL;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_destroy_lvol_store_decoders,
+ SPDK_COUNTOF(rpc_destroy_lvol_store_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ vbdev_lvs_destruct(lvs, _spdk_rpc_lvol_store_destroy_cb, request);
+
+ free_rpc_destroy_lvol_store(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_destroy_lvol_store(&req);
+}
+SPDK_RPC_REGISTER("destroy_lvol_store", spdk_rpc_destroy_lvol_store, SPDK_RPC_RUNTIME)
+
+struct rpc_construct_lvol_bdev {
+ char *uuid;
+ char *lvs_name;
+ char *lvol_name;
+ uint64_t size;
+ bool thin_provision;
+};
+
+static void
+free_rpc_construct_lvol_bdev(struct rpc_construct_lvol_bdev *req)
+{
+ free(req->uuid);
+ free(req->lvs_name);
+ free(req->lvol_name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_lvol_bdev_decoders[] = {
+ {"uuid", offsetof(struct rpc_construct_lvol_bdev, uuid), spdk_json_decode_string, true},
+ {"lvs_name", offsetof(struct rpc_construct_lvol_bdev, lvs_name), spdk_json_decode_string, true},
+ {"lvol_name", offsetof(struct rpc_construct_lvol_bdev, lvol_name), spdk_json_decode_string, true},
+ {"size", offsetof(struct rpc_construct_lvol_bdev, size), spdk_json_decode_uint64},
+ {"thin_provision", offsetof(struct rpc_construct_lvol_bdev, thin_provision), spdk_json_decode_bool, true},
+};
+
+static void
+_spdk_rpc_construct_lvol_bdev_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, lvol->unique_id);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_construct_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_lvol_bdev req = {};
+ int rc;
+ struct spdk_lvol_store *lvs = NULL;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Creating blob\n");
+
+ if (spdk_json_decode_object(params, rpc_construct_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_construct_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ if (req.lvol_name == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "no bdev name\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = vbdev_lvol_create(lvs, req.lvol_name, req.size, req.thin_provision,
+ _spdk_rpc_construct_lvol_bdev_cb, request);
+ if (rc < 0) {
+ goto invalid;
+ }
+
+ free_rpc_construct_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_construct_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("construct_lvol_bdev", spdk_rpc_construct_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_snapshot_lvol_bdev {
+ char *lvol_name;
+ char *snapshot_name;
+};
+
+static void
+free_rpc_snapshot_lvol_bdev(struct rpc_snapshot_lvol_bdev *req)
+{
+ free(req->lvol_name);
+ free(req->snapshot_name);
+}
+
+static const struct spdk_json_object_decoder rpc_snapshot_lvol_bdev_decoders[] = {
+ {"lvol_name", offsetof(struct rpc_snapshot_lvol_bdev, lvol_name), spdk_json_decode_string},
+ {"snapshot_name", offsetof(struct rpc_snapshot_lvol_bdev, snapshot_name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_snapshot_lvol_bdev_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, lvol->unique_id);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_snapshot_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_snapshot_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Snapshotting blob\n");
+
+ if (spdk_json_decode_object(params, rpc_snapshot_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_snapshot_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.lvol_name);
+ if (bdev == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "bdev '%s' does not exist\n", req.lvol_name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ vbdev_lvol_create_snapshot(lvol, req.snapshot_name, _spdk_rpc_snapshot_lvol_bdev_cb, request);
+
+ free_rpc_snapshot_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_snapshot_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("snapshot_lvol_bdev", spdk_rpc_snapshot_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_clone_lvol_bdev {
+ char *snapshot_name;
+ char *clone_name;
+};
+
+static void
+free_rpc_clone_lvol_bdev(struct rpc_clone_lvol_bdev *req)
+{
+ free(req->snapshot_name);
+ free(req->clone_name);
+}
+
+static const struct spdk_json_object_decoder rpc_clone_lvol_bdev_decoders[] = {
+ {"snapshot_name", offsetof(struct rpc_clone_lvol_bdev, snapshot_name), spdk_json_decode_string},
+ {"clone_name", offsetof(struct rpc_clone_lvol_bdev, clone_name), spdk_json_decode_string, true},
+};
+
+static void
+_spdk_rpc_clone_lvol_bdev_cb(void *cb_arg, struct spdk_lvol *lvol, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, lvol->unique_id);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_clone_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_clone_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Cloning blob\n");
+
+ if (spdk_json_decode_object(params, rpc_clone_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_clone_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.snapshot_name);
+ if (bdev == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "bdev '%s' does not exist\n", req.snapshot_name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ vbdev_lvol_create_clone(lvol, req.clone_name, _spdk_rpc_clone_lvol_bdev_cb, request);
+
+ free_rpc_clone_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_clone_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("clone_lvol_bdev", spdk_rpc_clone_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_rename_lvol_bdev {
+ char *old_name;
+ char *new_name;
+};
+
+static void
+free_rpc_rename_lvol_bdev(struct rpc_rename_lvol_bdev *req)
+{
+ free(req->old_name);
+ free(req->new_name);
+}
+
+static const struct spdk_json_object_decoder rpc_rename_lvol_bdev_decoders[] = {
+ {"old_name", offsetof(struct rpc_rename_lvol_bdev, old_name), spdk_json_decode_string},
+ {"new_name", offsetof(struct rpc_rename_lvol_bdev, new_name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_rename_lvol_bdev_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_rename_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_rename_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc = 0;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Renaming lvol\n");
+
+ if (spdk_json_decode_object(params, rpc_rename_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_rename_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.old_name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.old_name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ vbdev_lvol_rename(lvol, req.new_name, _spdk_rpc_rename_lvol_bdev_cb, request);
+
+ free_rpc_rename_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_rename_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("rename_lvol_bdev", spdk_rpc_rename_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_inflate_lvol_bdev {
+ char *name;
+};
+
+static void
+free_rpc_inflate_lvol_bdev(struct rpc_inflate_lvol_bdev *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_inflate_lvol_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_inflate_lvol_bdev, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_inflate_lvol_bdev_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_inflate_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_inflate_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc = 0;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Inflating lvol\n");
+
+ if (spdk_json_decode_object(params, rpc_inflate_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_inflate_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ spdk_lvol_inflate(lvol, _spdk_rpc_inflate_lvol_bdev_cb, request);
+
+ free_rpc_inflate_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_inflate_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("inflate_lvol_bdev", spdk_rpc_inflate_lvol_bdev, SPDK_RPC_RUNTIME)
+
+static void
+spdk_rpc_decouple_parent_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_inflate_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc = 0;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Decoupling parent of lvol\n");
+
+ if (spdk_json_decode_object(params, rpc_inflate_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_inflate_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ SPDK_ERRLOG("lvol does not exist\n");
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ spdk_lvol_decouple_parent(lvol, _spdk_rpc_inflate_lvol_bdev_cb, request);
+
+ free_rpc_inflate_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ free_rpc_inflate_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("decouple_parent_lvol_bdev", spdk_rpc_decouple_parent_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_resize_lvol_bdev {
+ char *name;
+ uint64_t size;
+};
+
+static void
+free_rpc_resize_lvol_bdev(struct rpc_resize_lvol_bdev *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_resize_lvol_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_resize_lvol_bdev, name), spdk_json_decode_string},
+ {"size", offsetof(struct rpc_resize_lvol_bdev, size), spdk_json_decode_uint64},
+};
+
+static void
+_spdk_rpc_resize_lvol_bdev_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_resize_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_resize_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc = 0;
+
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "Resizing lvol\n");
+
+ if (spdk_json_decode_object(params, rpc_resize_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_resize_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ if (req.name == NULL) {
+ SPDK_ERRLOG("missing name param\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("no bdev for provided name %s\n", req.name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ vbdev_lvol_resize(lvol, req.size, _spdk_rpc_resize_lvol_bdev_cb, request);
+
+ free_rpc_resize_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_resize_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("resize_lvol_bdev", spdk_rpc_resize_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_destroy_lvol_bdev {
+ char *name;
+};
+
+static void
+free_rpc_destroy_lvol_bdev(struct rpc_destroy_lvol_bdev *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_destroy_lvol_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_destroy_lvol_bdev, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_destroy_lvol_bdev_cb(void *cb_arg, int lvolerrno)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = cb_arg;
+
+ if (lvolerrno != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ spdk_strerror(-lvolerrno));
+}
+
+static void
+spdk_rpc_destroy_lvol_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_destroy_lvol_bdev req = {};
+ struct spdk_bdev *bdev;
+ struct spdk_lvol *lvol;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_destroy_lvol_bdev_decoders,
+ SPDK_COUNTOF(rpc_destroy_lvol_bdev_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("no bdev for provided name %s\n", req.name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ lvol = vbdev_lvol_get_from_bdev(bdev);
+ if (lvol == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ vbdev_lvol_destroy(lvol, _spdk_rpc_destroy_lvol_bdev_cb, request);
+
+ free_rpc_destroy_lvol_bdev(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_destroy_lvol_bdev(&req);
+}
+
+SPDK_RPC_REGISTER("destroy_lvol_bdev", spdk_rpc_destroy_lvol_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_get_lvol_stores {
+ char *uuid;
+ char *lvs_name;
+};
+
+static void
+free_rpc_get_lvol_stores(struct rpc_get_lvol_stores *req)
+{
+ free(req->uuid);
+ free(req->lvs_name);
+}
+
+static const struct spdk_json_object_decoder rpc_get_lvol_stores_decoders[] = {
+ {"uuid", offsetof(struct rpc_get_lvol_stores, uuid), spdk_json_decode_string, true},
+ {"lvs_name", offsetof(struct rpc_get_lvol_stores, lvs_name), spdk_json_decode_string, true},
+};
+
+static void
+spdk_rpc_dump_lvol_store_info(struct spdk_json_write_ctx *w, struct lvol_store_bdev *lvs_bdev)
+{
+ struct spdk_blob_store *bs;
+ uint64_t cluster_size, block_size;
+ char uuid[SPDK_UUID_STRING_LEN];
+
+ bs = lvs_bdev->lvs->blobstore;
+ cluster_size = spdk_bs_get_cluster_size(bs);
+ /* The block size of lvols is always the blob store page size */
+ block_size = spdk_bs_get_page_size(bs);
+
+ spdk_json_write_object_begin(w);
+
+ spdk_uuid_fmt_lower(uuid, sizeof(uuid), &lvs_bdev->lvs->uuid);
+ spdk_json_write_name(w, "uuid");
+ spdk_json_write_string(w, uuid);
+
+ spdk_json_write_name(w, "name");
+ spdk_json_write_string(w, lvs_bdev->lvs->name);
+
+ spdk_json_write_name(w, "base_bdev");
+ spdk_json_write_string(w, spdk_bdev_get_name(lvs_bdev->bdev));
+
+ spdk_json_write_name(w, "total_data_clusters");
+ spdk_json_write_uint64(w, spdk_bs_total_data_cluster_count(bs));
+
+ spdk_json_write_name(w, "free_clusters");
+ spdk_json_write_uint64(w, spdk_bs_free_cluster_count(bs));
+
+ spdk_json_write_name(w, "block_size");
+ spdk_json_write_uint64(w, block_size);
+
+ spdk_json_write_name(w, "cluster_size");
+ spdk_json_write_uint64(w, cluster_size);
+
+ spdk_json_write_object_end(w);
+}
+
+static void
+spdk_rpc_get_lvol_stores(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_lvol_stores req = {};
+ struct spdk_json_write_ctx *w;
+ struct lvol_store_bdev *lvs_bdev = NULL;
+ struct spdk_lvol_store *lvs = NULL;
+ int rc;
+
+ if (params != NULL) {
+ if (spdk_json_decode_object(params, rpc_get_lvol_stores_decoders,
+ SPDK_COUNTOF(rpc_get_lvol_stores_decoders),
+ &req)) {
+ SPDK_INFOLOG(SPDK_LOG_LVOL_RPC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = vbdev_get_lvol_store_by_uuid_xor_name(req.uuid, req.lvs_name, &lvs);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvs);
+ if (lvs_bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_get_lvol_stores(&req);
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+
+ if (lvs_bdev != NULL) {
+ spdk_rpc_dump_lvol_store_info(w, lvs_bdev);
+ } else {
+ for (lvs_bdev = vbdev_lvol_store_first(); lvs_bdev != NULL;
+ lvs_bdev = vbdev_lvol_store_next(lvs_bdev)) {
+ spdk_rpc_dump_lvol_store_info(w, lvs_bdev);
+ }
+ }
+ spdk_json_write_array_end(w);
+
+ spdk_jsonrpc_end_result(request, w);
+
+ free_rpc_get_lvol_stores(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_get_lvol_stores(&req);
+}
+
+SPDK_RPC_REGISTER("get_lvol_stores", spdk_rpc_get_lvol_stores, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/malloc/Makefile b/src/spdk/lib/bdev/malloc/Makefile
new file mode 100644
index 00000000..f4eb9aaa
--- /dev/null
+++ b/src/spdk/lib/bdev/malloc/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_malloc.c bdev_malloc_rpc.c
+LIBNAME = bdev_malloc
+LOCAL_SYS_LIBS = -luuid
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc.c b/src/spdk/lib/bdev/malloc/bdev_malloc.c
new file mode 100644
index 00000000..eb4b2b9c
--- /dev/null
+++ b/src/spdk/lib/bdev/malloc/bdev_malloc.c
@@ -0,0 +1,524 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_malloc.h"
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/copy_engine.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/queue.h"
+#include "spdk/string.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+struct malloc_disk {
+ struct spdk_bdev disk;
+ void *malloc_buf;
+ TAILQ_ENTRY(malloc_disk) link;
+};
+
+struct malloc_task {
+ int num_outstanding;
+ enum spdk_bdev_io_status status;
+};
+
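+/*
+ * Each malloc_task is laid out immediately in front of its spdk_copy_task
+ * inside the per-I/O driver context (see bdev_malloc_get_ctx_size() below),
+ * so the two helpers that follow convert between them with plain pointer
+ * arithmetic.
+ */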
+static struct malloc_task *
+__malloc_task_from_copy_task(struct spdk_copy_task *ct)
+{
+ return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task));
+}
+
+static struct spdk_copy_task *
+__copy_task_from_malloc_task(struct malloc_task *mt)
+{
+ return (struct spdk_copy_task *)((uintptr_t)mt + sizeof(struct malloc_task));
+}
+
+static void
+malloc_done(void *ref, int status)
+{
+ struct malloc_task *task = __malloc_task_from_copy_task(ref);
+
+ if (status != 0) {
+ if (status == -ENOMEM) {
+ task->status = SPDK_BDEV_IO_STATUS_NOMEM;
+ } else {
+ task->status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+ }
+
+ if (--task->num_outstanding == 0) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
+ }
+}
+
+static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);
+
+int malloc_disk_count = 0;
+
+static int bdev_malloc_initialize(void);
+static void bdev_malloc_get_spdk_running_config(FILE *fp);
+
+static int
+bdev_malloc_get_ctx_size(void)
+{
+ return sizeof(struct malloc_task) + spdk_copy_task_size();
+}
+
+static struct spdk_bdev_module malloc_if = {
+ .name = "malloc",
+ .module_init = bdev_malloc_initialize,
+ .config_text = bdev_malloc_get_spdk_running_config,
+ .get_ctx_size = bdev_malloc_get_ctx_size,
+
+};
+
+SPDK_BDEV_MODULE_REGISTER(&malloc_if)
+
+static void
+malloc_disk_free(struct malloc_disk *malloc_disk)
+{
+ if (!malloc_disk) {
+ return;
+ }
+
+ free(malloc_disk->disk.name);
+ spdk_dma_free(malloc_disk->malloc_buf);
+ spdk_dma_free(malloc_disk);
+}
+
+static int
+bdev_malloc_destruct(void *ctx)
+{
+ struct malloc_disk *malloc_disk = ctx;
+
+ TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
+ malloc_disk_free(malloc_disk);
+ return 0;
+}
+
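+/*
+ * Returns 0 when the iovecs can hold at least nbytes, and non-zero when
+ * their combined length falls short; callers fail the I/O on non-zero.
+ */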
+static int
+bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
+{
+ int i;
+
+ for (i = 0; i < iovcnt; i++) {
+ if (nbytes < iovs[i].iov_len) {
+ return 0;
+ }
+
+ nbytes -= iovs[i].iov_len;
+ }
+
+ return nbytes != 0;
+}
+
+static void
+bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
+ struct malloc_task *task,
+ struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
+{
+ int64_t res = 0;
+ void *src = mdisk->malloc_buf + offset;
+ int i;
+
+ if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
+ SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n",
+ len, offset);
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ task->num_outstanding = iovcnt;
+
+ for (i = 0; i < iovcnt; i++) {
+ res = spdk_copy_submit(__copy_task_from_malloc_task(task),
+ ch, iov[i].iov_base,
+ src, iov[i].iov_len, malloc_done);
+
+ if (res != 0) {
+ malloc_done(__copy_task_from_malloc_task(task), res);
+ }
+
+ src += iov[i].iov_len;
+ len -= iov[i].iov_len;
+ }
+}
+
+static void
+bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
+ struct malloc_task *task,
+ struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
+{
+ int64_t res = 0;
+ void *dst = mdisk->malloc_buf + offset;
+ int i;
+
+ if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
+ SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n",
+ len, offset);
+
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ task->num_outstanding = iovcnt;
+
+ for (i = 0; i < iovcnt; i++) {
+ res = spdk_copy_submit(__copy_task_from_malloc_task(task),
+ ch, dst, iov[i].iov_base,
+ iov[i].iov_len, malloc_done);
+
+ if (res != 0) {
+ malloc_done(__copy_task_from_malloc_task(task), res);
+ }
+
+ dst += iov[i].iov_len;
+ }
+}
+
+static int
+bdev_malloc_unmap(struct malloc_disk *mdisk,
+ struct spdk_io_channel *ch,
+ struct malloc_task *task,
+ uint64_t offset,
+ uint64_t byte_count)
+{
+ task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ task->num_outstanding = 1;
+
+ return spdk_copy_submit_fill(__copy_task_from_malloc_task(task), ch,
+ mdisk->malloc_buf + offset, 0, byte_count, malloc_done);
+}
+
+static int64_t
+bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task,
+ uint64_t offset, uint64_t nbytes)
+{
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+ return 0;
+}
+
+static int
+bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task)
+{
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+ return 0;
+}
+
+static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ uint32_t block_size = bdev_io->bdev->blocklen;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
+ assert(bdev_io->u.bdev.iovcnt == 1);
+ bdev_io->u.bdev.iovs[0].iov_base =
+ ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
+ bdev_io->u.bdev.offset_blocks * block_size;
+ bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ return 0;
+ }
+
+ bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct malloc_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * block_size,
+ bdev_io->u.bdev.offset_blocks * block_size);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct malloc_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * block_size,
+ bdev_io->u.bdev.offset_blocks * block_size);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt,
+ (struct malloc_task *)bdev_io->driver_ctx);
+
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt,
+ (struct malloc_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks * block_size,
+ bdev_io->u.bdev.num_blocks * block_size);
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct malloc_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks * block_size,
+ bdev_io->u.bdev.num_blocks * block_size);
+
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ /* bdev_malloc_unmap is implemented with a call to spdk_copy_submit_fill(), which zeroes all of the requested bytes, so it services write-zeroes as well. */
+ return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
+ ch,
+ (struct malloc_task *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks * block_size,
+ bdev_io->u.bdev.num_blocks * block_size);
+
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ if (_bdev_malloc_submit_request(ch, bdev_io) != 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static struct spdk_io_channel *
+bdev_malloc_get_io_channel(void *ctx)
+{
+ return spdk_copy_engine_get_io_channel();
+}
+
+static void
+bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ char uuid_str[SPDK_UUID_STRING_LEN];
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_malloc_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
+ spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table malloc_fn_table = {
+ .destruct = bdev_malloc_destruct,
+ .submit_request = bdev_malloc_submit_request,
+ .io_type_supported = bdev_malloc_io_type_supported,
+ .get_io_channel = bdev_malloc_get_io_channel,
+ .write_config_json = bdev_malloc_write_json_config,
+};
+
+struct spdk_bdev *create_malloc_disk(const char *name, const struct spdk_uuid *uuid,
+ uint64_t num_blocks, uint32_t block_size)
+{
+ struct malloc_disk *mdisk;
+ int rc;
+
+ if (num_blocks == 0) {
+ SPDK_ERRLOG("Disk must be more than 0 blocks\n");
+ return NULL;
+ }
+
+ mdisk = spdk_dma_zmalloc(sizeof(*mdisk), 0, NULL);
+ if (!mdisk) {
+ SPDK_ERRLOG("mdisk spdk_dma_zmalloc() failed\n");
+ return NULL;
+ }
+
+ /*
+ * Allocate the large backend memory buffer from pinned memory.
+ *
+ * TODO: need to pass a hint so we know which socket to allocate
+ * from on multi-socket systems.
+ */
+ mdisk->malloc_buf = spdk_dma_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL);
+ if (!mdisk->malloc_buf) {
+ SPDK_ERRLOG("malloc_buf spdk_dma_zmalloc() failed\n");
+ malloc_disk_free(mdisk);
+ return NULL;
+ }
+
+ if (name) {
+ mdisk->disk.name = strdup(name);
+ } else {
+ /* Auto-generate a name */
+ mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
+ malloc_disk_count++;
+ }
+ if (!mdisk->disk.name) {
+ malloc_disk_free(mdisk);
+ return NULL;
+ }
+ mdisk->disk.product_name = "Malloc disk";
+
+ mdisk->disk.write_cache = 1;
+ mdisk->disk.blocklen = block_size;
+ mdisk->disk.blockcnt = num_blocks;
+ if (uuid) {
+ mdisk->disk.uuid = *uuid;
+ } else {
+ spdk_uuid_generate(&mdisk->disk.uuid);
+ }
+
+ mdisk->disk.ctxt = mdisk;
+ mdisk->disk.fn_table = &malloc_fn_table;
+ mdisk->disk.module = &malloc_if;
+
+ rc = spdk_bdev_register(&mdisk->disk);
+ if (rc) {
+ malloc_disk_free(mdisk);
+ return NULL;
+ }
+
+ TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);
+
+ return &mdisk->disk;
+}
+
+void
+delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg)
+{
+ if (!bdev || bdev->module != &malloc_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
+static int bdev_malloc_initialize(void)
+{
+ struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc");
+ int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0;
+ uint64_t size;
+ struct spdk_bdev *bdev;
+
+ if (sp != NULL) {
+ NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns");
+ LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB");
+ BlockSize = spdk_conf_section_get_intval(sp, "BlockSize");
+ if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) {
+ SPDK_ERRLOG("Malloc section present, but no devices specified\n");
+ goto end;
+ }
+ if (BlockSize < 1) {
+ /* Default is 512 bytes */
+ BlockSize = 512;
+ }
+ size = (uint64_t)LunSizeInMB * 1024 * 1024;
+ for (i = 0; i < NumberOfLuns; i++) {
+ bdev = create_malloc_disk(NULL, NULL, size / BlockSize, BlockSize);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("Could not create malloc disk\n");
+ rc = -EINVAL;
+ goto end;
+ }
+ }
+ }
+
+end:
+ return rc;
+}
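+/*
+ * Illustrative legacy INI configuration consumed above (values
+ * hypothetical): this would create four 64 MB LUNs with 512-byte blocks,
+ * named Malloc0 through Malloc3.
+ *
+ *   [Malloc]
+ *     NumberOfLuns 4
+ *     LunSizeInMB 64
+ *     BlockSize 512
+ */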
+
+static void
+bdev_malloc_get_spdk_running_config(FILE *fp)
+{
+ int num_malloc_luns = 0;
+ uint64_t malloc_lun_size = 0;
+ struct malloc_disk *mdisk;
+
+ /* count number of malloc LUNs, get LUN size */
+ TAILQ_FOREACH(mdisk, &g_malloc_disks, link) {
+ if (0 == malloc_lun_size) {
+ /* assume all malloc LUNs are the same size */
+ malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt;
+ malloc_lun_size /= (1024 * 1024);
+ }
+ num_malloc_luns++;
+ }
+
+ if (num_malloc_luns > 0) {
+ fprintf(fp,
+ "\n"
+ "# Users may change this section to create a different number or size of\n"
+ "# malloc LUNs.\n"
+ "# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n"
+ "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n"
+ "# Not all LUNs defined here are necessarily used below.\n"
+ "[Malloc]\n"
+ " NumberOfLuns %d\n"
+ " LunSizeInMB %" PRIu64 "\n",
+ num_malloc_luns, malloc_lun_size,
+ num_malloc_luns - 1, num_malloc_luns,
+ malloc_lun_size);
+ }
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC)
diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc.h b/src/spdk/lib/bdev/malloc/bdev_malloc.h
new file mode 100644
index 00000000..8ebdba78
--- /dev/null
+++ b/src/spdk/lib/bdev/malloc/bdev_malloc.h
@@ -0,0 +1,48 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_MALLOC_H
+#define SPDK_BDEV_MALLOC_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_malloc_complete)(void *cb_arg, int bdeverrno);
+
+struct spdk_bdev *create_malloc_disk(const char *name, const struct spdk_uuid *uuid,
+ uint64_t num_blocks, uint32_t block_size);
+
+void delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg);
+
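+/*
+ * Minimal usage sketch (illustrative only; the name, geometry and callback
+ * are hypothetical):
+ *
+ *   struct spdk_bdev *bdev;
+ *
+ *   bdev = create_malloc_disk("Malloc0", NULL, 8192, 512);
+ *   ...
+ *   delete_malloc_disk(bdev, cb_fn, cb_arg);
+ */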
+#endif /* SPDK_BDEV_MALLOC_H */
diff --git a/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c b/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c
new file mode 100644
index 00000000..4066cf2f
--- /dev/null
+++ b/src/spdk/lib/bdev/malloc/bdev_malloc_rpc.c
@@ -0,0 +1,170 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_malloc.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/uuid.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+struct rpc_construct_malloc {
+ char *name;
+ char *uuid;
+ uint64_t num_blocks;
+ uint32_t block_size;
+};
+
+static void
+free_rpc_construct_malloc(struct rpc_construct_malloc *r)
+{
+ free(r->name);
+ free(r->uuid);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
+ {"name", offsetof(struct rpc_construct_malloc, name), spdk_json_decode_string, true},
+ {"uuid", offsetof(struct rpc_construct_malloc, uuid), spdk_json_decode_string, true},
+ {"num_blocks", offsetof(struct rpc_construct_malloc, num_blocks), spdk_json_decode_uint64},
+ {"block_size", offsetof(struct rpc_construct_malloc, block_size), spdk_json_decode_uint32},
+};
+
+static void
+spdk_rpc_construct_malloc_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_malloc req = {NULL};
+ struct spdk_json_write_ctx *w;
+ struct spdk_uuid *uuid = NULL;
+ struct spdk_uuid decoded_uuid;
+ struct spdk_bdev *bdev;
+
+ if (spdk_json_decode_object(params, rpc_construct_malloc_decoders,
+ SPDK_COUNTOF(rpc_construct_malloc_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.uuid) {
+ if (spdk_uuid_parse(&decoded_uuid, req.uuid)) {
+ goto invalid;
+ }
+ uuid = &decoded_uuid;
+ }
+
+ bdev = create_malloc_disk(req.name, uuid, req.num_blocks, req.block_size);
+ if (bdev == NULL) {
+ goto invalid;
+ }
+
+ free_rpc_construct_malloc(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ free_rpc_construct_malloc(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+}
+SPDK_RPC_REGISTER("construct_malloc_bdev", spdk_rpc_construct_malloc_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_malloc {
+ char *name;
+};
+
+static void
+free_rpc_delete_malloc(struct rpc_delete_malloc *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_malloc_decoders[] = {
+ {"name", offsetof(struct rpc_delete_malloc, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_malloc_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_malloc_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_malloc req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_malloc_decoders,
+ SPDK_COUNTOF(rpc_delete_malloc_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_BDEV_MALLOC, "bdev '%s' does not exist\n", req.name);
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_malloc_disk(bdev, _spdk_rpc_delete_malloc_bdev_cb, request);
+
+ free_rpc_delete_malloc(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_malloc(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_malloc_bdev", spdk_rpc_delete_malloc_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/null/Makefile b/src/spdk/lib/bdev/null/Makefile
new file mode 100644
index 00000000..24962e58
--- /dev/null
+++ b/src/spdk/lib/bdev/null/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_null.c bdev_null_rpc.c
+LIBNAME = bdev_null
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/null/bdev_null.c b/src/spdk/lib/bdev/null/bdev_null.c
new file mode 100644
index 00000000..9ff64725
--- /dev/null
+++ b/src/spdk/lib/bdev/null/bdev_null.c
@@ -0,0 +1,384 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/env.h"
+#include "spdk/thread.h"
+#include "spdk/json.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_null.h"
+
+struct null_bdev {
+ struct spdk_bdev bdev;
+ TAILQ_ENTRY(null_bdev) tailq;
+};
+
+struct null_io_channel {
+ struct spdk_poller *poller;
+ TAILQ_HEAD(, spdk_bdev_io) io;
+};
+
+static TAILQ_HEAD(, null_bdev) g_null_bdev_head;
+static void *g_null_read_buf;
+
+static int bdev_null_initialize(void);
+static void bdev_null_finish(void);
+static void bdev_null_get_spdk_running_config(FILE *fp);
+
+static struct spdk_bdev_module null_if = {
+ .name = "null",
+ .module_init = bdev_null_initialize,
+ .module_fini = bdev_null_finish,
+ .config_text = bdev_null_get_spdk_running_config,
+ .async_fini = true,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&null_if)
+
+static int
+bdev_null_destruct(void *ctx)
+{
+ struct null_bdev *bdev = ctx;
+
+ TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
+ free(bdev->bdev.name);
+ spdk_dma_free(bdev);
+
+ return 0;
+}
+
+static void
+bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
+ assert(bdev_io->u.bdev.iovcnt == 1);
+ bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
+ bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
+ }
+ TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ default:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ break;
+ }
+}
+
+static bool
+bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ default:
+ return false;
+ }
+}
+
+static struct spdk_io_channel *
+bdev_null_get_io_channel(void *ctx)
+{
+ return spdk_get_io_channel(&g_null_bdev_head);
+}
+
+static void
+bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ char uuid_str[SPDK_UUID_STRING_LEN];
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_null_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
+ spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table null_fn_table = {
+ .destruct = bdev_null_destruct,
+ .submit_request = bdev_null_submit_request,
+ .io_type_supported = bdev_null_io_type_supported,
+ .get_io_channel = bdev_null_get_io_channel,
+ .write_config_json = bdev_null_write_config_json,
+};
+
+struct spdk_bdev *
+create_null_bdev(const char *name, const struct spdk_uuid *uuid,
+ uint64_t num_blocks, uint32_t block_size)
+{
+ struct null_bdev *bdev;
+ int rc;
+
+ if (block_size % 512 != 0) {
+ SPDK_ERRLOG("Block size %u is not a multiple of 512.\n", block_size);
+ return NULL;
+ }
+
+ if (num_blocks == 0) {
+ SPDK_ERRLOG("Disk must be more than 0 blocks\n");
+ return NULL;
+ }
+
+ bdev = spdk_dma_zmalloc(sizeof(*bdev), 0, NULL);
+ if (!bdev) {
+ SPDK_ERRLOG("could not allocate null_bdev\n");
+ return NULL;
+ }
+
+ bdev->bdev.name = strdup(name);
+ if (!bdev->bdev.name) {
+ spdk_dma_free(bdev);
+ return NULL;
+ }
+ bdev->bdev.product_name = "Null disk";
+
+ bdev->bdev.write_cache = 0;
+ bdev->bdev.blocklen = block_size;
+ bdev->bdev.blockcnt = num_blocks;
+ if (uuid) {
+ bdev->bdev.uuid = *uuid;
+ } else {
+ spdk_uuid_generate(&bdev->bdev.uuid);
+ }
+
+ bdev->bdev.ctxt = bdev;
+ bdev->bdev.fn_table = &null_fn_table;
+ bdev->bdev.module = &null_if;
+
+ rc = spdk_bdev_register(&bdev->bdev);
+ if (rc) {
+ free(bdev->bdev.name);
+ spdk_dma_free(bdev);
+ return NULL;
+ }
+
+ TAILQ_INSERT_TAIL(&g_null_bdev_head, bdev, tailq);
+
+ return &bdev->bdev;
+}
+
+void
+delete_null_bdev(struct spdk_bdev *bdev, spdk_delete_null_complete cb_fn, void *cb_arg)
+{
+ if (!bdev || bdev->module != &null_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
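+/*
+ * Completes all I/O queued on the channel in one batch. A positive return
+ * tells the SPDK poller framework that work was done; 0 means idle.
+ */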
+static int
+null_io_poll(void *arg)
+{
+ struct null_io_channel *ch = arg;
+ TAILQ_HEAD(, spdk_bdev_io) io;
+ struct spdk_bdev_io *bdev_io;
+
+ TAILQ_INIT(&io);
+ TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link);
+
+ if (TAILQ_EMPTY(&io)) {
+ return 0;
+ }
+
+ while (!TAILQ_EMPTY(&io)) {
+ bdev_io = TAILQ_FIRST(&io);
+ TAILQ_REMOVE(&io, bdev_io, module_link);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ }
+
+ return 1;
+}
+
+static int
+null_bdev_create_cb(void *io_device, void *ctx_buf)
+{
+ struct null_io_channel *ch = ctx_buf;
+
+ TAILQ_INIT(&ch->io);
+ ch->poller = spdk_poller_register(null_io_poll, ch, 0);
+
+ return 0;
+}
+
+static void
+null_bdev_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct null_io_channel *ch = ctx_buf;
+
+ spdk_poller_unregister(&ch->poller);
+}
+
+static int
+bdev_null_initialize(void)
+{
+ struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Null");
+ uint64_t size_in_mb, num_blocks;
+ int block_size, i, rc = 0;
+ struct spdk_bdev *bdev;
+ const char *name, *val;
+
+ TAILQ_INIT(&g_null_bdev_head);
+
+ /*
+ * This buffer is used if the upper layer expects us to allocate the read buffer.
+ * Instead of using a real rbuf from the bdev pool, just always point to
+ * this same zeroed buffer.
+ */
+ g_null_read_buf = spdk_dma_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL);
+ if (g_null_read_buf == NULL) {
+ SPDK_ERRLOG("could not allocate g_null_read_buf\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * We need to pick some unique address as our "io device" - so just use the
+ * address of the global tailq.
+ */
+ spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
+ sizeof(struct null_io_channel),
+ "null_bdev");
+
+ if (sp == NULL) {
+ goto end;
+ }
+
+ i = 0;
+ while (true) {
+ val = spdk_conf_section_get_nval(sp, "Dev", i);
+ if (val == NULL) {
+ break;
+ }
+
+ name = spdk_conf_section_get_nmval(sp, "Dev", i, 0);
+ if (name == NULL) {
+ SPDK_ERRLOG("Null entry %d: Name must be provided\n", i);
+ continue;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, "Dev", i, 1);
+ if (val == NULL) {
+ SPDK_ERRLOG("Null entry %d: Size in MB must be provided\n", i);
+ continue;
+ }
+
+ errno = 0;
+ size_in_mb = strtoull(val, NULL, 10);
+ if (errno) {
+ SPDK_ERRLOG("Null entry %d: Invalid size in MB %s\n", i, val);
+ continue;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, "Dev", i, 2);
+ if (val == NULL) {
+ block_size = 512;
+ } else {
+ errno = 0;
+ block_size = (int)strtol(val, NULL, 10);
+ if (errno) {
+ SPDK_ERRLOG("Null entry %d: Invalid block size %s\n", i, val);
+ continue;
+ }
+ }
+
+ num_blocks = size_in_mb * (1024 * 1024) / block_size;
+
+ bdev = create_null_bdev(name, NULL, num_blocks, block_size);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("Could not create null bdev\n");
+ rc = -EINVAL;
+ goto end;
+ }
+ }
+
+end:
+ return rc;
+}
+
+static void
+_bdev_null_finish_cb(void *arg)
+{
+ spdk_dma_free(g_null_read_buf);
+ spdk_bdev_module_finish_done();
+}
+
+static void
+bdev_null_finish(void)
+{
+ spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
+}
+
+static void
+bdev_null_get_spdk_running_config(FILE *fp)
+{
+ struct null_bdev *bdev;
+ uint64_t null_bdev_size;
+
+ fprintf(fp, "\n[Null]\n");
+
+ TAILQ_FOREACH(bdev, &g_null_bdev_head, tailq) {
+ null_bdev_size = bdev->bdev.blocklen * bdev->bdev.blockcnt;
+ null_bdev_size /= (1024 * 1024);
+ fprintf(fp, " %s %" PRIu64 " %d\n",
+ bdev->bdev.name, null_bdev_size, bdev->bdev.blocklen);
+ }
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_null", SPDK_LOG_BDEV_NULL)
diff --git a/src/spdk/lib/bdev/null/bdev_null.h b/src/spdk/lib/bdev/null/bdev_null.h
new file mode 100644
index 00000000..fa0123e3
--- /dev/null
+++ b/src/spdk/lib/bdev/null/bdev_null.h
@@ -0,0 +1,57 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_NULL_H
+#define SPDK_BDEV_NULL_H
+
+#include "spdk/stdinc.h"
+
+typedef void (*spdk_delete_null_complete)(void *cb_arg, int bdeverrno);
+
+struct spdk_bdev;
+struct spdk_uuid;
+
+struct spdk_bdev *create_null_bdev(const char *name, const struct spdk_uuid *uuid,
+ uint64_t num_blocks, uint32_t block_size);
+
+/**
+ * Delete null bdev.
+ *
+ * \param bdev Pointer to null bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void delete_null_bdev(struct spdk_bdev *bdev, spdk_delete_null_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_BDEV_NULL_H */
diff --git a/src/spdk/lib/bdev/null/bdev_null_rpc.c b/src/spdk/lib/bdev/null/bdev_null_rpc.c
new file mode 100644
index 00000000..9410b7ad
--- /dev/null
+++ b/src/spdk/lib/bdev/null/bdev_null_rpc.c
@@ -0,0 +1,169 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_null.h"
+
+struct rpc_construct_null {
+ char *name;
+ char *uuid;
+ uint64_t num_blocks;
+ uint32_t block_size;
+};
+
+static void
+free_rpc_construct_null(struct rpc_construct_null *req)
+{
+ free(req->name);
+ free(req->uuid);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_null_decoders[] = {
+ {"name", offsetof(struct rpc_construct_null, name), spdk_json_decode_string},
+ {"uuid", offsetof(struct rpc_construct_null, uuid), spdk_json_decode_string, true},
+ {"num_blocks", offsetof(struct rpc_construct_null, num_blocks), spdk_json_decode_uint64},
+ {"block_size", offsetof(struct rpc_construct_null, block_size), spdk_json_decode_uint32},
+};
+
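+/*
+ * Handler for the construct_null_bdev RPC. An example request, with
+ * illustrative values, matching the decoder table above:
+ *
+ * {"jsonrpc": "2.0", "id": 1, "method": "construct_null_bdev",
+ *  "params": {"name": "Null0", "num_blocks": 131072, "block_size": 512}}
+ */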
+static void
+spdk_rpc_construct_null_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_null req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_uuid *uuid = NULL;
+ struct spdk_uuid decoded_uuid;
+ struct spdk_bdev *bdev;
+
+ if (spdk_json_decode_object(params, rpc_construct_null_decoders,
+ SPDK_COUNTOF(rpc_construct_null_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NULL, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.uuid) {
+ if (spdk_uuid_parse(&decoded_uuid, req.uuid)) {
+ goto invalid;
+ }
+ uuid = &decoded_uuid;
+ }
+
+ bdev = create_null_bdev(req.name, uuid, req.num_blocks, req.block_size);
+ if (bdev == NULL) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_null(&req);
+ return;
+ }
+
+ spdk_json_write_string(w, bdev->name);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_construct_null(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_construct_null(&req);
+}
+SPDK_RPC_REGISTER("construct_null_bdev", spdk_rpc_construct_null_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_null {
+ char *name;
+};
+
+static void
+free_rpc_delete_null(struct rpc_delete_null *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_null_decoders[] = {
+ {"name", offsetof(struct rpc_delete_null, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_null_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
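+/*
+ * Handler for the delete_null_bdev RPC. An example request, with an
+ * illustrative bdev name:
+ *
+ * {"jsonrpc": "2.0", "id": 1, "method": "delete_null_bdev",
+ *  "params": {"name": "Null0"}}
+ */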
+static void
+spdk_rpc_delete_null_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_null req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_null_decoders,
+ SPDK_COUNTOF(rpc_delete_null_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_null_bdev(bdev, _spdk_rpc_delete_null_bdev_cb, request);
+
+ free_rpc_delete_null(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_null(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_null_bdev", spdk_rpc_delete_null_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/nvme/Makefile b/src/spdk/lib/bdev/nvme/Makefile
new file mode 100644
index 00000000..c5a40c74
--- /dev/null
+++ b/src/spdk/lib/bdev/nvme/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c
+LIBNAME = bdev_nvme
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.c b/src/spdk/lib/bdev/nvme/bdev_nvme.c
new file mode 100644
index 00000000..07c3b6ce
--- /dev/null
+++ b/src/spdk/lib/bdev/nvme/bdev_nvme.c
@@ -0,0 +1,1856 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_nvme.h"
+
+#include "spdk/config.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/bdev.h"
+#include "spdk/json.h"
+#include "spdk/nvme.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+#include "spdk/likely.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+static void bdev_nvme_get_spdk_running_config(FILE *fp);
+static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
+
+struct nvme_io_channel {
+ struct spdk_nvme_qpair *qpair;
+ struct spdk_poller *poller;
+
+ bool collect_spin_stat;
+ uint64_t spin_ticks;
+ uint64_t start_ticks;
+ uint64_t end_ticks;
+};
+
+struct nvme_bdev_io {
+ /** array of iovecs to transfer. */
+ struct iovec *iovs;
+
+ /** Number of iovecs in iovs array. */
+ int iovcnt;
+
+ /** Current iovec position. */
+ int iovpos;
+
+ /** Offset in current iovec. */
+ uint32_t iov_offset;
+
+ /** Saved status for admin passthru completion event. */
+ struct spdk_nvme_cpl cpl;
+
+ /** Originating thread */
+ struct spdk_thread *orig_thread;
+};
+
+enum data_direction {
+ BDEV_DISK_READ = 0,
+ BDEV_DISK_WRITE = 1
+};
+
+struct nvme_probe_ctx {
+ size_t count;
+ struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
+ const char *names[NVME_MAX_CONTROLLERS];
+ const char *hostnqn;
+};
+
+static struct spdk_bdev_nvme_opts g_opts = {
+ .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
+ .timeout_us = 0,
+ .retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT,
+ .nvme_adminq_poll_period_us = 1000000ULL,
+};
+
+#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
+#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
+
+static int g_hot_insert_nvme_controller_index = 0;
+static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
+static bool g_nvme_hotplug_enabled = false;
+static struct spdk_thread *g_bdev_nvme_init_thread;
+static struct spdk_poller *g_hotplug_poller;
+static char *g_nvme_hostnqn = NULL;
+static pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static TAILQ_HEAD(, nvme_ctrlr) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs);
+
+static int nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr);
+static int bdev_nvme_library_init(void);
+static void bdev_nvme_library_fini(void);
+static int bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair,
+ struct nvme_bdev_io *bio,
+ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count,
+ uint64_t lba);
+static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
+static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
+static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
+static int nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid);
+
+struct spdk_nvme_qpair *
+spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
+{
+ struct nvme_io_channel *nvme_ch;
+
+ nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
+
+ return nvme_ch->qpair;
+}
+
+struct nvme_ctrlr *
+spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name)
+{
+ struct nvme_ctrlr *_nvme_ctrlr;
+
+ TAILQ_FOREACH(_nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ if (strcmp(ctrlr_name, _nvme_ctrlr->name) == 0) {
+ return _nvme_ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+struct nvme_ctrlr *
+spdk_bdev_nvme_first_ctrlr(void)
+{
+ return TAILQ_FIRST(&g_nvme_ctrlrs);
+}
+
+struct nvme_ctrlr *
+spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev)
+{
+ return TAILQ_NEXT(prev, tailq);
+}
+
+static int
+bdev_nvme_get_ctx_size(void)
+{
+ return sizeof(struct nvme_bdev_io);
+}
+
+static struct spdk_bdev_module nvme_if = {
+ .name = "nvme",
+ .module_init = bdev_nvme_library_init,
+ .module_fini = bdev_nvme_library_fini,
+ .config_text = bdev_nvme_get_spdk_running_config,
+ .config_json = bdev_nvme_config_json,
+ .get_ctx_size = bdev_nvme_get_ctx_size,
+};
+SPDK_BDEV_MODULE_REGISTER(&nvme_if)
+
+static int
+bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_READ,
+ iov, iovcnt, lba_count, lba);
+}
+
+static int
+bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_WRITE,
+ iov, iovcnt, lba_count, lba);
+}
+
+static int
+bdev_nvme_poll(void *arg)
+{
+ struct nvme_io_channel *ch = arg;
+ int32_t num_completions;
+
+ if (ch->qpair == NULL) {
+ return -1;
+ }
+
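+ /*
+ * Spin-stat accounting (enabled when VTune support is configured):
+ * start_ticks marks when polling began, and time spent polling without
+ * reaping any completions is accumulated into spin_ticks.
+ */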
+ if (ch->collect_spin_stat && ch->start_ticks == 0) {
+ ch->start_ticks = spdk_get_ticks();
+ }
+
+ num_completions = spdk_nvme_qpair_process_completions(ch->qpair, 0);
+
+ if (ch->collect_spin_stat) {
+ if (num_completions > 0) {
+ if (ch->end_ticks != 0) {
+ ch->spin_ticks += (ch->end_ticks - ch->start_ticks);
+ ch->end_ticks = 0;
+ }
+ ch->start_ticks = 0;
+ } else {
+ ch->end_ticks = spdk_get_ticks();
+ }
+ }
+
+ return num_completions;
+}
+
+static int
+bdev_nvme_poll_adminq(void *arg)
+{
+ struct spdk_nvme_ctrlr *ctrlr = arg;
+
+ return spdk_nvme_ctrlr_process_admin_completions(ctrlr);
+}
+
+static void
+bdev_nvme_unregister_cb(void *io_device)
+{
+ struct spdk_nvme_ctrlr *ctrlr = io_device;
+
+ spdk_nvme_detach(ctrlr);
+}
+
+static int
+bdev_nvme_destruct(void *ctx)
+{
+ struct nvme_bdev *nvme_disk = ctx;
+ struct nvme_ctrlr *nvme_ctrlr = nvme_disk->nvme_ctrlr;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ nvme_ctrlr->ref--;
+ free(nvme_disk->disk.name);
+ memset(nvme_disk, 0, sizeof(*nvme_disk));
+ if (nvme_ctrlr->ref == 0) {
+ TAILQ_REMOVE(&g_nvme_ctrlrs, nvme_ctrlr, tailq);
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ spdk_io_device_unregister(nvme_ctrlr->ctrlr, bdev_nvme_unregister_cb);
+ spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
+ free(nvme_ctrlr->name);
+ free(nvme_ctrlr->bdevs);
+ free(nvme_ctrlr);
+ return 0;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return 0;
+}
+
+static int
+bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio,
+ uint64_t offset, uint64_t nbytes)
+{
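+ /* Flush is completed inline without issuing an NVMe flush command. */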
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+ return 0;
+}
+
+static void
+_bdev_nvme_reset_done(struct spdk_io_channel_iter *i, int status)
+{
+ void *ctx = spdk_io_channel_iter_get_ctx(i);
+ int rc = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ if (status) {
+ rc = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc);
+}
+
+static void
+_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
+{
+ struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i);
+ struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
+
+ nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
+ if (!nvme_ch->qpair) {
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status)
+{
+ struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i);
+ struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
+ int rc;
+
+ if (status) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ rc = spdk_nvme_ctrlr_reset(ctrlr);
+ if (rc != 0) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ /* Recreate all of the I/O queue pairs */
+ spdk_for_each_channel(ctrlr,
+ _bdev_nvme_reset_create_qpair,
+ bio,
+ _bdev_nvme_reset_done);
+}
+
+static void
+_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
+ if (!rc) {
+ nvme_ch->qpair = NULL;
+ }
+
+ spdk_for_each_channel_continue(i, rc);
+}
+
+static int
+bdev_nvme_reset(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
+{
+ /* First, delete all NVMe I/O queue pairs. */
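+ /*
+ * Once every channel has dropped its qpair, _bdev_nvme_reset() resets
+ * the controller and recreates a qpair on each channel via
+ * _bdev_nvme_reset_create_qpair(), and _bdev_nvme_reset_done() then
+ * completes the reset I/O.
+ */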
+ spdk_for_each_channel(nbdev->nvme_ctrlr->ctrlr,
+ _bdev_nvme_reset_destroy_qpair,
+ bio,
+ _bdev_nvme_reset);
+
+ return 0;
+}
+
+static int
+bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ uint64_t offset_blocks,
+ uint64_t num_blocks);
+
+static void
+bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ int ret;
+
+ ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ if (spdk_likely(ret == 0)) {
+ return;
+ } else if (ret == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static int
+_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ if (nvme_ch->qpair == NULL) {
+ /* The device is currently resetting */
+ return -1;
+ }
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ return bdev_nvme_writev((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return bdev_nvme_reset((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx);
+
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ return bdev_nvme_flush((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
+ return bdev_nvme_admin_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes);
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ return bdev_nvme_io_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes);
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return bdev_nvme_io_passthru_md((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes,
+ bdev_io->u.nvme_passthru.md_buf,
+ bdev_io->u.nvme_passthru.md_len);
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void
+bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ int rc = _bdev_nvme_submit_request(ch, bdev_io);
+
+ if (spdk_unlikely(rc != 0)) {
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static bool
+bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct nvme_bdev *nbdev = ctx;
+ const struct spdk_nvme_ctrlr_data *cdata;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ return true;
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return spdk_nvme_ns_get_md_size(nbdev->ns) ? true : false;
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr);
+ return cdata->oncs.dsm;
+
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr);
+ /*
+ * If an NVMe controller guarantees reading unallocated blocks returns zero,
+ * we can implement WRITE_ZEROES as an NVMe deallocate command.
+ */
+ if (cdata->oncs.dsm &&
+ spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->ns) == SPDK_NVME_DEALLOC_READ_00) {
+ return true;
+ }
+ /*
+ * The NVMe controller write_zeroes function is currently not used by our driver.
+ * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail.
+ * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read.
+ */
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+static int
+bdev_nvme_create_cb(void *io_device, void *ctx_buf)
+{
+ struct spdk_nvme_ctrlr *ctrlr = io_device;
+ struct nvme_io_channel *ch = ctx_buf;
+
+#ifdef SPDK_CONFIG_VTUNE
+ ch->collect_spin_stat = true;
+#else
+ ch->collect_spin_stat = false;
+#endif
+
+ ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
+
+ if (ch->qpair == NULL) {
+ return -1;
+ }
+
+ ch->poller = spdk_poller_register(bdev_nvme_poll, ch, 0);
+ return 0;
+}
+
+static void
+bdev_nvme_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct nvme_io_channel *ch = ctx_buf;
+
+ spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
+ spdk_poller_unregister(&ch->poller);
+}
+
+static struct spdk_io_channel *
+bdev_nvme_get_io_channel(void *ctx)
+{
+ struct nvme_bdev *nvme_bdev = ctx;
+
+ return spdk_get_io_channel(nvme_bdev->nvme_ctrlr->ctrlr);
+}
+
+void
+spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
+{
+ const char *trtype_str;
+ const char *adrfam_str;
+
+ trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
+ if (trtype_str) {
+ spdk_json_write_named_string(w, "trtype", trtype_str);
+ }
+
+ adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
+ if (adrfam_str) {
+ spdk_json_write_named_string(w, "adrfam", adrfam_str);
+ }
+
+ if (trid->traddr[0] != '\0') {
+ spdk_json_write_named_string(w, "traddr", trid->traddr);
+ }
+
+ if (trid->trsvcid[0] != '\0') {
+ spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
+ }
+
+ if (trid->subnqn[0] != '\0') {
+ spdk_json_write_named_string(w, "subnqn", trid->subnqn);
+ }
+}
+
+static int
+bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct nvme_bdev *nvme_bdev = ctx;
+ struct nvme_ctrlr *nvme_ctrlr = nvme_bdev->nvme_ctrlr;
+ const struct spdk_nvme_ctrlr_data *cdata;
+ struct spdk_nvme_ns *ns;
+ union spdk_nvme_vs_register vs;
+ union spdk_nvme_csts_register csts;
+ char buf[128];
+
+ cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_ctrlr->ctrlr);
+ vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_ctrlr->ctrlr);
+ csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_ctrlr->ctrlr);
+ ns = nvme_bdev->ns;
+
+ spdk_json_write_named_object_begin(w, "nvme");
+
+ if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ spdk_json_write_named_string(w, "pci_address", nvme_ctrlr->trid.traddr);
+ }
+
+ spdk_json_write_named_object_begin(w, "trid");
+
+ spdk_bdev_nvme_dump_trid_json(&nvme_ctrlr->trid, w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "ctrlr_data");
+
+ spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
+
+ snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "model_number", buf);
+
+ snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "serial_number", buf);
+
+ snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "firmware_revision", buf);
+
+ spdk_json_write_named_object_begin(w, "oacs");
+
+ spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
+ spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
+ spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
+ spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "vs");
+
+ spdk_json_write_name(w, "nvme_version");
+ if (vs.bits.ter) {
+ spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
+ } else {
+ spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
+ }
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "csts");
+
+ spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy);
+ spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "ns_data");
+
+ spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* No config per bdev needed */
+}
+
+static uint64_t
+bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ uint64_t spin_time;
+
+ if (!nvme_ch->collect_spin_stat) {
+ return 0;
+ }
+
+ if (nvme_ch->end_ticks != 0) {
+ nvme_ch->spin_ticks += (nvme_ch->end_ticks - nvme_ch->start_ticks);
+ nvme_ch->end_ticks = 0;
+ }
+
+ spin_time = (nvme_ch->spin_ticks * 1000000ULL) / spdk_get_ticks_hz();
+ nvme_ch->start_ticks = 0;
+ nvme_ch->spin_ticks = 0;
+
+ return spin_time;
+}
+
+static const struct spdk_bdev_fn_table nvmelib_fn_table = {
+ .destruct = bdev_nvme_destruct,
+ .submit_request = bdev_nvme_submit_request,
+ .io_type_supported = bdev_nvme_io_type_supported,
+ .get_io_channel = bdev_nvme_get_io_channel,
+ .dump_info_json = bdev_nvme_dump_info_json,
+ .write_config_json = bdev_nvme_write_config_json,
+ .get_spin_time = bdev_nvme_get_spin_time,
+};
+
+static int
+nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
+{
+ struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
+ struct nvme_bdev *bdev;
+ struct spdk_nvme_ns *ns;
+ const struct spdk_uuid *uuid;
+ const struct spdk_nvme_ctrlr_data *cdata;
+ int rc;
+
+ cdata = spdk_nvme_ctrlr_get_data(ctrlr);
+
+ ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
+ if (!ns) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nsid);
+ return -EINVAL;
+ }
+
+ bdev = &nvme_ctrlr->bdevs[nsid - 1];
+ bdev->id = nsid;
+
+ bdev->nvme_ctrlr = nvme_ctrlr;
+ bdev->ns = ns;
+ nvme_ctrlr->ref++;
+
+ bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_ctrlr->name, spdk_nvme_ns_get_id(ns));
+ if (!bdev->disk.name) {
+ nvme_ctrlr->ref--;
+ memset(bdev, 0, sizeof(*bdev));
+ return -ENOMEM;
+ }
+ bdev->disk.product_name = "NVMe disk";
+
+ bdev->disk.write_cache = 0;
+ if (cdata->vwc.present) {
+ /* Enable if the Volatile Write Cache exists */
+ bdev->disk.write_cache = 1;
+ }
+ bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
+ bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
+ bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
+
+ uuid = spdk_nvme_ns_get_uuid(ns);
+ if (uuid != NULL) {
+ bdev->disk.uuid = *uuid;
+ }
+
+ bdev->disk.ctxt = bdev;
+ bdev->disk.fn_table = &nvmelib_fn_table;
+ bdev->disk.module = &nvme_if;
+ rc = spdk_bdev_register(&bdev->disk);
+ if (rc) {
+ free(bdev->disk.name);
+ nvme_ctrlr->ref--;
+ memset(bdev, 0, sizeof(*bdev));
+ return rc;
+ }
+ bdev->active = true;
+
+ return 0;
+}
+
+static bool
+hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr);
+
+ return true;
+}
+
+static struct nvme_ctrlr *
+nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
+{
+ struct nvme_ctrlr *nvme_ctrlr;
+
+ TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->trid) == 0) {
+ return nvme_ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+static struct nvme_ctrlr *
+nvme_ctrlr_get_by_name(const char *name)
+{
+ struct nvme_ctrlr *nvme_ctrlr;
+
+ if (name == NULL) {
+ return NULL;
+ }
+
+ TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ if (strcmp(name, nvme_ctrlr->name) == 0) {
+ return nvme_ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+static bool
+probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct nvme_probe_ctx *ctx = cb_ctx;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr);
+
+ if (nvme_ctrlr_get(trid)) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
+ trid->traddr);
+ return false;
+ }
+
+ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ bool claim_device = false;
+ size_t i;
+
+ for (i = 0; i < ctx->count; i++) {
+ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
+ claim_device = true;
+ break;
+ }
+ }
+
+ if (!claim_device) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr);
+ return false;
+ }
+ }
+
+ if (ctx->hostnqn) {
+ snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn);
+ }
+
+ return true;
+}
+
+static void
+spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_nvme_ctrlr *ctrlr = ctx;
+ int rc;
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_WARNLOG("Abort failed. Resetting controller.\n");
+ rc = spdk_nvme_ctrlr_reset(ctrlr);
+ if (rc) {
+ SPDK_ERRLOG("Resetting controller failed.\n");
+ }
+ }
+}
+
+static void
+timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair, uint16_t cid)
+{
+ int rc;
+ union spdk_nvme_csts_register csts;
+
+ SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
+
+ csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
+ if (csts.bits.cfs) {
+ SPDK_ERRLOG("Controller Fatal Status, reset required\n");
+ rc = spdk_nvme_ctrlr_reset(ctrlr);
+ if (rc) {
+ SPDK_ERRLOG("Resetting controller failed.\n");
+ }
+ return;
+ }
+
+ switch (g_opts.action_on_timeout) {
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
+ if (qpair) {
+ rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
+ spdk_nvme_abort_cpl, ctrlr);
+ if (rc == 0) {
+ return;
+ }
+
+ SPDK_ERRLOG("Unable to send abort. Resetting.\n");
+ }
+
+ /* FALLTHROUGH */
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
+ rc = spdk_nvme_ctrlr_reset(ctrlr);
+ if (rc) {
+ SPDK_ERRLOG("Resetting controller failed.\n");
+ }
+ break;
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
+ break;
+ }
+}
+
+static void
+nvme_ctrlr_deactivate_bdev(struct nvme_bdev *bdev)
+{
+ spdk_bdev_unregister(&bdev->disk, NULL, NULL);
+ bdev->active = false;
+}
+
+static void
+nvme_ctrlr_update_ns_bdevs(struct nvme_ctrlr *nvme_ctrlr)
+{
+ struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
+ uint32_t i;
+ struct nvme_bdev *bdev;
+
+ for (i = 0; i < nvme_ctrlr->num_ns; i++) {
+ uint32_t nsid = i + 1;
+
+ bdev = &nvme_ctrlr->bdevs[i];
+ if (!bdev->active && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
+ SPDK_NOTICELOG("NSID %u to be added\n", nsid);
+ nvme_ctrlr_create_bdev(nvme_ctrlr, nsid);
+ }
+
+ if (bdev->active && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
+ SPDK_NOTICELOG("NSID %u Bdev %s is removed\n", nsid, bdev->disk.name);
+ nvme_ctrlr_deactivate_bdev(bdev);
+ }
+ }
+}
+
+static void
+aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_ctrlr *nvme_ctrlr = arg;
+ union spdk_nvme_async_event_completion event;
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_WARNLOG("AER request execution failed\n");
+ return;
+ }
+
+ event.raw = cpl->cdw0;
+ if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
+ (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
+ nvme_ctrlr_update_ns_bdevs(nvme_ctrlr);
+ }
+}
+
+static int
+create_ctrlr(struct spdk_nvme_ctrlr *ctrlr,
+ const char *name,
+ const struct spdk_nvme_transport_id *trid)
+{
+ struct nvme_ctrlr *nvme_ctrlr;
+
+ nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed to allocate device struct\n");
+ return -ENOMEM;
+ }
+ nvme_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
+ nvme_ctrlr->bdevs = calloc(nvme_ctrlr->num_ns, sizeof(struct nvme_bdev));
+ if (!nvme_ctrlr->bdevs) {
+ SPDK_ERRLOG("Failed to allocate block devices struct\n");
+ free(nvme_ctrlr);
+ return -ENOMEM;
+ }
+
+ nvme_ctrlr->adminq_timer_poller = NULL;
+ nvme_ctrlr->ctrlr = ctrlr;
+ nvme_ctrlr->ref = 0;
+ nvme_ctrlr->trid = *trid;
+ nvme_ctrlr->name = strdup(name);
+ if (nvme_ctrlr->name == NULL) {
+ free(nvme_ctrlr->bdevs);
+ free(nvme_ctrlr);
+ return -ENOMEM;
+ }
+
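+ /*
+ * The controller pointer doubles as the io_device key; per-thread I/O
+ * channels, each holding its own qpair, are created on demand through
+ * bdev_nvme_create_cb().
+ */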
+ spdk_io_device_register(ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,
+ sizeof(struct nvme_io_channel),
+ name);
+
+ if (nvme_ctrlr_create_bdevs(nvme_ctrlr) != 0) {
+ spdk_io_device_unregister(ctrlr, bdev_nvme_unregister_cb);
+ free(nvme_ctrlr->bdevs);
+ free(nvme_ctrlr->name);
+ free(nvme_ctrlr);
+ return -1;
+ }
+
+ nvme_ctrlr->adminq_timer_poller = spdk_poller_register(bdev_nvme_poll_adminq, ctrlr,
+ g_opts.nvme_adminq_poll_period_us);
+
+ TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq);
+
+ if (g_opts.timeout_us > 0 && g_opts.action_on_timeout != SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE) {
+ spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
+ timeout_cb, NULL);
+ }
+
+ spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
+
+ return 0;
+}
+
+static void
+attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct nvme_probe_ctx *ctx = cb_ctx;
+ char *name = NULL;
+ size_t i;
+
+ if (ctx) {
+ for (i = 0; i < ctx->count; i++) {
+ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
+ name = strdup(ctx->names[i]);
+ break;
+ }
+ }
+ } else {
+ name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
+ }
+ if (!name) {
+ SPDK_ERRLOG("Failed to assign name to NVMe device\n");
+ return;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name);
+
+ create_ctrlr(ctrlr, name, trid);
+
+ free(name);
+}
+
+static void
+remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
+{
+ uint32_t i;
+ struct nvme_ctrlr *nvme_ctrlr;
+ struct nvme_bdev *nvme_bdev;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ if (nvme_ctrlr->ctrlr == ctrlr) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ for (i = 0; i < nvme_ctrlr->num_ns; i++) {
+ uint32_t nsid = i + 1;
+
+ nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1];
+ assert(nvme_bdev->id == nsid);
+ if (nvme_bdev->active) {
+ spdk_bdev_unregister(&nvme_bdev->disk, NULL, NULL);
+ }
+ }
+ return;
+ }
+ }
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
+
+static int
+bdev_nvme_hotplug(void *arg)
+{
+ if (spdk_nvme_probe(NULL, NULL, hotplug_probe_cb, attach_cb, remove_cb) != 0) {
+ SPDK_ERRLOG("spdk_nvme_probe() failed\n");
+ }
+
+ return -1;
+}
+
+void
+spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
+{
+ *opts = g_opts;
+}
+
+int
+spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
+{
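+ /* Global options may only be changed before the module is initialized. */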
+ if (g_bdev_nvme_init_thread != NULL) {
+ return -EPERM;
+ }
+
+ g_opts = *opts;
+
+ return 0;
+}
+
+struct set_nvme_hotplug_ctx {
+ uint64_t period_us;
+ bool enabled;
+ spdk_thread_fn fn;
+ void *fn_ctx;
+};
+
+static void
+set_nvme_hotplug_period_cb(void *_ctx)
+{
+ struct set_nvme_hotplug_ctx *ctx = _ctx;
+
+ spdk_poller_unregister(&g_hotplug_poller);
+ if (ctx->enabled) {
+ g_hotplug_poller = spdk_poller_register(bdev_nvme_hotplug, NULL, ctx->period_us);
+ }
+
+ g_nvme_hotplug_poll_period_us = ctx->period_us;
+ g_nvme_hotplug_enabled = ctx->enabled;
+ if (ctx->fn) {
+ ctx->fn(ctx->fn_ctx);
+ }
+
+ free(ctx);
+}
+
+int
+spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx)
+{
+ struct set_nvme_hotplug_ctx *ctx;
+
+ if (enabled == true && !spdk_process_is_primary()) {
+ return -EPERM;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx == NULL) {
+ return -ENOMEM;
+ }
+
+ period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
+ ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
+ ctx->enabled = enabled;
+ ctx->fn = cb;
+ ctx->fn_ctx = cb_ctx;
+
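+ /* The hotplug poller lives on the init thread, so it must be (un)registered there. */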
+ spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
+ return 0;
+}
+
+int
+spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
+ const char *base_name,
+ const char **names, size_t *count,
+ const char *hostnqn)
+{
+ struct nvme_probe_ctx *probe_ctx;
+ struct nvme_ctrlr *nvme_ctrlr;
+ struct nvme_bdev *nvme_bdev;
+ uint32_t i, nsid;
+ size_t j;
+
+ if (nvme_ctrlr_get(trid) != NULL) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
+ return -1;
+ }
+
+ probe_ctx = calloc(1, sizeof(*probe_ctx));
+ if (probe_ctx == NULL) {
+ SPDK_ERRLOG("Failed to allocate probe_ctx\n");
+ return -1;
+ }
+
+ probe_ctx->count = 1;
+ probe_ctx->trids[0] = *trid;
+ probe_ctx->names[0] = base_name;
+ probe_ctx->hostnqn = hostnqn;
+ if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) {
+ SPDK_ERRLOG("Failed to probe for new devices\n");
+ free(probe_ctx);
+ return -1;
+ }
+
+ nvme_ctrlr = nvme_ctrlr_get(trid);
+ if (!nvme_ctrlr) {
+ SPDK_ERRLOG("Failed to find new NVMe controller\n");
+ free(probe_ctx);
+ return -1;
+ }
+
+ /*
+ * Report the new bdevs that were created in this call.
+ * There can be more than one bdev per NVMe controller since one bdev is created per namespace.
+ */
+ j = 0;
+ for (i = 0; i < nvme_ctrlr->num_ns; i++) {
+ nsid = i + 1;
+ nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1];
+ if (!nvme_bdev->active) {
+ continue;
+ }
+ assert(nvme_bdev->id == nsid);
+ if (j < *count) {
+ names[j] = nvme_bdev->disk.name;
+ j++;
+ } else {
+ SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %zu. Unable to return all names of created bdevs\n",
+ *count);
+ free(probe_ctx);
+ return -1;
+ }
+ }
+
+ *count = j;
+
+ free(probe_ctx);
+ return 0;
+}
+
+int
+spdk_bdev_nvme_delete(const char *name)
+{
+ struct nvme_ctrlr *nvme_ctrlr = NULL;
+
+ if (name == NULL) {
+ return -EINVAL;
+ }
+
+ nvme_ctrlr = nvme_ctrlr_get_by_name(name);
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed to find NVMe controller\n");
+ return -ENODEV;
+ }
+
+ remove_cb(NULL, nvme_ctrlr->ctrlr);
+ return 0;
+}
+
+static int
+bdev_nvme_library_init(void)
+{
+ struct spdk_conf_section *sp;
+ const char *val;
+ int rc = 0;
+ int64_t intval = 0;
+ size_t i;
+ struct nvme_probe_ctx *probe_ctx = NULL;
+ int retry_count;
+ uint32_t local_nvme_num = 0;
+ int64_t hotplug_period;
+ bool hotplug_enabled = g_nvme_hotplug_enabled;
+
+ g_bdev_nvme_init_thread = spdk_get_thread();
+
+ sp = spdk_conf_find_section(NULL, "Nvme");
+ if (sp == NULL) {
+ goto end;
+ }
+
+ probe_ctx = calloc(1, sizeof(*probe_ctx));
+ if (probe_ctx == NULL) {
+ SPDK_ERRLOG("Failed to allocate probe_ctx\n");
+ rc = -1;
+ goto end;
+ }
+
+ if ((retry_count = spdk_conf_section_get_intval(sp, "RetryCount")) < 0) {
+ if ((retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount")) < 0) {
+ retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
+ } else {
+ SPDK_WARNLOG("NvmeRetryCount was renamed to RetryCount\n");
+ SPDK_WARNLOG("Please update your configuration file\n");
+ }
+ }
+
+ g_opts.retry_count = retry_count;
+
+ val = spdk_conf_section_get_val(sp, "TimeoutUsec");
+ if (val != NULL) {
+ intval = strtoll(val, NULL, 10);
+ if (intval == LLONG_MIN || intval == LLONG_MAX) {
+ SPDK_ERRLOG("Invalid TimeoutUsec value\n");
+ rc = -1;
+ goto end;
+ } else if (intval < 0) {
+ intval = 0;
+ }
+ }
+
+ g_opts.timeout_us = intval;
+
+ if (g_opts.timeout_us > 0) {
+ val = spdk_conf_section_get_val(sp, "ActionOnTimeout");
+ if (val != NULL) {
+ if (!strcasecmp(val, "Reset")) {
+ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
+ } else if (!strcasecmp(val, "Abort")) {
+ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT;
+ }
+ } else {
+ /* Handle old name for backward compatibility */
+ val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout");
+ if (val) {
+ SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n");
+ SPDK_WARNLOG("Please update your configuration file\n");
+
+ if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) {
+ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
+ }
+ }
+ }
+ }
+
+ intval = spdk_conf_section_get_intval(sp, "AdminPollRate");
+ if (intval > 0) {
+ g_opts.nvme_adminq_poll_period_us = intval;
+ }
+
+ if (spdk_process_is_primary()) {
+ hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false);
+ }
+
+ hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate");
+
+ g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN");
+ probe_ctx->hostnqn = g_nvme_hostnqn;
+
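+ /*
+ * Example "TransportID" entries this loop accepts (addresses are
+ * illustrative):
+ *
+ * TransportID "trtype:PCIe traddr:0000:00:04.0" Nvme0
+ * TransportID "trtype:RDMA adrfam:IPv4 traddr:192.168.1.10 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1" Nvme1
+ */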
+ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
+ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0);
+ if (val == NULL) {
+ break;
+ }
+
+ rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to parse TransportID: %s\n", val);
+ rc = -1;
+ goto end;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1);
+ if (val == NULL) {
+ SPDK_ERRLOG("No name provided for TransportID\n");
+ rc = -1;
+ goto end;
+ }
+
+ probe_ctx->names[i] = val;
+ probe_ctx->count++;
+
+ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
+ struct spdk_nvme_ctrlr *ctrlr;
+ struct spdk_nvme_ctrlr_opts opts;
+
+ if (nvme_ctrlr_get(&probe_ctx->trids[i])) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
+ probe_ctx->trids[i].traddr);
+ rc = -1;
+ goto end;
+ }
+
+ if (probe_ctx->trids[i].subnqn[0] == '\0') {
+ SPDK_ERRLOG("Need to provide subsystem nqn\n");
+ rc = -1;
+ goto end;
+ }
+
+ spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
+
+ if (probe_ctx->hostnqn != NULL) {
+ snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn);
+ }
+
+ ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts));
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n",
+ probe_ctx->trids[i].traddr);
+ rc = -1;
+ goto end;
+ }
+
+ rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i]);
+ if (rc) {
+ goto end;
+ }
+ } else {
+ local_nvme_num++;
+ }
+ }
+
+ if (local_nvme_num > 0) {
+ /* used to probe local NVMe device */
+ if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, NULL)) {
+ rc = -1;
+ goto end;
+ }
+
+ for (i = 0; i < probe_ctx->count; i++) {
+ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
+ continue;
+ }
+
+ if (!nvme_ctrlr_get(&probe_ctx->trids[i])) {
+ SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr);
+ SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n");
+ }
+ }
+ }
+
+ rc = spdk_bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL);
+ if (rc) {
+ SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc));
+ rc = -1;
+ }
+end:
+ spdk_nvme_retry_count = g_opts.retry_count;
+
+ free(probe_ctx);
+ return rc;
+}
+
+static void
+bdev_nvme_library_fini(void)
+{
+ spdk_poller_unregister(&g_hotplug_poller);
+}
+
+static int
+nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr)
+{
+ int rc;
+ int bdev_created = 0;
+ uint32_t nsid;
+
+ for (nsid = spdk_nvme_ctrlr_get_first_active_ns(nvme_ctrlr->ctrlr);
+ nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(nvme_ctrlr->ctrlr, nsid)) {
+ rc = nvme_ctrlr_create_bdev(nvme_ctrlr, nsid);
+ if (rc == 0) {
+ bdev_created++;
+ }
+ }
+
+ return (bdev_created > 0) ? 0 : -1;
+}
+
+static void
+bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc);
+}
+
+static void
+bdev_nvme_admin_passthru_completion(void *ctx)
+{
+ struct nvme_bdev_io *bio = ctx;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+
+ spdk_bdev_io_complete_nvme_status(bdev_io,
+ bio->cpl.status.sct, bio->cpl.status.sc);
+}
+
+static void
+bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+
+ bio->cpl = *cpl;
+ spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio);
+}
+
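+/*
+ * SGL callbacks used by bdev_nvme_queue_cmd(): reset_sgl repositions the
+ * iovec cursor at an absolute payload offset, and next_sge hands the NVMe
+ * driver the next contiguous buffer segment.
+ */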
+static void
+bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ bio->iov_offset = sgl_offset;
+ for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
+ iov = &bio->iovs[bio->iovpos];
+ if (bio->iov_offset < iov->iov_len) {
+ break;
+ }
+
+ bio->iov_offset -= iov->iov_len;
+ }
+}
+
+static int
+bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ assert(bio->iovpos < bio->iovcnt);
+
+ iov = &bio->iovs[bio->iovpos];
+
+ *address = iov->iov_base;
+ *length = iov->iov_len;
+
+ if (bio->iov_offset) {
+ assert(bio->iov_offset <= iov->iov_len);
+ *address += bio->iov_offset;
+ *length -= bio->iov_offset;
+ }
+
+ bio->iov_offset += *length;
+ if (bio->iov_offset == iov->iov_len) {
+ bio->iovpos++;
+ bio->iov_offset = 0;
+ }
+
+ return 0;
+}
+
+static int
+bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair,
+ struct nvme_bdev_io *bio,
+ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count,
+ uint64_t lba)
+{
+ int rc;
+
+ bio->iovs = iov;
+ bio->iovcnt = iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+
+ if (direction == BDEV_DISK_READ) {
+ rc = spdk_nvme_ns_cmd_readv(bdev->ns, qpair, lba,
+ lba_count, bdev_nvme_queued_done, bio, 0,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
+ } else {
+ rc = spdk_nvme_ns_cmd_writev(bdev->ns, qpair, lba,
+ lba_count, bdev_nvme_queued_done, bio, 0,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
+ }
+
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc);
+ }
+ return rc;
+}
+
+static int
+bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ uint64_t offset_blocks,
+ uint64_t num_blocks)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
+ struct spdk_nvme_dsm_range *range;
+ uint64_t offset, remaining;
+ uint64_t num_ranges_u64;
+ uint16_t num_ranges;
+ int rc;
+
+ num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
+ SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
+ SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
+ return -EINVAL;
+ }
+ num_ranges = (uint16_t)num_ranges_u64;
+
+ offset = offset_blocks;
+ remaining = num_blocks;
+ range = &dsm_ranges[0];
+
+ /* Fill max-size ranges until the remaining blocks fit into one range */
+ while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
+ range->attributes.raw = 0;
+ range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ range->starting_lba = offset;
+
+ offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ range++;
+ }
+
+ /* Final range describes the remaining blocks */
+ range->attributes.raw = 0;
+ range->length = remaining;
+ range->starting_lba = offset;
+
+ rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair,
+ SPDK_NVME_DSM_ATTR_DEALLOCATE,
+ dsm_ranges, num_ranges,
+ bdev_nvme_queued_done, bio);
+
+ return rc;
+}
+
+static int
+bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
+{
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ bio->orig_thread = spdk_io_channel_get_thread(ch);
+
+ return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_ctrlr->ctrlr, cmd, buf,
+ (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio);
+}
+
+static int
+bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ /*
+ * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
+ * so fill it out automatically.
+ */
+ cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns);
+
+ return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
+ (uint32_t)nbytes, bdev_nvme_queued_done, bio);
+}
+
+static int
+bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->ns);
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->ns)) {
+ SPDK_ERRLOG("invalid meta data buffer size\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
+ * so fill it out automatically.
+ */
+ cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns);
+
+ return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
+ (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
+}
+
+static void
+bdev_nvme_get_spdk_running_config(FILE *fp)
+{
+ struct nvme_ctrlr *nvme_ctrlr;
+
+ fprintf(fp, "\n[Nvme]");
+ fprintf(fp, "\n"
+ "# NVMe Device Whitelist\n"
+ "# Users may specify which NVMe devices to claim by their transport id.\n"
+ "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n"
+ "# The second argument is the assigned name, which can be referenced from\n"
+ "# other sections in the configuration file. For NVMe devices, a namespace\n"
+ "# is automatically appended to each name in the format <YourName>nY, where\n"
+ "# Y is the NSID (starts at 1).\n");
+
+ TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ const char *trtype;
+
+ trtype = spdk_nvme_transport_id_trtype_str(nvme_ctrlr->trid.trtype);
+ if (!trtype) {
+ continue;
+ }
+
+ if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n",
+ trtype,
+ nvme_ctrlr->trid.traddr, nvme_ctrlr->name);
+ } else {
+ const char *adrfam;
+
+ adrfam = spdk_nvme_transport_id_adrfam_str(nvme_ctrlr->trid.adrfam);
+
+ if (adrfam) {
+ fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n",
+ trtype, adrfam,
+ nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid,
+ nvme_ctrlr->trid.subnqn, nvme_ctrlr->name);
+ } else {
+ fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n",
+ trtype,
+ nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid,
+ nvme_ctrlr->trid.subnqn, nvme_ctrlr->name);
+ }
+
+ }
+ }
+
+ fprintf(fp, "\n"
+ "# The number of attempts per I/O when an I/O fails. Do not include\n"
+ "# this key to get the default behavior.\n");
+ fprintf(fp, "RetryCount %d\n", spdk_nvme_retry_count);
+ fprintf(fp, "\n"
+ "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n");
+ fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us);
+
+ fprintf(fp, "\n"
+ "# Action to take on command time out. Only valid when Timeout is greater\n"
+ "# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort\n"
+ "# the command, or 'None' to just print a message but do nothing.\n"
+ "# Admin command timeouts will always result in a reset.\n");
+ switch (g_opts.action_on_timeout) {
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
+ fprintf(fp, "ActionOnTimeout None\n");
+ break;
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
+ fprintf(fp, "ActionOnTimeout Reset\n");
+ break;
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
+ fprintf(fp, "ActionOnTimeout Abort\n");
+ break;
+ }
+
+ fprintf(fp, "\n"
+ "# Set how often the admin queue is polled for asynchronous events.\n"
+ "# Units in microseconds.\n");
+ fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us);
+ fprintf(fp, "\n"
+		"# Disable handling of hotplug (runtime insert and remove) events.\n"
+		"# Users can set this to Yes to enable it.\n"
+ "# Default: No\n");
+ fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No");
+ fprintf(fp, "\n"
+		"# Set how often the hotplug is processed for insert and remove events.\n"
+ "# Units in microseconds.\n");
+ fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us);
+ if (g_nvme_hostnqn) {
+ fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn);
+ }
+
+ fprintf(fp, "\n");
+}
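+
+/*
+ * Illustrative output of the function above (the controller name, address and
+ * values below are hypothetical, and the explanatory "#" comment lines the
+ * function also prints are omitted here):
+ *
+ *   [Nvme]
+ *   TransportID "trtype:PCIe traddr:0000:5e:00.0" Nvme0
+ *   RetryCount 4
+ *   TimeoutUsec 0
+ *   ActionOnTimeout None
+ *   AdminPollRate 100000
+ *   HotplugEnable No
+ *   HotplugPollRate 100000
+ */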
+
+static int
+bdev_nvme_config_json(struct spdk_json_write_ctx *w)
+{
+ struct nvme_ctrlr *nvme_ctrlr;
+ struct spdk_nvme_transport_id *trid;
+ const char *action;
+
+ if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
+ action = "reset";
+ } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
+ action = "abort";
+ } else {
+ action = "none";
+ }
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "set_bdev_nvme_options");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "action_on_timeout", action);
+ spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
+ spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count);
+ spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
+ trid = &nvme_ctrlr->trid;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_nvme_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", nvme_ctrlr->name);
+ spdk_bdev_nvme_dump_trid_json(trid, w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ }
+
+	/* Dump this entry last to give all NVMe bdevs a chance to be constructed
+	 * before the hotplug poller is enabled.
+	 */
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "set_bdev_nvme_hotplug");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
+ spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return 0;
+}
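+
+/*
+ * Illustrative sketch of what the function above emits (all values
+ * hypothetical): one object per RPC needed to recreate the current
+ * configuration, with the hotplug call written last as noted above.
+ *
+ *   { "method": "set_bdev_nvme_options",
+ *     "params": { "action_on_timeout": "none", "timeout_us": 0,
+ *                 "retry_count": 4, "nvme_adminq_poll_period_us": 100000 } }
+ *   { "method": "construct_nvme_bdev",
+ *     "params": { "name": "Nvme0", "trtype": "PCIe",
+ *                 "traddr": "0000:5e:00.0" } }
+ *   { "method": "set_bdev_nvme_hotplug",
+ *     "params": { "period_us": 100000, "enable": false } }
+ */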
+
+struct spdk_nvme_ctrlr *
+spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
+{
+ if (!bdev || bdev->module != &nvme_if) {
+ return NULL;
+ }
+
+ return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ctrlr->ctrlr;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME)
diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.h b/src/spdk/lib/bdev/nvme/bdev_nvme.h
new file mode 100644
index 00000000..b8c458e8
--- /dev/null
+++ b/src/spdk/lib/bdev/nvme/bdev_nvme.h
@@ -0,0 +1,112 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_NVME_H
+#define SPDK_BDEV_NVME_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/queue.h"
+#include "spdk/nvme.h"
+#include "spdk/bdev_module.h"
+
+#define NVME_MAX_CONTROLLERS 1024
+
+enum spdk_bdev_timeout_action {
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0,
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET,
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT,
+};
+
+struct spdk_bdev_nvme_opts {
+ enum spdk_bdev_timeout_action action_on_timeout;
+ uint64_t timeout_us;
+ uint32_t retry_count;
+ uint64_t nvme_adminq_poll_period_us;
+};
+
+struct nvme_ctrlr {
+	/**
+	 * Handle to the attached NVMe controller. All namespaces (and thus
+	 * all bdevs in the array below) belong to this controller.
+	 */
+ struct spdk_nvme_ctrlr *ctrlr;
+ struct spdk_nvme_transport_id trid;
+ char *name;
+ int ref;
+ uint32_t num_ns;
+ /** Array of bdevs indexed by nsid - 1 */
+ struct nvme_bdev *bdevs;
+
+ struct spdk_poller *adminq_timer_poller;
+
+ /** linked list pointer for device list */
+ TAILQ_ENTRY(nvme_ctrlr) tailq;
+};
+
+struct nvme_bdev {
+ struct spdk_bdev disk;
+ struct nvme_ctrlr *nvme_ctrlr;
+ uint32_t id;
+ bool active;
+ struct spdk_nvme_ns *ns;
+};
+
+void spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid,
+ struct spdk_json_write_ctx *w);
+
+struct spdk_nvme_qpair *spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
+struct nvme_ctrlr *spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name);
+struct nvme_ctrlr *spdk_bdev_nvme_first_ctrlr(void);
+struct nvme_ctrlr *spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev);
+void spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
+int spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
+int spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx);
+
+int spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
+ const char *base_name,
+ const char **names, size_t *count,
+ const char *hostnqn);
+struct spdk_nvme_ctrlr *spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);
+
+/**
+ * Delete an NVMe controller together with all bdevs on top of it.
+ * The controller is identified by its name.
+ *
+ * \param name NVMe controller name
+ * \return zero on success, -EINVAL on wrong parameters or -ENODEV if controller is not found
+ */
+int spdk_bdev_nvme_delete(const char *name);
+
+#endif // SPDK_BDEV_NVME_H
diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c
new file mode 100644
index 00000000..0312a756
--- /dev/null
+++ b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c
@@ -0,0 +1,740 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_nvme.h"
+
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+#include "spdk/bdev_module.h"
+
+struct open_descriptors {
+ void *desc;
+ struct spdk_bdev *bdev;
+ TAILQ_ENTRY(open_descriptors) tqlst;
+};
+typedef TAILQ_HEAD(, open_descriptors) open_descriptors_t;
+
+static int
+rpc_decode_action_on_timeout(const struct spdk_json_val *val, void *out)
+{
+ enum spdk_bdev_timeout_action *action = out;
+
+ if (spdk_json_strequal(val, "none") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE;
+ } else if (spdk_json_strequal(val, "abort") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT;
+ } else if (spdk_json_strequal(val, "reset") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: action_on_timeout\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = {
+ {"action_on_timeout", offsetof(struct spdk_bdev_nvme_opts, action_on_timeout), rpc_decode_action_on_timeout, true},
+ {"timeout_us", offsetof(struct spdk_bdev_nvme_opts, timeout_us), spdk_json_decode_uint64, true},
+ {"retry_count", offsetof(struct spdk_bdev_nvme_opts, retry_count), spdk_json_decode_uint32, true},
+ {"nvme_adminq_poll_period_us", offsetof(struct spdk_bdev_nvme_opts, nvme_adminq_poll_period_us), spdk_json_decode_uint64, true},
+};
+
+static void
+spdk_rpc_set_bdev_nvme_options(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct spdk_bdev_nvme_opts opts;
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ spdk_bdev_nvme_get_opts(&opts);
+ if (params && spdk_json_decode_object(params, rpc_bdev_nvme_options_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_options_decoders),
+ &opts)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = spdk_bdev_nvme_set_opts(&opts);
+ if (rc) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w != NULL) {
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ }
+
+ return;
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("set_bdev_nvme_options", spdk_rpc_set_bdev_nvme_options, SPDK_RPC_STARTUP)
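+
+/*
+ * Example request (illustrative; all values hypothetical). Every parameter is
+ * optional and omitted fields keep their current values, since decoding starts
+ * from the result of spdk_bdev_nvme_get_opts():
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "set_bdev_nvme_options",
+ *     "params": { "action_on_timeout": "reset", "timeout_us": 10000000,
+ *                 "retry_count": 4 } }
+ */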
+
+struct rpc_bdev_nvme_hotplug {
+ bool enabled;
+ uint64_t period_us;
+};
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_hotplug_decoders[] = {
+ {"enable", offsetof(struct rpc_bdev_nvme_hotplug, enabled), spdk_json_decode_bool, false},
+ {"period_us", offsetof(struct rpc_bdev_nvme_hotplug, period_us), spdk_json_decode_uint64, true},
+};
+
+static void
+rpc_set_bdev_nvme_hotplug_done(void *ctx)
+{
+ struct spdk_jsonrpc_request *request = ctx;
+ struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request);
+
+ if (w != NULL) {
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ }
+}
+
+static void
+spdk_rpc_set_bdev_nvme_hotplug(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_hotplug req = {false, 0};
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_hotplug_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_hotplug_decoders), &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = spdk_bdev_nvme_set_hotplug(req.enabled, req.period_us, rpc_set_bdev_nvme_hotplug_done,
+ request);
+ if (rc) {
+ goto invalid;
+ }
+
+ return;
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("set_bdev_nvme_hotplug", spdk_rpc_set_bdev_nvme_hotplug, SPDK_RPC_RUNTIME)
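+
+/*
+ * Example request (illustrative): enable hotplug processing with a 100 ms poll
+ * period. "enable" is required, "period_us" is optional:
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "set_bdev_nvme_hotplug",
+ *     "params": { "enable": true, "period_us": 100000 } }
+ */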
+
+struct rpc_construct_nvme {
+ char *name;
+ char *trtype;
+ char *adrfam;
+ char *traddr;
+ char *trsvcid;
+ char *subnqn;
+ char *hostnqn;
+};
+
+static void
+free_rpc_construct_nvme(struct rpc_construct_nvme *req)
+{
+ free(req->name);
+ free(req->trtype);
+ free(req->adrfam);
+ free(req->traddr);
+ free(req->trsvcid);
+ free(req->subnqn);
+ free(req->hostnqn);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_nvme_decoders[] = {
+ {"name", offsetof(struct rpc_construct_nvme, name), spdk_json_decode_string},
+ {"trtype", offsetof(struct rpc_construct_nvme, trtype), spdk_json_decode_string},
+ {"traddr", offsetof(struct rpc_construct_nvme, traddr), spdk_json_decode_string},
+
+ {"adrfam", offsetof(struct rpc_construct_nvme, adrfam), spdk_json_decode_string, true},
+ {"trsvcid", offsetof(struct rpc_construct_nvme, trsvcid), spdk_json_decode_string, true},
+ {"subnqn", offsetof(struct rpc_construct_nvme, subnqn), spdk_json_decode_string, true},
+ {"hostnqn", offsetof(struct rpc_construct_nvme, hostnqn), spdk_json_decode_string, true}
+};
+
+#define NVME_MAX_BDEVS_PER_RPC 128
+
+static void
+spdk_rpc_construct_nvme_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_nvme req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_nvme_transport_id trid = {};
+ const char *names[NVME_MAX_BDEVS_PER_RPC];
+ size_t count;
+ size_t i;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_nvme_decoders,
+ SPDK_COUNTOF(rpc_construct_nvme_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ /* Parse trtype */
+ rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, req.trtype);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to parse trtype: %s\n", req.trtype);
+ goto invalid;
+ }
+
+ /* Parse traddr */
+ snprintf(trid.traddr, sizeof(trid.traddr), "%s", req.traddr);
+
+ /* Parse adrfam */
+ if (req.adrfam) {
+ rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, req.adrfam);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to parse adrfam: %s\n", req.adrfam);
+ goto invalid;
+ }
+ }
+
+ /* Parse trsvcid */
+ if (req.trsvcid) {
+ snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", req.trsvcid);
+ }
+
+ /* Parse subnqn */
+ if (req.subnqn) {
+ snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", req.subnqn);
+ }
+
+ count = NVME_MAX_BDEVS_PER_RPC;
+ if (spdk_bdev_nvme_create(&trid, req.name, names, &count, req.hostnqn)) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_nvme(&req);
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+ for (i = 0; i < count; i++) {
+ spdk_json_write_string(w, names[i]);
+ }
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+ free_rpc_construct_nvme(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_construct_nvme(&req);
+}
+SPDK_RPC_REGISTER("construct_nvme_bdev", spdk_rpc_construct_nvme_bdev, SPDK_RPC_RUNTIME)
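+
+/*
+ * Example requests (illustrative; the names and addresses are hypothetical).
+ * "name", "trtype" and "traddr" are required; the remaining fields are only
+ * needed for fabrics transports. The response is the array of bdev names
+ * created, e.g. ["Nvme0n1"]:
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "construct_nvme_bdev",
+ *     "params": { "name": "Nvme0", "trtype": "PCIe",
+ *                 "traddr": "0000:5e:00.0" } }
+ *
+ *   { "jsonrpc": "2.0", "id": 2, "method": "construct_nvme_bdev",
+ *     "params": { "name": "Nvme1", "trtype": "RDMA", "adrfam": "IPv4",
+ *                 "traddr": "192.168.1.10", "trsvcid": "4420",
+ *                 "subnqn": "nqn.2016-06.io.spdk:cnode1" } }
+ */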
+
+static void
+spdk_rpc_dump_nvme_controller_info(struct spdk_json_write_ctx *w,
+ struct nvme_ctrlr *nvme_ctrlr)
+{
+ struct spdk_nvme_transport_id *trid;
+
+ trid = &nvme_ctrlr->trid;
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "name", nvme_ctrlr->name);
+
+ spdk_json_write_named_object_begin(w, "trid");
+ spdk_bdev_nvme_dump_trid_json(trid, w);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+struct rpc_get_nvme_controllers {
+ char *name;
+};
+
+static void
+free_rpc_get_nvme_controllers(struct rpc_get_nvme_controllers *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_get_nvme_controllers_decoders[] = {
+ {"name", offsetof(struct rpc_get_nvme_controllers, name), spdk_json_decode_string, true},
+};
+
+static void
+spdk_rpc_get_nvme_controllers(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_nvme_controllers req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_ctrlr *ctrlr = NULL;
+
+ if (params && spdk_json_decode_object(params, rpc_get_nvme_controllers_decoders,
+ SPDK_COUNTOF(rpc_get_nvme_controllers_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.name) {
+ ctrlr = spdk_bdev_nvme_lookup_ctrlr(req.name);
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("ctrlr '%s' does not exist\n", req.name);
+ goto invalid;
+ }
+ }
+
+ free_rpc_get_nvme_controllers(&req);
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+
+ if (ctrlr != NULL) {
+ spdk_rpc_dump_nvme_controller_info(w, ctrlr);
+ } else {
+ for (ctrlr = spdk_bdev_nvme_first_ctrlr(); ctrlr; ctrlr = spdk_bdev_nvme_next_ctrlr(ctrlr)) {
+ spdk_rpc_dump_nvme_controller_info(w, ctrlr);
+ }
+ }
+
+ spdk_json_write_array_end(w);
+
+ spdk_jsonrpc_end_result(request, w);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+
+ free_rpc_get_nvme_controllers(&req);
+}
+SPDK_RPC_REGISTER("get_nvme_controllers", spdk_rpc_get_nvme_controllers, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_nvme {
+ char *name;
+};
+
+static void
+free_rpc_delete_nvme(struct rpc_delete_nvme *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_nvme_decoders[] = {
+ {"name", offsetof(struct rpc_delete_nvme, name), spdk_json_decode_string},
+};
+
+static void
+spdk_rpc_delete_nvme_ctrlr(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_nvme req = {NULL};
+ struct spdk_json_write_ctx *w;
+ int rc = 0;
+
+ if (spdk_json_decode_object(params, rpc_delete_nvme_decoders,
+ SPDK_COUNTOF(rpc_delete_nvme_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = spdk_bdev_nvme_delete(req.name);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ free_rpc_delete_nvme(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_delete_nvme(&req);
+}
+SPDK_RPC_REGISTER("delete_nvme_controller", spdk_rpc_delete_nvme_ctrlr, SPDK_RPC_RUNTIME)
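+
+/*
+ * Example request (illustrative; the controller name is hypothetical). Deletes
+ * the named controller and every bdev on top of it; responds with true on
+ * success:
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "delete_nvme_controller",
+ *     "params": { "name": "Nvme0" } }
+ */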
+
+struct rpc_apply_firmware {
+ char *filename;
+ char *bdev_name;
+};
+
+static void
+free_rpc_apply_firmware(struct rpc_apply_firmware *req)
+{
+ free(req->filename);
+ free(req->bdev_name);
+}
+
+static const struct spdk_json_object_decoder rpc_apply_firmware_decoders[] = {
+ {"filename", offsetof(struct rpc_apply_firmware, filename), spdk_json_decode_string},
+ {"bdev_name", offsetof(struct rpc_apply_firmware, bdev_name), spdk_json_decode_string},
+};
+
+struct firmware_update_info {
+ void *fw_image;
+ void *p;
+ unsigned int size;
+ unsigned int size_remaining;
+ unsigned int offset;
+ unsigned int transfer;
+
+ void *desc;
+ struct spdk_io_channel *ch;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_nvme_ctrlr *ctrlr;
+ open_descriptors_t desc_head;
+ struct rpc_apply_firmware *req;
+};
+
+static void
+apply_firmware_cleanup(void *cb_arg)
+{
+ struct open_descriptors *opt, *tmp;
+ struct firmware_update_info *firm_ctx = cb_arg;
+
+ if (!firm_ctx) {
+ return;
+ }
+
+ if (firm_ctx->fw_image) {
+ spdk_dma_free(firm_ctx->fw_image);
+	}
+
+	/* Release the I/O channel taken for the firmware commands, if any. */
+	if (firm_ctx->ch) {
+		spdk_put_io_channel(firm_ctx->ch);
+	}
+
+ if (firm_ctx->req) {
+ free_rpc_apply_firmware(firm_ctx->req);
+ free(firm_ctx->req);
+ }
+ TAILQ_FOREACH_SAFE(opt, &firm_ctx->desc_head, tqlst, tmp) {
+ TAILQ_REMOVE(&firm_ctx->desc_head, opt, tqlst);
+ spdk_bdev_close(opt->desc);
+ free(opt);
+ }
+ free(firm_ctx);
+}
+
+static void
+apply_firmware_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ int rc;
+ struct spdk_json_write_ctx *w;
+ struct firmware_update_info *firm_ctx = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware commit failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((rc = spdk_nvme_ctrlr_reset(firm_ctx->ctrlr)) != 0) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Controller reset failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if (!(w = spdk_jsonrpc_begin_result(firm_ctx->request))) {
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ spdk_json_write_string(w, "firmware commit succeeded. Controller reset in progress.");
+ spdk_jsonrpc_end_result(firm_ctx->request, w);
+ apply_firmware_cleanup(firm_ctx);
+}
+
+static void
+apply_firmware_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_nvme_cmd cmd = {};
+ struct spdk_nvme_fw_commit fw_commit;
+ int slot = 0;
+ int rc;
+ struct firmware_update_info *firm_ctx = cb_arg;
+ enum spdk_nvme_fw_commit_action commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG;
+
+ if (!success) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "firmware download failed.");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->p += firm_ctx->transfer;
+ firm_ctx->offset += firm_ctx->transfer;
+ firm_ctx->size_remaining -= firm_ctx->transfer;
+
+ switch (firm_ctx->size_remaining) {
+ case 0:
+ /* firmware download completed. Commit firmware */
+ memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
+ fw_commit.fs = slot;
+ fw_commit.ca = commit_action;
+
+ cmd.opc = SPDK_NVME_OPC_FIRMWARE_COMMIT;
+ memcpy(&cmd.cdw10, &fw_commit, sizeof(uint32_t));
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, NULL, 0,
+ apply_firmware_complete_reset, firm_ctx);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware commit failed.");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ break;
+ default:
+ firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096);
+ cmd.opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD;
+
+ cmd.cdw10 = (firm_ctx->transfer >> 2) - 1;
+ cmd.cdw11 = firm_ctx->offset >> 2;
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, firm_ctx->p,
+ firm_ctx->transfer, apply_firmware_complete, firm_ctx);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware download failed.");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ break;
+ }
+}
+
+static void
+spdk_rpc_apply_nvme_firmware(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ int rc;
+ int fd = -1;
+ struct stat fw_stat;
+ struct spdk_nvme_ctrlr *ctrlr;
+ char msg[1024];
+ struct spdk_bdev *bdev;
+ struct spdk_bdev *bdev2;
+ struct open_descriptors *opt;
+ struct spdk_bdev_desc *desc;
+ struct spdk_nvme_cmd *cmd;
+ struct firmware_update_info *firm_ctx;
+
+	firm_ctx = calloc(1, sizeof(struct firmware_update_info));
+ if (!firm_ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ return;
+ }
+ firm_ctx->fw_image = NULL;
+ TAILQ_INIT(&firm_ctx->desc_head);
+ firm_ctx->request = request;
+
+ firm_ctx->req = malloc(sizeof(struct rpc_apply_firmware));
+ if (!firm_ctx->req) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ free(firm_ctx);
+ return;
+ }
+
+ if (spdk_json_decode_object(params, rpc_apply_firmware_decoders,
+ SPDK_COUNTOF(rpc_apply_firmware_decoders), firm_ctx->req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed.");
+ free(firm_ctx->req);
+ free(firm_ctx);
+ return;
+ }
+
+ if ((bdev = spdk_bdev_get_by_name(firm_ctx->req->bdev_name)) == NULL) {
+		snprintf(msg, sizeof(msg), "bdev %s was not found", firm_ctx->req->bdev_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((ctrlr = spdk_bdev_nvme_get_ctrlr(bdev)) == NULL) {
+		snprintf(msg, sizeof(msg), "Controller information for %s was not found.",
+ firm_ctx->req->bdev_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ firm_ctx->ctrlr = ctrlr;
+
+ for (bdev2 = spdk_bdev_first(); bdev2; bdev2 = spdk_bdev_next(bdev2)) {
+
+ if (spdk_bdev_nvme_get_ctrlr(bdev2) != ctrlr) {
+ continue;
+ }
+
+ if (!(opt = malloc(sizeof(struct open_descriptors)))) {
+ snprintf(msg, sizeof(msg), "Memory allocation error.");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((rc = spdk_bdev_open(bdev2, true, NULL, NULL, &desc)) != 0) {
+			snprintf(msg, sizeof(msg), "Device %s is in use.", spdk_bdev_get_name(bdev2));
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ free(opt);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ opt->desc = desc;
+		opt->bdev = bdev2;
+ TAILQ_INSERT_TAIL(&firm_ctx->desc_head, opt, tqlst);
+ }
+
+ /*
+ * find a descriptor associated with our bdev
+ */
+ firm_ctx->desc = NULL;
+ TAILQ_FOREACH(opt, &firm_ctx->desc_head, tqlst) {
+ if (opt->bdev == bdev) {
+ firm_ctx->desc = opt->desc;
+ break;
+ }
+ }
+
+ if (!firm_ctx->desc) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "No descriptor was found.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->ch = spdk_bdev_get_io_channel(firm_ctx->desc);
+ if (!firm_ctx->ch) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "No channels were found.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ fd = open(firm_ctx->req->filename, O_RDONLY);
+ if (fd < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "open file failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ rc = fstat(fd, &fw_stat);
+ if (rc < 0) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "fstat failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->size = fw_stat.st_size;
+ if (fw_stat.st_size % 4) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Firmware image size is not multiple of 4.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->fw_image = spdk_dma_zmalloc(firm_ctx->size, 4096, NULL);
+ if (!firm_ctx->fw_image) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ firm_ctx->p = firm_ctx->fw_image;
+
+ if (read(fd, firm_ctx->p, firm_ctx->size) != ((ssize_t)(firm_ctx->size))) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Read firmware image failed!");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ close(fd);
+
+ firm_ctx->offset = 0;
+ firm_ctx->size_remaining = firm_ctx->size;
+ firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096);
+
+ cmd = malloc(sizeof(struct spdk_nvme_cmd));
+ if (!cmd) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ memset(cmd, 0, sizeof(struct spdk_nvme_cmd));
+ cmd->opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD;
+
+ cmd->cdw10 = (firm_ctx->transfer >> 2) - 1;
+ cmd->cdw11 = firm_ctx->offset >> 2;
+
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, cmd, firm_ctx->p,
+ firm_ctx->transfer, apply_firmware_complete, firm_ctx);
+ if (rc) {
+ free(cmd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "firmware download failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+}
+SPDK_RPC_REGISTER("apply_nvme_firmware", spdk_rpc_apply_nvme_firmware, SPDK_RPC_RUNTIME)
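+
+/*
+ * Example request (illustrative; the path and bdev name are hypothetical).
+ * The image is downloaded in 4 KiB chunks via admin passthru, committed with
+ * REPLACE_AND_ENABLE_IMG, and the controller is then reset:
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "apply_nvme_firmware",
+ *     "params": { "filename": "/tmp/nvme_fw.img", "bdev_name": "Nvme0n1" } }
+ */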
diff --git a/src/spdk/lib/bdev/nvme/nvme_rpc.c b/src/spdk/lib/bdev/nvme/nvme_rpc.c
new file mode 100644
index 00000000..b49a7d42
--- /dev/null
+++ b/src/spdk/lib/bdev/nvme/nvme_rpc.c
@@ -0,0 +1,487 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_nvme.h"
+#include "spdk/base64.h"
+
+enum spdk_nvme_rpc_type {
+ NVME_ADMIN_CMD = 1,
+ NVME_IO_CMD,
+};
+
+struct rpc_send_nvme_cmd_req {
+ char *name;
+ int cmd_type;
+ int data_direction;
+ uint32_t timeout_ms;
+ uint32_t data_len;
+ uint32_t md_len;
+
+ struct spdk_nvme_cmd *cmdbuf;
+ char *data;
+ char *md;
+};
+
+struct rpc_send_nvme_cmd_resp {
+ char *cpl_text;
+ char *data_text;
+ char *md_text;
+};
+
+struct rpc_send_nvme_cmd_ctx {
+ struct spdk_jsonrpc_request *jsonrpc_request;
+ struct rpc_send_nvme_cmd_req req;
+ struct rpc_send_nvme_cmd_resp resp;
+ struct nvme_ctrlr *nvme_ctrlr;
+ struct spdk_io_channel *ctrlr_io_ch;
+};
+
+static void
+free_rpc_send_nvme_cmd_ctx(struct rpc_send_nvme_cmd_ctx *ctx)
+{
+ assert(ctx != NULL);
+
+ free(ctx->req.name);
+ free(ctx->req.cmdbuf);
+ spdk_dma_free(ctx->req.data);
+ spdk_dma_free(ctx->req.md);
+ free(ctx->resp.cpl_text);
+ free(ctx->resp.data_text);
+ free(ctx->resp.md_text);
+ free(ctx);
+}
+
+static int
+rpc_send_nvme_cmd_resp_construct(struct rpc_send_nvme_cmd_resp *resp,
+ struct rpc_send_nvme_cmd_req *req,
+ const struct spdk_nvme_cpl *cpl)
+{
+ resp->cpl_text = malloc(spdk_base64_get_encoded_strlen(sizeof(*cpl)) + 1);
+ if (!resp->cpl_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->cpl_text, cpl, sizeof(*cpl));
+
+ if (req->data_direction == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ if (req->data_len) {
+ resp->data_text = malloc(spdk_base64_get_encoded_strlen(req->data_len) + 1);
+ if (!resp->data_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->data_text, req->data, req->data_len);
+ }
+ if (req->md_len) {
+ resp->md_text = malloc(spdk_base64_get_encoded_strlen(req->md_len) + 1);
+ if (!resp->md_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->md_text, req->md, req->md_len);
+ }
+ }
+
+ return 0;
+}
+
+static void
+spdk_rpc_send_nvme_cmd_complete(struct rpc_send_nvme_cmd_ctx *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_jsonrpc_request *request = ctx->jsonrpc_request;
+ struct spdk_json_write_ctx *w;
+ int ret;
+
+ ret = rpc_send_nvme_cmd_resp_construct(&ctx->resp, &ctx->req, cpl);
+ if (ret) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ spdk_strerror(-ret));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ goto out;
+ }
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "cpl", ctx->resp.cpl_text);
+
+ if (ctx->resp.data_text) {
+ spdk_json_write_named_string(w, "data", ctx->resp.data_text);
+ }
+
+ if (ctx->resp.md_text) {
+ spdk_json_write_named_string(w, "metadata", ctx->resp.md_text);
+ }
+
+ spdk_json_write_object_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_send_nvme_cmd_ctx(ctx);
+ return;
+}
+
+static void
+nvme_rpc_bdev_nvme_cb(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct rpc_send_nvme_cmd_ctx *ctx = (struct rpc_send_nvme_cmd_ctx *)ref;
+
+ if (ctx->ctrlr_io_ch) {
+ spdk_put_io_channel(ctx->ctrlr_io_ch);
+ ctx->ctrlr_io_ch = NULL;
+ }
+
+ spdk_rpc_send_nvme_cmd_complete(ctx, cpl);
+}
+
+static int
+nvme_rpc_admin_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t nbytes, uint32_t timeout_ms)
+{
+ struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr;
+ int ret;
+
+ ret = spdk_nvme_ctrlr_cmd_admin_raw(_nvme_ctrlr->ctrlr, cmd, buf,
+ nbytes, nvme_rpc_bdev_nvme_cb, ctx);
+
+ return ret;
+}
+
+static int
+nvme_rpc_io_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t nbytes, void *md_buf, uint32_t md_len,
+ uint32_t timeout_ms)
+{
+ struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr;
+ struct spdk_nvme_qpair *io_qpair;
+ int ret;
+
+ ctx->ctrlr_io_ch = spdk_get_io_channel(_nvme_ctrlr->ctrlr);
+ io_qpair = spdk_bdev_nvme_get_io_qpair(ctx->ctrlr_io_ch);
+
+ ret = spdk_nvme_ctrlr_cmd_io_raw_with_md(_nvme_ctrlr->ctrlr, io_qpair,
+ cmd, buf, nbytes, md_buf, nvme_rpc_bdev_nvme_cb, ctx);
+ if (ret) {
+ spdk_put_io_channel(ctx->ctrlr_io_ch);
+ }
+
+ return ret;
+
+}
+
+static int
+rpc_send_nvme_cmd_exec(struct rpc_send_nvme_cmd_ctx *ctx)
+{
+ struct rpc_send_nvme_cmd_req *req = &ctx->req;
+ int ret = -EINVAL;
+
+ switch (req->cmd_type) {
+ case NVME_ADMIN_CMD:
+ ret = nvme_rpc_admin_cmd_bdev_nvme(ctx, req->cmdbuf, req->data,
+ req->data_len, req->timeout_ms);
+ break;
+ case NVME_IO_CMD:
+ ret = nvme_rpc_io_cmd_bdev_nvme(ctx, req->cmdbuf, req->data,
+ req->data_len, req->md, req->md_len, req->timeout_ms);
+ break;
+ }
+
+ return ret;
+}
+
+static int
+rpc_decode_cmd_type(const struct spdk_json_val *val, void *out)
+{
+ int *cmd_type = out;
+
+ if (spdk_json_strequal(val, "admin") == true) {
+ *cmd_type = NVME_ADMIN_CMD;
+ } else if (spdk_json_strequal(val, "io") == true) {
+ *cmd_type = NVME_IO_CMD;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: cmd_type\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+rpc_decode_data_direction(const struct spdk_json_val *val, void *out)
+{
+ int *data_direction = out;
+
+ if (spdk_json_strequal(val, "h2c") == true) {
+ *data_direction = SPDK_NVME_DATA_HOST_TO_CONTROLLER;
+ } else if (spdk_json_strequal(val, "c2h") == true) {
+ *data_direction = SPDK_NVME_DATA_CONTROLLER_TO_HOST;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: data_direction\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+rpc_decode_cmdbuf(const struct spdk_json_val *val, void *out)
+{
+ char *text = NULL;
+ size_t text_strlen, raw_len;
+ struct spdk_nvme_cmd *cmdbuf, **_cmdbuf = out;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+ return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+
+ text_strlen = strlen(text);
+ raw_len = spdk_base64_get_decoded_len(text_strlen);
+ cmdbuf = malloc(raw_len);
+ if (!cmdbuf) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = spdk_base64_urlsafe_decode(cmdbuf, &raw_len, text);
+ if (rc) {
+ goto out;
+ }
+ if (raw_len != sizeof(*cmdbuf)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ *_cmdbuf = cmdbuf;
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_data(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out;
+ char *text = NULL;
+ size_t text_strlen;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+ return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+ text_strlen = strlen(text);
+
+ if (req->data_len) {
+ /* data_len is decoded by param "data_len" */
+ if (req->data_len != spdk_base64_get_decoded_len(text_strlen)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ req->data_len = spdk_base64_get_decoded_len(text_strlen);
+ req->data = spdk_dma_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000, NULL);
+ if (!req->data) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+	{
+		/* Decode through a size_t local: req->data_len is only 32 bits wide,
+		 * so passing its address to a function expecting size_t * would write
+		 * past the field.
+		 */
+		size_t decoded_len = req->data_len;
+
+		rc = spdk_base64_urlsafe_decode(req->data, &decoded_len, text);
+		req->data_len = (uint32_t)decoded_len;
+	}
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_data_len(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out;
+ uint32_t data_len;
+ int rc;
+
+ rc = spdk_json_decode_uint32(val, &data_len);
+ if (rc) {
+ return rc;
+ }
+
+ if (req->data_len) {
+ /* data_len is decoded by param "data" */
+ if (req->data_len != data_len) {
+ rc = -EINVAL;
+ }
+ } else {
+ req->data_len = data_len;
+ req->data = spdk_dma_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000, NULL);
+ if (!req->data) {
+ rc = -ENOMEM;
+ }
+ }
+
+ return rc;
+}
+
+static int
+rpc_decode_metadata(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out;
+ char *text = NULL;
+ size_t text_strlen;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+		return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+ text_strlen = strlen(text);
+
+ if (req->md_len) {
+ /* md_len is decoded by param "metadata_len" */
+ if (req->md_len != spdk_base64_get_decoded_len(text_strlen)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ req->md_len = spdk_base64_get_decoded_len(text_strlen);
+ req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL);
+ if (!req->md) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+	{
+		/* Same size_t vs. uint32_t mismatch as in rpc_decode_data(). */
+		size_t decoded_len = req->md_len;
+
+		rc = spdk_base64_urlsafe_decode(req->md, &decoded_len, text);
+		req->md_len = (uint32_t)decoded_len;
+	}
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_metadata_len(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out;
+ uint32_t md_len;
+ int rc;
+
+ rc = spdk_json_decode_uint32(val, &md_len);
+ if (rc) {
+ return rc;
+ }
+
+ if (req->md_len) {
+ /* md_len is decoded by param "metadata" */
+ if (req->md_len != md_len) {
+ rc = -EINVAL;
+ }
+ } else {
+ req->md_len = md_len;
+ req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL);
+ if (!req->md) {
+ rc = -ENOMEM;
+ }
+ }
+
+ return rc;
+}
+
+static const struct spdk_json_object_decoder rpc_send_nvme_cmd_req_decoders[] = {
+ {"name", offsetof(struct rpc_send_nvme_cmd_req, name), spdk_json_decode_string},
+ {"cmd_type", offsetof(struct rpc_send_nvme_cmd_req, cmd_type), rpc_decode_cmd_type},
+ {"data_direction", offsetof(struct rpc_send_nvme_cmd_req, data_direction), rpc_decode_data_direction},
+ {"cmdbuf", offsetof(struct rpc_send_nvme_cmd_req, cmdbuf), rpc_decode_cmdbuf},
+ {"timeout_ms", offsetof(struct rpc_send_nvme_cmd_req, timeout_ms), spdk_json_decode_uint32, true},
+ {"data_len", 0, rpc_decode_data_len, true},
+ {"metadata_len", 0, rpc_decode_metadata_len, true},
+ {"data", 0, rpc_decode_data, true},
+ {"metadata", 0, rpc_decode_metadata, true},
+};
+
+static void
+spdk_rpc_send_nvme_cmd(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_send_nvme_cmd_ctx *ctx;
+ int ret, error_code;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx) {
+		SPDK_ERRLOG("Failed to allocate rpc_send_nvme_cmd_ctx\n");
+		/* Respond directly: free_rpc_send_nvme_cmd_ctx() asserts on a NULL
+		 * ctx, so the shared "invalid" path cannot be used here.
+		 */
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 spdk_strerror(ENOMEM));
+		return;
+	}
+
+ if (spdk_json_decode_object(params, rpc_send_nvme_cmd_req_decoders,
+ SPDK_COUNTOF(rpc_send_nvme_cmd_req_decoders),
+ &ctx->req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS;
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ctx->nvme_ctrlr = spdk_bdev_nvme_lookup_ctrlr(ctx->req.name);
+ if (ctx->nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed at device lookup\n");
+ error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS;
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ctx->jsonrpc_request = request;
+
+ ret = rpc_send_nvme_cmd_exec(ctx);
+ if (ret < 0) {
+ SPDK_NOTICELOG("Failed at rpc_send_nvme_cmd_exec\n");
+ error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR;
+ goto invalid;
+ }
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, error_code, spdk_strerror(-ret));
+ free_rpc_send_nvme_cmd_ctx(ctx);
+ return;
+}
+SPDK_RPC_REGISTER("send_nvme_cmd", spdk_rpc_send_nvme_cmd, SPDK_RPC_RUNTIME)
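+
+/*
+ * Example request (illustrative). "cmdbuf" carries the 64-byte
+ * struct spdk_nvme_cmd as a URL-safe base64 string (the value below is a
+ * placeholder, not a valid encoding); this sketch issues an admin command that
+ * transfers 4096 bytes controller-to-host:
+ *
+ *   { "jsonrpc": "2.0", "id": 1, "method": "send_nvme_cmd",
+ *     "params": { "name": "Nvme0", "cmd_type": "admin",
+ *                 "data_direction": "c2h", "data_len": 4096,
+ *                 "cmdbuf": "<base64url of 64-byte spdk_nvme_cmd>" } }
+ */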
diff --git a/src/spdk/lib/bdev/part.c b/src/spdk/lib/bdev/part.c
new file mode 100644
index 00000000..0cb4759b
--- /dev/null
+++ b/src/spdk/lib/bdev/part.c
@@ -0,0 +1,373 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Common code for partition-like virtual bdevs.
+ */
+
+#include "spdk/bdev.h"
+#include "spdk/log.h"
+#include "spdk/string.h"
+
+#include "spdk/bdev_module.h"
+
+struct spdk_bdev_part_base {
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ uint32_t ref;
+ uint32_t channel_size;
+ spdk_bdev_part_base_free_fn base_free_fn;
+ void *ctx;
+ bool claimed;
+ struct spdk_bdev_module *module;
+ struct spdk_bdev_fn_table *fn_table;
+ struct bdev_part_tailq *tailq;
+ spdk_io_channel_create_cb ch_create_cb;
+ spdk_io_channel_destroy_cb ch_destroy_cb;
+};
+
+struct spdk_bdev *
+spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->bdev;
+}
+
+struct spdk_bdev_desc *
+spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->desc;
+}
+
+struct bdev_part_tailq *
+spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->tailq;
+}
+
+void *
+spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->ctx;
+}
+
+void
+spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
+{
+ if (base->desc) {
+ spdk_bdev_close(base->desc);
+ base->desc = NULL;
+ }
+
+ if (base->base_free_fn != NULL) {
+ base->base_free_fn(base->ctx);
+ }
+
+ free(base);
+}
+
+static void
+spdk_bdev_part_free_cb(void *io_device)
+{
+ struct spdk_bdev_part *part = io_device;
+ struct spdk_bdev_part_base *base;
+
+ assert(part);
+ assert(part->internal.base);
+
+ base = part->internal.base;
+
+ TAILQ_REMOVE(base->tailq, part, tailq);
+
+ if (__sync_sub_and_fetch(&base->ref, 1) == 0) {
+ spdk_bdev_module_release_bdev(base->bdev);
+ spdk_bdev_part_base_free(base);
+ }
+
+ spdk_bdev_destruct_done(&part->internal.bdev, 0);
+ free(part->internal.bdev.name);
+ free(part->internal.bdev.product_name);
+ free(part);
+}
+
+int
+spdk_bdev_part_free(struct spdk_bdev_part *part)
+{
+ spdk_io_device_unregister(part, spdk_bdev_part_free_cb);
+
+ /* Return 1 to indicate that this is an asynchronous operation that isn't complete
+ * until spdk_bdev_destruct_done is called */
+ return 1;
+}
+
+void
+spdk_bdev_part_base_hotremove(struct spdk_bdev *base_bdev, struct bdev_part_tailq *tailq)
+{
+ struct spdk_bdev_part *part, *tmp;
+
+ TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
+ if (part->internal.base->bdev == base_bdev) {
+ spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
+ }
+ }
+}
+
+static bool
+spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
+{
+ struct spdk_bdev_part *part = _part;
+
+ return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
+ io_type);
+}
+
+static struct spdk_io_channel *
+spdk_bdev_part_get_io_channel(void *_part)
+{
+ struct spdk_bdev_part *part = _part;
+
+ return spdk_get_io_channel(part);
+}
+
+struct spdk_bdev *
+spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
+{
+ return &part->internal.bdev;
+}
+
+struct spdk_bdev_part_base *
+spdk_bdev_part_get_base(struct spdk_bdev_part *part)
+{
+ return part->internal.base;
+}
+
+struct spdk_bdev *
+spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
+{
+ return part->internal.base->bdev;
+}
+
+uint64_t
+spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
+{
+ return part->internal.offset_blocks;
+}
+
+static void
+spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *part_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+ spdk_bdev_io_complete(part_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+int
+spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev_part *part = ch->part;
+ struct spdk_io_channel *base_ch = ch->base_ch;
+ struct spdk_bdev_desc *base_desc = part->internal.base->desc;
+ uint64_t offset;
+ int rc = 0;
+
+ /* Modify the I/O to adjust for the offset within the base bdev. */
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
+ rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, offset,
+ bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
+ rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, offset,
+ bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
+ rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
+ spdk_bdev_part_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
+ rc = spdk_bdev_unmap_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
+ spdk_bdev_part_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
+ rc = spdk_bdev_flush_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
+ spdk_bdev_part_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ rc = spdk_bdev_reset(base_desc, base_ch,
+ spdk_bdev_part_complete_io, bdev_io);
+ break;
+ default:
+		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
+ return SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ return rc;
+}
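+
+/*
+ * Worked example for the translation above (illustrative): for a part created
+ * at offset_blocks = 2048, a READ of num_blocks = 16 at offset_blocks = 10 on
+ * the part bdev is reissued to the base bdev at block 2048 + 10 = 2058 with
+ * num_blocks unchanged. RESET is the only type forwarded without translation,
+ * since it targets the whole base bdev.
+ */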
+
+static int
+spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
+ struct spdk_bdev_part_channel *ch = ctx_buf;
+
+ ch->part = part;
+ ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
+ if (ch->base_ch == NULL) {
+ return -1;
+ }
+
+ if (part->internal.base->ch_create_cb) {
+ return part->internal.base->ch_create_cb(io_device, ctx_buf);
+ } else {
+ return 0;
+ }
+}
+
+static void
+spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
+ struct spdk_bdev_part_channel *ch = ctx_buf;
+
+ if (part->internal.base->ch_destroy_cb) {
+ part->internal.base->ch_destroy_cb(io_device, ctx_buf);
+ }
+ spdk_put_io_channel(ch->base_ch);
+}
+
+struct spdk_bdev_part_base *
+spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
+ spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
+ struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
+ spdk_bdev_part_base_free_fn free_fn, void *ctx,
+ uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
+ spdk_io_channel_destroy_cb ch_destroy_cb)
+{
+ int rc;
+ struct spdk_bdev_part_base *base;
+
+ base = calloc(1, sizeof(*base));
+ if (!base) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ return NULL;
+ }
+ fn_table->get_io_channel = spdk_bdev_part_get_io_channel;
+ fn_table->io_type_supported = spdk_bdev_part_io_type_supported;
+
+ base->bdev = bdev;
+ base->desc = NULL;
+ base->ref = 0;
+ base->module = module;
+ base->fn_table = fn_table;
+ base->tailq = tailq;
+ base->base_free_fn = free_fn;
+ base->ctx = ctx;
+ base->claimed = false;
+ base->channel_size = channel_size;
+ base->ch_create_cb = ch_create_cb;
+ base->ch_destroy_cb = ch_destroy_cb;
+
+ rc = spdk_bdev_open(bdev, false, remove_cb, bdev, &base->desc);
+ if (rc) {
+ spdk_bdev_part_base_free(base);
+ SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
+ return NULL;
+ }
+
+ return base;
+}
+
+int
+spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
+ char *name, uint64_t offset_blocks, uint64_t num_blocks,
+ char *product_name)
+{
+ part->internal.bdev.blocklen = base->bdev->blocklen;
+ part->internal.bdev.blockcnt = num_blocks;
+ part->internal.offset_blocks = offset_blocks;
+
+ part->internal.bdev.write_cache = base->bdev->write_cache;
+ part->internal.bdev.need_aligned_buffer = base->bdev->need_aligned_buffer;
+ part->internal.bdev.ctxt = part;
+ part->internal.bdev.module = base->module;
+ part->internal.bdev.fn_table = base->fn_table;
+
+ part->internal.bdev.name = strdup(name);
+ part->internal.bdev.product_name = strdup(product_name);
+
+ if (part->internal.bdev.name == NULL) {
+ SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
+ return -1;
+ } else if (part->internal.bdev.product_name == NULL) {
+ free(part->internal.bdev.name);
+ SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
+ spdk_bdev_get_name(base->bdev));
+ return -1;
+ }
+
+ __sync_fetch_and_add(&base->ref, 1);
+ part->internal.base = base;
+
+ if (!base->claimed) {
+ int rc;
+
+ rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
+ if (rc) {
+ SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
+ free(part->internal.bdev.name);
+ free(part->internal.bdev.product_name);
+ return -1;
+ }
+ base->claimed = true;
+ }
+
+ spdk_io_device_register(part, spdk_bdev_part_channel_create_cb,
+ spdk_bdev_part_channel_destroy_cb,
+ base->channel_size,
+ name);
+
+ spdk_vbdev_register(&part->internal.bdev, &base->bdev, 1);
+ TAILQ_INSERT_TAIL(base->tailq, part, tailq);
+
+ return 0;
+}
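+
+/*
+ * Minimal usage sketch (illustrative; my_module, my_fn_table, my_parts,
+ * hotremove_cb and the names are assumptions, not part of this file). A
+ * partition-style vbdev typically constructs one base per claimed bdev and
+ * one part per exposed block range:
+ *
+ *   base = spdk_bdev_part_base_construct(base_bdev, hotremove_cb, &my_module,
+ *                                        &my_fn_table, &my_parts,
+ *                                        NULL, NULL,
+ *                                        sizeof(struct spdk_bdev_part_channel),
+ *                                        NULL, NULL);
+ *   part = calloc(1, sizeof(*part));
+ *   spdk_bdev_part_construct(part, base, "Nvme0n1p0", 0, 1024, "Example Part");
+ */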
diff --git a/src/spdk/lib/bdev/passthru/Makefile b/src/spdk/lib/bdev/passthru/Makefile
new file mode 100644
index 00000000..5a2a383a
--- /dev/null
+++ b/src/spdk/lib/bdev/passthru/Makefile
@@ -0,0 +1,42 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
+
+C_SRCS = vbdev_passthru.c vbdev_passthru_rpc.c
+LIBNAME = vbdev_passthru
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru.c b/src/spdk/lib/bdev/passthru/vbdev_passthru.c
new file mode 100644
index 00000000..4e3dacfc
--- /dev/null
+++ b/src/spdk/lib/bdev/passthru/vbdev_passthru.c
@@ -0,0 +1,671 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is a simple example of a virtual block device module that passes IO
+ * down to a bdev (or bdevs) that it's configured to attach to.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "vbdev_passthru.h"
+#include "spdk/rpc.h"
+#include "spdk/env.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/string.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+
+static int vbdev_passthru_init(void);
+static void vbdev_passthru_get_spdk_running_config(FILE *fp);
+static int vbdev_passthru_get_ctx_size(void);
+static void vbdev_passthru_examine(struct spdk_bdev *bdev);
+static void vbdev_passthru_finish(void);
+
+static struct spdk_bdev_module passthru_if = {
+ .name = "passthru",
+ .module_init = vbdev_passthru_init,
+ .config_text = vbdev_passthru_get_spdk_running_config,
+ .get_ctx_size = vbdev_passthru_get_ctx_size,
+ .examine_config = vbdev_passthru_examine,
+ .module_fini = vbdev_passthru_finish
+};
+
+SPDK_BDEV_MODULE_REGISTER(&passthru_if)
+
+/* List of pt_bdev names and their base bdevs, as read from the configuration file.
+ * Used so we can parse the conf once at init and use this list in examine().
+ */
+struct bdev_names {
+ char *vbdev_name;
+ char *bdev_name;
+ TAILQ_ENTRY(bdev_names) link;
+};
+static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
+
+/* List of virtual bdevs and associated info for each. */
+struct vbdev_passthru {
+ struct spdk_bdev *base_bdev; /* the thing we're attaching to */
+ struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */
+ struct spdk_bdev pt_bdev; /* the PT virtual bdev */
+ TAILQ_ENTRY(vbdev_passthru) link;
+};
+static TAILQ_HEAD(, vbdev_passthru) g_pt_nodes = TAILQ_HEAD_INITIALIZER(g_pt_nodes);
+
+/* The pt vbdev channel struct. It is allocated and freed on our behalf by the io channel code.
+ * If this vbdev needed to implement a poller or a queue for IO, this is where those things
+ * would be defined. This passthru bdev doesn't actually need to allocate a channel; it could
+ * simply pass back the channel of the bdev underneath it, but for example purposes we
+ * present our own to the upper layers.
+ */
+struct pt_io_channel {
+ struct spdk_io_channel *base_ch; /* IO channel of base device */
+};
+
+/* Just for fun, this pt_bdev module doesn't need it, but this is essentially a per-IO
+ * context that the bdev layer hands us.
+ */
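+/* (The bdev layer reserves this much per-IO context for us based on what our
+ * get_ctx_size() callback, vbdev_passthru_get_ctx_size() below, returns.)
+ */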
+struct passthru_bdev_io {
+ uint8_t test;
+
+ /* bdev related */
+ struct spdk_io_channel *ch;
+
+ /* for bdev_io_wait */
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+static void
+vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
+
+/* Called after we've unregistered following a hot remove callback.
+ * Our finish entry point will be called next.
+ */
+static int
+vbdev_passthru_destruct(void *ctx)
+{
+ struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
+
+ /* Unclaim the underlying bdev. */
+ spdk_bdev_module_release_bdev(pt_node->base_bdev);
+
+ /* Close the underlying bdev. */
+ spdk_bdev_close(pt_node->base_desc);
+
+ /* Done with this pt_node. */
+ TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
+ free(pt_node->pt_bdev.name);
+ free(pt_node);
+ return 0;
+}
+
+/* Completion callback for IOs that were issued from this bdev. The original bdev_io
+ * is passed in as an arg so we'll complete that one with the appropriate status
+ * and then free the one that this module issued.
+ */
+static void
+_pt_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *orig_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+ struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)orig_io->driver_ctx;
+
+	/* We set up this value in the submission routine; here we just verify
+	 * that it is passed back to us.
+ */
+ if (io_ctx->test != 0x5a) {
+ SPDK_ERRLOG("Error, original IO device_ctx is wrong! 0x%x\n",
+ io_ctx->test);
+ }
+
+	/* Complete the original IO and then free the one that we created here
+	 * as a result of issuing an IO via submit_request.
+ */
+ spdk_bdev_io_complete(orig_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+vbdev_passthru_resubmit_io(void *arg)
+{
+ struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
+ struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
+
+ vbdev_passthru_submit_request(io_ctx->ch, bdev_io);
+}
+
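+/* Our submission to the base bdev failed with -ENOMEM, so register a wait
+ * entry with the bdev layer; it will call vbdev_passthru_resubmit_io() once
+ * a bdev_io is available again.
+ */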
+static void
+vbdev_passthru_queue_io(struct spdk_bdev_io *bdev_io)
+{
+ struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
+ int rc;
+
+ io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
+ io_ctx->bdev_io_wait.cb_fn = vbdev_passthru_resubmit_io;
+ io_ctx->bdev_io_wait.cb_arg = bdev_io;
+
+ rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->ch, &io_ctx->bdev_io_wait);
+ if (rc != 0) {
+ SPDK_ERRLOG("Queue io failed in vbdev_passthru_queue_io, rc=%d.\n", rc);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+/* Callback for getting a buf from the bdev pool in the event that the caller passed
+ * in NULL. We need to own the buffer so it doesn't get freed by another vbdev module
+ * beneath us before we're done with it. That won't happen in this example, but it could
+ * if this example were used as a template for something more complex.
+ */
+static void
+pt_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru,
+ pt_bdev);
+ struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch);
+
+ spdk_bdev_readv_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks, _pt_complete_io,
+ bdev_io);
+}
+
+/* Called when someone above submits IO to this pt vbdev. We simply pass it on here
+ * via SPDK IO calls, which in turn allocate another bdev IO and call our completion
+ * callback below with the original bdev_io so that we can complete it once this IO completes.
+ */
+static void
+vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru, pt_bdev);
+ struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch);
+ struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
+ int rc = 0;
+
+	/* Set up a per-IO context value; we don't do anything with it in the vbdev other
+	 * than confirm we get the same thing back in the completion callback, just to
+	 * demonstrate.
+ */
+ io_ctx->test = 0x5a;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, pt_read_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ rc = spdk_bdev_writev_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks, _pt_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ rc = spdk_bdev_write_zeroes_blocks(pt_node->base_desc, pt_ch->base_ch,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ _pt_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ rc = spdk_bdev_unmap_blocks(pt_node->base_desc, pt_ch->base_ch,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ _pt_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ rc = spdk_bdev_flush_blocks(pt_node->base_desc, pt_ch->base_ch,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ _pt_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ rc = spdk_bdev_reset(pt_node->base_desc, pt_ch->base_ch,
+ _pt_complete_io, bdev_io);
+ break;
+ default:
+ SPDK_ERRLOG("passthru: unknown I/O type %d\n", bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+ if (rc != 0) {
+ if (rc == -ENOMEM) {
+			SPDK_ERRLOG("No memory available; queueing io for passthru.\n");
+ io_ctx->ch = ch;
+ vbdev_passthru_queue_io(bdev_io);
+ } else {
+ SPDK_ERRLOG("ERROR on bdev_io submission!\n");
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+/* We'll just ask the base bdev and pass its answer along; however, if we wanted to be
+ * more (or less) restrictive for some reason, we could intercept the response
+ * and modify it for our purposes.
+ */
+static bool
+vbdev_passthru_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
+
+ return spdk_bdev_io_type_supported(pt_node->base_bdev, io_type);
+}
+
+/* We supplied this as an entry point for upper layers that want to communicate with this
+ * bdev. This is how they get a channel. We are passed the same context we provided when
+ * we created our PT vbdev in examine(), which, for this bdev, is the address of one of
+ * our context nodes. From here we'll ask the SPDK channel code to fill out our channel
+ * struct and we'll keep it in our PT node.
+ */
+static struct spdk_io_channel *
+vbdev_passthru_get_io_channel(void *ctx)
+{
+ struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
+ struct spdk_io_channel *pt_ch = NULL;
+
+ /* The IO channel code will allocate a channel for us which consists of
+ * the SPDK channel structure plus the size of our pt_io_channel struct
+ * that we passed in when we registered our IO device. It will then call
+ * our channel create callback to populate any elements that we need to
+ * update.
+ */
+ pt_ch = spdk_get_io_channel(pt_node);
+
+ return pt_ch;
+}
+
+static int
+vbdev_passthru_info_config_json(void *ctx, struct spdk_json_write_ctx *write_ctx)
+{
+ struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
+
+ /* This is the output for get_bdevs() for this vbdev */
+ spdk_json_write_name(write_ctx, "passthru");
+ spdk_json_write_object_begin(write_ctx);
+
+ spdk_json_write_name(write_ctx, "pt_bdev_name");
+ spdk_json_write_string(write_ctx, spdk_bdev_get_name(&pt_node->pt_bdev));
+
+ spdk_json_write_name(write_ctx, "base_bdev_name");
+ spdk_json_write_string(write_ctx, spdk_bdev_get_name(pt_node->base_bdev));
+
+ spdk_json_write_object_end(write_ctx);
+
+ return 0;
+}
+
+/* We provide this callback for the SPDK channel code to create a channel using
+ * the channel struct we provided in our module get_io_channel() entry point. Here
+ * we get and save off an underlying base channel of the device below us so that
+ * we can communicate with the base bdev on a per channel basis. If we needed
+ * our own poller for this vbdev, we'd register it here.
+ */
+static int
+pt_bdev_ch_create_cb(void *io_device, void *ctx_buf)
+{
+ struct pt_io_channel *pt_ch = ctx_buf;
+ struct vbdev_passthru *pt_node = io_device;
+
+ pt_ch->base_ch = spdk_bdev_get_io_channel(pt_node->base_desc);
+
+ return 0;
+}
+
+/* We provide this callback for the SPDK channel code to destroy a channel
+ * created with our create callback. We just need to undo anything we did
+ * when we created it. If this bdev used its own poller, we'd unregister it here.
+ */
+static void
+pt_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct pt_io_channel *pt_ch = ctx_buf;
+
+ spdk_put_io_channel(pt_ch->base_ch);
+}
+
+/* Create the passthru association from the bdev and vbdev name and insert
+ * it on the global list. */
+static int
+vbdev_passthru_insert_name(const char *bdev_name, const char *vbdev_name)
+{
+ struct bdev_names *name;
+
+ name = calloc(1, sizeof(struct bdev_names));
+ if (!name) {
+ SPDK_ERRLOG("could not allocate bdev_names\n");
+ return -ENOMEM;
+ }
+
+ name->bdev_name = strdup(bdev_name);
+ if (!name->bdev_name) {
+ SPDK_ERRLOG("could not allocate name->bdev_name\n");
+ free(name);
+ return -ENOMEM;
+ }
+
+ name->vbdev_name = strdup(vbdev_name);
+ if (!name->vbdev_name) {
+ SPDK_ERRLOG("could not allocate name->vbdev_name\n");
+ free(name->bdev_name);
+ free(name);
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
+
+ return 0;
+}
+
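+/* Example of the config section this parser accepts (illustrative names; the
+ * same format is emitted by vbdev_passthru_get_spdk_running_config() below):
+ *
+ *   [Passthru]
+ *     PT Malloc0 PT0
+ *
+ * pairs base bdev "Malloc0" with a new passthru vbdev named "PT0".
+ */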
+/* On init, just parse config file and build list of pt vbdevs and bdev name pairs. */
+static int
+vbdev_passthru_init(void)
+{
+ struct spdk_conf_section *sp = NULL;
+ const char *conf_bdev_name = NULL;
+ const char *conf_vbdev_name = NULL;
+ struct bdev_names *name;
+ int i, rc;
+
+ sp = spdk_conf_find_section(NULL, "Passthru");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ for (i = 0; ; i++) {
+ if (!spdk_conf_section_get_nval(sp, "PT", i)) {
+ break;
+ }
+
+ conf_bdev_name = spdk_conf_section_get_nmval(sp, "PT", i, 0);
+ if (!conf_bdev_name) {
+ SPDK_ERRLOG("Passthru configuration missing bdev name\n");
+ break;
+ }
+
+ conf_vbdev_name = spdk_conf_section_get_nmval(sp, "PT", i, 1);
+ if (!conf_vbdev_name) {
+ SPDK_ERRLOG("Passthru configuration missing pt_bdev name\n");
+ break;
+ }
+
+ rc = vbdev_passthru_insert_name(conf_bdev_name, conf_vbdev_name);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ SPDK_NOTICELOG("conf parse matched: %s\n", name->bdev_name);
+ }
+ return 0;
+}
+
+/* Called when the entire module is being torn down. */
+static void
+vbdev_passthru_finish(void)
+{
+ struct bdev_names *name;
+
+ while ((name = TAILQ_FIRST(&g_bdev_names))) {
+ TAILQ_REMOVE(&g_bdev_names, name, link);
+ free(name->bdev_name);
+ free(name->vbdev_name);
+ free(name);
+ }
+}
+
+/* During init we'll be asked how much memory we'd like passed to us
+ * in bdev_io structures as context. Here's where we specify how
+ * much context we want per IO.
+ */
+static int
+vbdev_passthru_get_ctx_size(void)
+{
+ return sizeof(struct passthru_bdev_io);
+}
+
+/* Called when SPDK wants to save the current config of this vbdev module to
+ * a file.
+ */
+static void
+vbdev_passthru_get_spdk_running_config(FILE *fp)
+{
+ struct bdev_names *names = NULL;
+
+ fprintf(fp, "\n[Passthru]\n");
+ TAILQ_FOREACH(names, &g_bdev_names, link) {
+ fprintf(fp, " PT %s %s\n", names->bdev_name, names->vbdev_name);
+ }
+ fprintf(fp, "\n");
+}
+
+/* Called when SPDK wants to output the bdev specific methods. */
+static void
+vbdev_passthru_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev, struct vbdev_passthru, pt_bdev);
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_passthru_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(pt_node->base_bdev));
+ spdk_json_write_named_string(w, "passthru_bdev_name", spdk_bdev_get_name(bdev));
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+/* When we register our bdev this is how we specify our entry points. */
+static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = {
+ .destruct = vbdev_passthru_destruct,
+ .submit_request = vbdev_passthru_submit_request,
+ .io_type_supported = vbdev_passthru_io_type_supported,
+ .get_io_channel = vbdev_passthru_get_io_channel,
+ .dump_info_json = vbdev_passthru_info_config_json,
+ .write_config_json = vbdev_passthru_write_json_config,
+};
+
+/* Called when the underlying base bdev goes away. */
+static void
+vbdev_passthru_base_bdev_hotremove_cb(void *ctx)
+{
+ struct vbdev_passthru *pt_node, *tmp;
+ struct spdk_bdev *bdev_find = ctx;
+
+ TAILQ_FOREACH_SAFE(pt_node, &g_pt_nodes, link, tmp) {
+ if (bdev_find == pt_node->base_bdev) {
+ spdk_bdev_unregister(&pt_node->pt_bdev, NULL, NULL);
+ }
+ }
+}
+
+/* Create and register the passthru vbdev if we find it in our list of bdev names.
+ * This can be called either by the examine path or RPC method.
+ */
+static void
+vbdev_passthru_register(struct spdk_bdev *bdev)
+{
+ struct bdev_names *name;
+ struct vbdev_passthru *pt_node;
+ int rc;
+
+ /* Check our list of names from config versus this bdev and if
+ * there's a match, create the pt_node & bdev accordingly.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->bdev_name, bdev->name) != 0) {
+ continue;
+ }
+
+ SPDK_NOTICELOG("Match on %s\n", bdev->name);
+ pt_node = calloc(1, sizeof(struct vbdev_passthru));
+ if (!pt_node) {
+ SPDK_ERRLOG("could not allocate pt_node\n");
+ break;
+ }
+
+ /* The base bdev that we're attaching to. */
+ pt_node->base_bdev = bdev;
+ pt_node->pt_bdev.name = strdup(name->vbdev_name);
+ if (!pt_node->pt_bdev.name) {
+ SPDK_ERRLOG("could not allocate pt_bdev name\n");
+ free(pt_node);
+ break;
+ }
+ pt_node->pt_bdev.product_name = "passthru";
+
+ /* Copy some properties from the underlying base bdev. */
+ pt_node->pt_bdev.write_cache = bdev->write_cache;
+ pt_node->pt_bdev.need_aligned_buffer = bdev->need_aligned_buffer;
+ pt_node->pt_bdev.optimal_io_boundary = bdev->optimal_io_boundary;
+ pt_node->pt_bdev.blocklen = bdev->blocklen;
+ pt_node->pt_bdev.blockcnt = bdev->blockcnt;
+
+ /* This is the context that is passed to us when the bdev
+ * layer calls in so we'll save our pt_bdev node here.
+ */
+ pt_node->pt_bdev.ctxt = pt_node;
+ pt_node->pt_bdev.fn_table = &vbdev_passthru_fn_table;
+ pt_node->pt_bdev.module = &passthru_if;
+ TAILQ_INSERT_TAIL(&g_pt_nodes, pt_node, link);
+
+ spdk_io_device_register(pt_node, pt_bdev_ch_create_cb, pt_bdev_ch_destroy_cb,
+ sizeof(struct pt_io_channel),
+ name->bdev_name);
+ SPDK_NOTICELOG("io_device created at: 0x%p\n", pt_node);
+
+ rc = spdk_bdev_open(bdev, true, vbdev_passthru_base_bdev_hotremove_cb,
+ bdev, &pt_node->base_desc);
+ if (rc) {
+ SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
+ TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
+ free(pt_node->pt_bdev.name);
+ free(pt_node);
+ break;
+ }
+ SPDK_NOTICELOG("bdev opened\n");
+
+ rc = spdk_bdev_module_claim_bdev(bdev, pt_node->base_desc, pt_node->pt_bdev.module);
+ if (rc) {
+ SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
+ spdk_bdev_close(pt_node->base_desc);
+ TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
+ free(pt_node->pt_bdev.name);
+ free(pt_node);
+ break;
+ }
+ SPDK_NOTICELOG("bdev claimed\n");
+
+ rc = spdk_vbdev_register(&pt_node->pt_bdev, &bdev, 1);
+ if (rc) {
+ SPDK_ERRLOG("could not register pt_bdev\n");
+ spdk_bdev_close(pt_node->base_desc);
+ TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
+ free(pt_node->pt_bdev.name);
+ free(pt_node);
+ break;
+ }
+ SPDK_NOTICELOG("pt_bdev registered\n");
+ SPDK_NOTICELOG("created pt_bdev for: %s\n", name->vbdev_name);
+ }
+}
+
+/* Create the passthru disk from the given bdev and vbdev name. */
+int
+create_passthru_disk(const char *bdev_name, const char *vbdev_name)
+{
+ struct spdk_bdev *bdev = NULL;
+ int rc = 0;
+
+ bdev = spdk_bdev_get_by_name(bdev_name);
+ if (!bdev) {
+ return -1;
+ }
+
+ rc = vbdev_passthru_insert_name(bdev_name, vbdev_name);
+ if (rc != 0) {
+ return rc;
+ }
+
+ vbdev_passthru_register(bdev);
+
+ return 0;
+}
+
+void
+delete_passthru_disk(struct spdk_bdev *bdev, spdk_delete_passthru_complete cb_fn, void *cb_arg)
+{
+ struct bdev_names *name;
+
+ if (!bdev || bdev->module != &passthru_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ /* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
+ * vbdev does not get re-created if the same bdev is constructed at some other time,
+ * unless the underlying bdev was hot-removed.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->vbdev_name, bdev->name) == 0) {
+ TAILQ_REMOVE(&g_bdev_names, name, link);
+ free(name->bdev_name);
+ free(name->vbdev_name);
+ free(name);
+ break;
+ }
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
+/* Because we registered this function as the examine_config entry point in our
+ * module struct, we'll get this call anytime a new bdev shows up.
+ * Here we need to decide if we care about it and, if so, what to do. We
+ * parsed the config file at init, so we check the new bdev against the list
+ * we built up at that time, and if the user configured us to attach to this
+ * bdev, here's where we do it.
+ */
+static void
+vbdev_passthru_examine(struct spdk_bdev *bdev)
+{
+ vbdev_passthru_register(bdev);
+
+ spdk_bdev_module_examine_done(&passthru_if);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_passthru", SPDK_LOG_VBDEV_PASSTHRU)
diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru.h b/src/spdk/lib/bdev/passthru/vbdev_passthru.h
new file mode 100644
index 00000000..5705c4ed
--- /dev/null
+++ b/src/spdk/lib/bdev/passthru/vbdev_passthru.h
@@ -0,0 +1,62 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_PASSTHRU_H
+#define SPDK_VBDEV_PASSTHRU_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_passthru_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create new pass through bdev.
+ *
+ * \param bdev_name Bdev on which pass through vbdev will be created.
+ * \param vbdev_name Vbdev name.
+ * \return 0 on success, non-zero on failure.
+ */
+int create_passthru_disk(const char *bdev_name, const char *vbdev_name);
+
+/**
+ * Delete passthru bdev.
+ *
+ * \param bdev Pointer to pass through bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void delete_passthru_disk(struct spdk_bdev *bdev, spdk_delete_passthru_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_VBDEV_PASSTHRU_H */
diff --git a/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c b/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c
new file mode 100644
index 00000000..9f0f9521
--- /dev/null
+++ b/src/spdk/lib/bdev/passthru/vbdev_passthru_rpc.c
@@ -0,0 +1,160 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "vbdev_passthru.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+/* Structure to hold the parameters for this RPC method. */
+struct rpc_construct_passthru {
+ char *base_bdev_name;
+ char *passthru_bdev_name;
+};
+
+/* Free the allocated memory resource after the RPC handling. */
+static void
+free_rpc_construct_passthru(struct rpc_construct_passthru *r)
+{
+ free(r->base_bdev_name);
+ free(r->passthru_bdev_name);
+}
+
+/* Structure to decode the input parameters for this RPC method. */
+static const struct spdk_json_object_decoder rpc_construct_passthru_decoders[] = {
+ {"base_bdev_name", offsetof(struct rpc_construct_passthru, base_bdev_name), spdk_json_decode_string},
+ {"passthru_bdev_name", offsetof(struct rpc_construct_passthru, passthru_bdev_name), spdk_json_decode_string},
+};
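+/* Example request this decoder accepts (illustrative values only):
+ *   {"jsonrpc": "2.0", "method": "construct_passthru_bdev", "id": 1,
+ *    "params": {"base_bdev_name": "Malloc0", "passthru_bdev_name": "PT0"}}
+ */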
+
+/* Decode the parameters for this RPC method and properly construct the passthru
+ * device. On failure, an error status is returned.
+ */
+static void
+spdk_rpc_construct_passthru_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_passthru req = {NULL};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_passthru_decoders,
+ SPDK_COUNTOF(rpc_construct_passthru_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_PASSTHRU, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ rc = create_passthru_disk(req.base_bdev_name, req.passthru_bdev_name);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_passthru(&req);
+ return;
+ }
+
+ spdk_json_write_string(w, req.passthru_bdev_name);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_construct_passthru(&req);
+ return;
+
+invalid:
+ free_rpc_construct_passthru(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+}
+SPDK_RPC_REGISTER("construct_passthru_bdev", spdk_rpc_construct_passthru_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_passthru {
+ char *name;
+};
+
+static void
+free_rpc_delete_passthru(struct rpc_delete_passthru *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_passthru_decoders[] = {
+ {"name", offsetof(struct rpc_delete_passthru, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_passthru_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_passthru_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_passthru req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_passthru_decoders,
+ SPDK_COUNTOF(rpc_delete_passthru_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ delete_passthru_disk(bdev, _spdk_rpc_delete_passthru_bdev_cb, request);
+
+ free_rpc_delete_passthru(&req);
+
+ return;
+
+invalid:
+ free_rpc_delete_passthru(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_passthru_bdev", spdk_rpc_delete_passthru_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/pmem/Makefile b/src/spdk/lib/bdev/pmem/Makefile
new file mode 100644
index 00000000..19f0da8c
--- /dev/null
+++ b/src/spdk/lib/bdev/pmem/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_pmem.c bdev_pmem_rpc.c
+LIBNAME = bdev_pmem
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem.c b/src/spdk/lib/bdev/pmem/bdev_pmem.c
new file mode 100644
index 00000000..9238e085
--- /dev/null
+++ b/src/spdk/lib/bdev/pmem/bdev_pmem.c
@@ -0,0 +1,465 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/conf.h"
+#include "spdk/string.h"
+#include "spdk/likely.h"
+#include "spdk/util.h"
+#include "spdk/rpc.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_pmem.h"
+#include "libpmemblk.h"
+
+struct pmem_disk {
+ struct spdk_bdev disk;
+ PMEMblkpool *pool;
+ char pmem_file[NAME_MAX];
+ TAILQ_ENTRY(pmem_disk) tailq;
+};
+
+static TAILQ_HEAD(, pmem_disk) g_pmem_disks = TAILQ_HEAD_INITIALIZER(g_pmem_disks);
+
+static int bdev_pmem_initialize(void);
+static void bdev_pmem_finish(void);
+
+static struct spdk_bdev_module pmem_if = {
+ .name = "pmem",
+ .module_init = bdev_pmem_initialize,
+ .module_fini = bdev_pmem_finish,
+	.async_fini = true,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&pmem_if)
+
+typedef int(*spdk_bdev_pmem_io_request)(PMEMblkpool *pbp, void *buf, long long blockno);
+
+static int
+_bdev_pmem_submit_io_read(PMEMblkpool *pbp, void *buf, long long blockno)
+{
+ return pmemblk_read(pbp, buf, blockno);
+}
+
+static int
+_bdev_pmem_submit_io_write(PMEMblkpool *pbp, void *buf, long long blockno)
+{
+ return pmemblk_write(pbp, buf, blockno);
+}
+
+static int
+bdev_pmem_destruct(void *ctx)
+{
+ struct pmem_disk *pdisk = ctx;
+
+ TAILQ_REMOVE(&g_pmem_disks, pdisk, tailq);
+ free(pdisk->disk.name);
+ pmemblk_close(pdisk->pool);
+ free(pdisk);
+
+ return 0;
+}
+
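+/* Sanity-check the scatter-gather list: return 0 when the iovecs supply at
+ * least num_blocks * block_size bytes and every fully-consumed iovec is a
+ * multiple of the block size (only the iovec that completes the transfer may
+ * be larger); return -1 otherwise, including when an iovec has a NULL base
+ * with a non-zero length.
+ */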
+static int
+bdev_pmem_check_iov_len(struct iovec *iovs, int iovcnt, size_t num_blocks, uint32_t block_size)
+{
+ size_t nbytes = num_blocks * block_size;
+ int i;
+
+ for (i = 0; i < iovcnt; i++) {
+ if (spdk_unlikely(iovs[i].iov_base == NULL && iovs[i].iov_len != 0)) {
+ return -1;
+ }
+
+ if (nbytes <= iovs[i].iov_len) {
+ return 0;
+ }
+
+ if (spdk_unlikely(iovs[i].iov_len % block_size != 0)) {
+ return -1;
+ }
+
+ nbytes -= iovs[i].iov_len;
+ }
+
+ return -1;
+}
+
+static void
+bdev_pmem_submit_io(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
+ struct spdk_io_channel *ch,
+ struct iovec *iov, int iovcnt,
+ uint64_t offset_blocks, size_t num_blocks, uint32_t block_size,
+ spdk_bdev_pmem_io_request fn)
+{
+ int rc;
+ size_t nbytes, offset, len;
+ enum spdk_bdev_io_status status;
+
+ rc = bdev_pmem_check_iov_len(iov, iovcnt, num_blocks, block_size);
+ if (rc) {
+ status = SPDK_BDEV_IO_STATUS_FAILED;
+ goto end;
+ }
+
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "io %lu blocks from offset %#lx\n",
+ num_blocks, offset_blocks);
+
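+	/* libpmemblk transfers exactly one block per call, so walk each iovec
+	 * and invoke fn() (pmemblk_read or pmemblk_write) once per block.
+	 */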
+ for (nbytes = num_blocks * block_size; nbytes > 0; iov++) {
+ len = spdk_min(iov->iov_len, nbytes);
+ nbytes -= len;
+
+ offset = 0;
+ while (offset != len) {
+ rc = fn(pdisk->pool, iov->iov_base + offset, offset_blocks);
+ if (rc != 0) {
+ SPDK_ERRLOG("pmemblk io failed: %d (%s)\n", errno, pmemblk_errormsg());
+ status = SPDK_BDEV_IO_STATUS_FAILED;
+ goto end;
+ }
+
+ offset += block_size;
+ offset_blocks++;
+ }
+ }
+
+ assert(num_blocks == offset_blocks - bdev_io->u.bdev.offset_blocks);
+ status = SPDK_BDEV_IO_STATUS_SUCCESS;
+end:
+
+ spdk_bdev_io_complete(bdev_io, status);
+}
+
+static void
+bdev_pmem_write_zeros(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
+ struct spdk_io_channel *ch, uint64_t offset_blocks,
+ uint64_t num_blocks, uint32_t block_size)
+{
+ int rc;
+ enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ while (num_blocks > 0) {
+ rc = pmemblk_set_zero(pdisk->pool, offset_blocks);
+ if (rc != 0) {
+ SPDK_ERRLOG("pmemblk_set_zero failed: %d (%s)\n", errno, pmemblk_errormsg());
+ status = SPDK_BDEV_IO_STATUS_FAILED;
+ break;
+ }
+ offset_blocks++;
+ num_blocks--;
+ }
+ spdk_bdev_io_complete(bdev_io, status);
+}
+
+static void
+bdev_pmem_io_get_buf_cb(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io)
+{
+ bdev_pmem_submit_io(bdev_io,
+ bdev_io->bdev->ctxt,
+ channel,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->bdev->blocklen,
+ _bdev_pmem_submit_io_read);
+}
+
+static void
+bdev_pmem_submit_request(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_pmem_io_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_pmem_submit_io(bdev_io,
+ bdev_io->bdev->ctxt,
+ channel,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->bdev->blocklen,
+ _bdev_pmem_submit_io_write);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ bdev_pmem_write_zeros(bdev_io,
+ bdev_io->bdev->ctxt,
+ channel,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->bdev->blocklen);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ break;
+ default:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_pmem_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return true;
+ default:
+ return false;
+ }
+}
+
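+/* All pmem disks share the single io_device registered on &g_pmem_disks in
+ * bdev_pmem_initialize(); pmemblk calls are synchronous, so no per-channel
+ * state is needed (note the empty create/destroy callbacks below).
+ */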
+static struct spdk_io_channel *
+bdev_pmem_get_io_channel(void *ctx)
+{
+ return spdk_get_io_channel(&g_pmem_disks);
+}
+
+static int
+bdev_pmem_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct pmem_disk *pdisk = ctx;
+
+ spdk_json_write_name(w, "pmem");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_name(w, "pmem_file");
+ spdk_json_write_string(w, pdisk->pmem_file);
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static int
+bdev_pmem_create_cb(void *io_device, void *ctx_buf)
+{
+ return 0;
+}
+
+static void
+bdev_pmem_destroy_cb(void *io_device, void *ctx_buf)
+{
+}
+
+static void
+bdev_pmem_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct pmem_disk *disk = bdev->ctxt;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_pmem_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_string(w, "pmem_file", disk->pmem_file);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table pmem_fn_table = {
+ .destruct = bdev_pmem_destruct,
+ .submit_request = bdev_pmem_submit_request,
+ .io_type_supported = bdev_pmem_io_type_supported,
+ .get_io_channel = bdev_pmem_get_io_channel,
+ .dump_info_json = bdev_pmem_dump_info_json,
+ .write_config_json = bdev_pmem_write_config_json,
+};
+
+int
+spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev)
+{
+ uint64_t num_blocks;
+ uint32_t block_size;
+ struct pmem_disk *pdisk;
+ int rc;
+
+ *bdev = NULL;
+
+ if (name == NULL) {
+ SPDK_ERRLOG("Missing name parameter for spdk_create_pmem_disk()\n");
+ return EINVAL;
+ }
+
+ if (pmemblk_check(pmem_file, 0) != 1) {
+ SPDK_ERRLOG("Pool '%s' check failed: %s\n", pmem_file, pmemblk_errormsg());
+ return EIO;
+ }
+
+ pdisk = calloc(1, sizeof(*pdisk));
+ if (!pdisk) {
+ return ENOMEM;
+ }
+
+ snprintf(pdisk->pmem_file, sizeof(pdisk->pmem_file), "%s", pmem_file);
+ pdisk->pool = pmemblk_open(pmem_file, 0);
+ if (!pdisk->pool) {
+ SPDK_ERRLOG("Opening pmem pool '%s' failed: %d\n", pmem_file, errno);
+ free(pdisk);
+ return errno;
+ }
+
+ block_size = pmemblk_bsize(pdisk->pool);
+ num_blocks = pmemblk_nblock(pdisk->pool);
+
+ if (block_size == 0) {
+ SPDK_ERRLOG("Block size must be more than 0 bytes\n");
+ pmemblk_close(pdisk->pool);
+ free(pdisk);
+ return EINVAL;
+ }
+
+ if (num_blocks == 0) {
+ SPDK_ERRLOG("Disk must be more than 0 blocks\n");
+ pmemblk_close(pdisk->pool);
+ free(pdisk);
+ return EINVAL;
+ }
+
+ pdisk->disk.name = strdup(name);
+ if (!pdisk->disk.name) {
+ pmemblk_close(pdisk->pool);
+ free(pdisk);
+ return ENOMEM;
+ }
+
+ pdisk->disk.product_name = "pmemblk disk";
+ pdisk->disk.write_cache = 0;
+ pdisk->disk.blocklen = block_size;
+ pdisk->disk.blockcnt = num_blocks;
+
+ pdisk->disk.ctxt = pdisk;
+ pdisk->disk.fn_table = &pmem_fn_table;
+ pdisk->disk.module = &pmem_if;
+
+ rc = spdk_bdev_register(&pdisk->disk);
+ if (rc) {
+ pmemblk_close(pdisk->pool);
+ free(pdisk->disk.name);
+ free(pdisk);
+ return rc;
+ }
+
+ TAILQ_INSERT_TAIL(&g_pmem_disks, pdisk, tailq);
+
+ *bdev = &pdisk->disk;
+
+ return 0;
+}
+
+void
+spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn, void *cb_arg)
+{
+ if (!bdev || bdev->module != &pmem_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
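+/* Example of the config section this parser accepts (illustrative names):
+ *
+ *   [Pmem]
+ *     Blk /mnt/pmem/pool0 Pmem0
+ *
+ * creates bdev "Pmem0" on top of the pmemblk pool file /mnt/pmem/pool0.
+ */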
+static void
+bdev_pmem_read_conf(void)
+{
+ struct spdk_conf_section *sp;
+ struct spdk_bdev *bdev;
+ const char *pmem_file;
+ const char *bdev_name;
+ int i;
+
+ sp = spdk_conf_find_section(NULL, "Pmem");
+ if (sp == NULL) {
+ return;
+ }
+
+ for (i = 0; ; i++) {
+ if (!spdk_conf_section_get_nval(sp, "Blk", i)) {
+ break;
+ }
+
+ pmem_file = spdk_conf_section_get_nmval(sp, "Blk", i, 0);
+ if (pmem_file == NULL) {
+ SPDK_ERRLOG("Pmem: missing filename\n");
+ continue;
+ }
+
+ bdev_name = spdk_conf_section_get_nmval(sp, "Blk", i, 1);
+ if (bdev_name == NULL) {
+ SPDK_ERRLOG("Pmem: missing bdev name\n");
+ continue;
+ }
+
+ spdk_create_pmem_disk(pmem_file, bdev_name, &bdev);
+ }
+}
+
+static int
+bdev_pmem_initialize(void)
+{
+ const char *err = pmemblk_check_version(PMEMBLK_MAJOR_VERSION, PMEMBLK_MINOR_VERSION);
+
+ if (err != NULL) {
+ SPDK_ERRLOG("Invalid libpmemblk version (expected %d.%d): %s\n", PMEMBLK_MAJOR_VERSION,
+ PMEMBLK_MINOR_VERSION, err);
+ return -1;
+ }
+
+ spdk_io_device_register(&g_pmem_disks, bdev_pmem_create_cb, bdev_pmem_destroy_cb, 0, "pmem_bdev");
+
+ bdev_pmem_read_conf();
+
+ return 0;
+}
+
+static void
+bdev_pmem_finish_done(void *io_device)
+{
+ spdk_bdev_module_finish_done();
+}
+
+static void
+bdev_pmem_finish(void)
+{
+ spdk_io_device_unregister(&g_pmem_disks, bdev_pmem_finish_done);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_pmem", SPDK_LOG_BDEV_PMEM)
diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem.h b/src/spdk/lib/bdev/pmem/bdev_pmem.h
new file mode 100644
index 00000000..7814166c
--- /dev/null
+++ b/src/spdk/lib/bdev/pmem/bdev_pmem.h
@@ -0,0 +1,64 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_PMEM_H
+#define SPDK_BDEV_PMEM_H
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_pmem_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create new pmem bdev.
+ *
+ * \param pmem_file Pointer to pmem pool file.
+ * \param name Bdev name.
+ * \param bdev Output parameter for the bdev when the operation is successful.
+ * \return 0 on success; a non-zero error code on failure:
+ * EIO if the pool check failed,
+ * EINVAL if input parameter validation failed,
+ * ENOMEM if a buffer cannot be allocated.
+ */
+int spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev);
+
+/**
+ * Delete pmem bdev.
+ *
+ * \param bdev Pointer to pmem bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_BDEV_PMEM_H */
diff --git a/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c b/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c
new file mode 100644
index 00000000..3156cffb
--- /dev/null
+++ b/src/spdk/lib/bdev/pmem/bdev_pmem_rpc.c
@@ -0,0 +1,350 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_pmem.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "libpmemblk.h"
+
+#include "spdk_internal/log.h"
+
+struct rpc_construct_pmem {
+ char *pmem_file;
+ char *name;
+};
+
+static void
+free_rpc_construct_pmem_bdev(struct rpc_construct_pmem *req)
+{
+ free(req->pmem_file);
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_pmem_decoders[] = {
+ {"pmem_file", offsetof(struct rpc_construct_pmem, pmem_file), spdk_json_decode_string},
+ {"name", offsetof(struct rpc_construct_pmem, name), spdk_json_decode_string},
+};
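+/* Example request (illustrative values only):
+ *   {"jsonrpc": "2.0", "method": "construct_pmem_bdev", "id": 1,
+ *    "params": {"pmem_file": "/mnt/pmem/pool0", "name": "Pmem0"}}
+ */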
+
+static void
+spdk_rpc_construct_pmem_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_pmem req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_pmem_decoders,
+ SPDK_COUNTOF(rpc_construct_pmem_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n");
+ rc = EINVAL;
+ goto invalid;
+ }
+ rc = spdk_create_pmem_disk(req.pmem_file, req.name, &bdev);
+ if (rc != 0) {
+ goto invalid;
+ }
+ if (bdev == NULL) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_construct_pmem_bdev(&req);
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+
+ free_rpc_construct_pmem_bdev(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(rc));
+ free_rpc_construct_pmem_bdev(&req);
+}
+SPDK_RPC_REGISTER("construct_pmem_bdev", spdk_rpc_construct_pmem_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_pmem {
+ char *name;
+};
+
+static void
+free_rpc_delete_pmem(struct rpc_delete_pmem *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_pmem_decoders[] = {
+ {"name", offsetof(struct rpc_delete_pmem, name), spdk_json_decode_string},
+};
+
+static void
+_spdk_rpc_delete_pmem_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_pmem_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_pmem req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_pmem_decoders,
+ SPDK_COUNTOF(rpc_delete_pmem_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ spdk_delete_pmem_disk(bdev, _spdk_rpc_delete_pmem_bdev_cb, request);
+ free_rpc_delete_pmem(&req);
+ return;
+
+invalid:
+ free_rpc_delete_pmem(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_pmem_bdev", spdk_rpc_delete_pmem_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_create_pmem_pool {
+ char *pmem_file;
+ uint64_t num_blocks;
+ uint32_t block_size;
+};
+
+static const struct spdk_json_object_decoder rpc_create_pmem_pool_decoders[] = {
+ {"pmem_file", offsetof(struct rpc_create_pmem_pool, pmem_file), spdk_json_decode_string},
+ {"num_blocks", offsetof(struct rpc_create_pmem_pool, num_blocks), spdk_json_decode_uint64},
+ {"block_size", offsetof(struct rpc_create_pmem_pool, block_size), spdk_json_decode_uint32},
+};
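+/* Example request (illustrative values; 32768 blocks of 512 bytes gives a
+ * 16 MiB pool, which satisfies both the 256-block minimum enforced below and,
+ * assuming libpmemblk's usual 16 MiB PMEMBLK_MIN_POOL, the minimum pool size):
+ *   {"jsonrpc": "2.0", "method": "create_pmem_pool", "id": 1,
+ *    "params": {"pmem_file": "/mnt/pmem/pool0", "num_blocks": 32768,
+ *               "block_size": 512}}
+ */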
+
+static void
+free_rpc_create_pmem_pool(struct rpc_create_pmem_pool *req)
+{
+ free(req->pmem_file);
+}
+
+static void
+spdk_rpc_create_pmem_pool(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_create_pmem_pool req = {};
+ struct spdk_json_write_ctx *w;
+ uint64_t pool_size;
+ PMEMblkpool *pbp;
+
+ if (spdk_json_decode_object(params, rpc_create_pmem_pool_decoders,
+ SPDK_COUNTOF(rpc_create_pmem_pool_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ /* libpmemblk pool has to contain at least 256 blocks */
+ if (req.num_blocks < 256) {
+ goto invalid;
+ }
+
+ pool_size = req.num_blocks * req.block_size;
+ if (pool_size < PMEMBLK_MIN_POOL) {
+ goto invalid;
+ }
+
+ pbp = pmemblk_create(req.pmem_file, req.block_size, pool_size, 0666);
+ if (pbp == NULL) {
+ goto invalid;
+ }
+
+ pmemblk_close(pbp);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_create_pmem_pool(&req);
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_create_pmem_pool(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_create_pmem_pool(&req);
+}
+SPDK_RPC_REGISTER("create_pmem_pool", spdk_rpc_create_pmem_pool, SPDK_RPC_RUNTIME)
+
+struct rpc_pmem_pool_info {
+ char *pmem_file;
+};
+
+static const struct spdk_json_object_decoder rpc_pmem_pool_info_decoders[] = {
+ {"pmem_file", offsetof(struct rpc_pmem_pool_info, pmem_file), spdk_json_decode_string},
+};
+
+static void
+free_rpc_pmem_pool_info(struct rpc_pmem_pool_info *req)
+{
+ free(req->pmem_file);
+}
+
+static void
+spdk_rpc_pmem_pool_info(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_pmem_pool_info req = {};
+ struct spdk_json_write_ctx *w;
+ size_t num_blocks, block_size;
+ PMEMblkpool *pbp;
+
+ if (spdk_json_decode_object(params, rpc_pmem_pool_info_decoders,
+ SPDK_COUNTOF(rpc_pmem_pool_info_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ pbp = pmemblk_open(req.pmem_file, 0);
+ if (pbp == NULL) {
+ goto invalid;
+ }
+
+ block_size = pmemblk_bsize(pbp);
+ num_blocks = pmemblk_nblock(pbp);
+
+ pmemblk_close(pbp);
+
+ /* Check pmem pool consistency */
+ if (pmemblk_check(req.pmem_file, block_size) != 1) {
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_pmem_pool_info(&req);
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+ spdk_json_write_object_begin(w);
+ spdk_json_write_name(w, "num_blocks");
+ spdk_json_write_uint64(w, num_blocks);
+ spdk_json_write_name(w, "block_size");
+ spdk_json_write_uint64(w, block_size);
+ spdk_json_write_object_end(w);
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_pmem_pool_info(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_pmem_pool_info(&req);
+}
+SPDK_RPC_REGISTER("pmem_pool_info", spdk_rpc_pmem_pool_info, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_pmem_pool {
+ char *pmem_file;
+};
+
+static const struct spdk_json_object_decoder rpc_delete_pmem_pool_decoders[] = {
+ {"pmem_file", offsetof(struct rpc_delete_pmem_pool, pmem_file), spdk_json_decode_string},
+};
+
+static void
+free_rpc_delete_pmem_pool(struct rpc_delete_pmem_pool *req)
+{
+ free(req->pmem_file);
+}
+
+static void
+spdk_rpc_delete_pmem_pool(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_pmem_pool req = {};
+ struct spdk_json_write_ctx *w;
+
+ if (spdk_json_decode_object(params, rpc_delete_pmem_pool_decoders,
+ SPDK_COUNTOF(rpc_delete_pmem_pool_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ /* Check if file is actually pmem pool */
+ if (pmemblk_check(req.pmem_file, 0) != 1) {
+ goto invalid;
+ }
+
+ unlink(req.pmem_file);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_delete_pmem_pool(&req);
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_delete_pmem_pool(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_delete_pmem_pool(&req);
+}
+SPDK_RPC_REGISTER("delete_pmem_pool", spdk_rpc_delete_pmem_pool, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/raid/Makefile b/src/spdk/lib/bdev/raid/Makefile
new file mode 100644
index 00000000..8332399d
--- /dev/null
+++ b/src/spdk/lib/bdev/raid/Makefile
@@ -0,0 +1,41 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
+C_SRCS = bdev_raid.c bdev_raid_rpc.c
+LIBNAME = vbdev_raid
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/raid/bdev_raid.c b/src/spdk/lib/bdev/raid/bdev_raid.c
new file mode 100644
index 00000000..51fa94ec
--- /dev/null
+++ b/src/spdk/lib/bdev/raid/bdev_raid.c
@@ -0,0 +1,1624 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_raid.h"
+#include "spdk/env.h"
+#include "spdk/io_channel.h"
+#include "spdk/conf.h"
+#include "spdk_internal/log.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/json.h"
+#include "spdk/string.h"
+
+static bool g_shutdown_started = false;
+
+/* raid bdev config as read from config file */
+struct raid_config g_spdk_raid_config = {
+ .raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_spdk_raid_config.raid_bdev_config_head),
+};
+
+/*
+ * List of raid bdevs in the configured state; these raid bdevs are registered
+ * with the bdev layer
+ */
+struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list;
+
+/* List of raid bdev in configuring list */
+struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list;
+
+/* List of all raid bdevs */
+struct spdk_raid_all_tailq g_spdk_raid_bdev_list;
+
+/* List of all raid bdevs that are offline */
+struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list;
+
+/* Function declarations */
+static void raid_bdev_examine(struct spdk_bdev *bdev);
+static int raid_bdev_init(void);
+static void raid_bdev_waitq_io_process(void *ctx);
+static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev);
+
+
+/*
+ * brief:
+ * raid_bdev_create_cb function is a cb function for raid bdev which creates the
+ * hierarchy from raid bdev to base bdev io channels. It will be called per core
+ * params:
+ * io_device - pointer to raid bdev io device represented by raid_bdev
+ * ctx_buf - pointer to context buffer for raid bdev io channel
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_create_cb(void *io_device, void *ctx_buf)
+{
+ struct raid_bdev *raid_bdev = io_device;
+ struct raid_bdev_io_channel *raid_ch = ctx_buf;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch);
+
+ assert(raid_bdev != NULL);
+ assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
+
+ raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs,
+ sizeof(struct spdk_io_channel *));
+ if (!raid_ch->base_channel) {
+ SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
+ return -ENOMEM;
+ }
+ for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ /*
+ * Get the spdk_io_channel for all the base bdevs. This is used during
+ * split logic to send the respective child bdev ios to respective base
+ * bdev io channel.
+ */
+ raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
+ raid_bdev->base_bdev_info[i].desc);
+ if (!raid_ch->base_channel[i]) {
+ for (uint32_t j = 0; j < i; j++) {
+ spdk_put_io_channel(raid_ch->base_channel[j]);
+ }
+ free(raid_ch->base_channel);
+ SPDK_ERRLOG("Unable to create io channel for base bdev\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
+ * hierarchy from raid bdev to base bdev io channels. It will be called per core
+ * params:
+ * io_device - pointer to raid bdev io device represented by raid_bdev
+ * ctx_buf - pointer to context buffer for raid bdev io channel
+ * returns:
+ * none
+ */
+static void
+raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct raid_bdev_io_channel *raid_ch = ctx_buf;
+ struct raid_bdev *raid_bdev = io_device;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n");
+
+ assert(raid_bdev != NULL);
+ assert(raid_ch != NULL);
+ assert(raid_ch->base_channel);
+ for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ /* Free base bdev channels */
+ assert(raid_ch->base_channel[i] != NULL);
+ spdk_put_io_channel(raid_ch->base_channel[i]);
+ raid_ch->base_channel[i] = NULL;
+ }
+ free(raid_ch->base_channel);
+ raid_ch->base_channel = NULL;
+}
+
+/*
+ * brief:
+ * raid_bdev_cleanup is used to cleanup and free raid_bdev related data
+ * structures.
+ * params:
+ * raid_bdev - pointer to raid_bdev
+ * returns:
+ * none
+ */
+void
+raid_bdev_cleanup(struct raid_bdev *raid_bdev)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
+ raid_bdev,
+ raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
+ if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
+ TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link);
+ } else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
+ TAILQ_REMOVE(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link);
+ } else {
+ assert(0);
+ }
+ TAILQ_REMOVE(&g_spdk_raid_bdev_list, raid_bdev, global_link);
+ free(raid_bdev->bdev.name);
+ raid_bdev->bdev.name = NULL;
+ assert(raid_bdev->base_bdev_info);
+ free(raid_bdev->base_bdev_info);
+ raid_bdev->base_bdev_info = NULL;
+ if (raid_bdev->config) {
+ raid_bdev->config->raid_bdev = NULL;
+ }
+ free(raid_bdev);
+}
+
+/*
+ * brief:
+ * free resource of base bdev for raid bdev
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * base_bdev_slot - position to base bdev in raid bdev
+ * returns:
+ * none
+ */
+void
+raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint32_t base_bdev_slot)
+{
+ struct raid_base_bdev_info *info;
+
+ info = &raid_bdev->base_bdev_info[base_bdev_slot];
+
+ spdk_bdev_module_release_bdev(info->bdev);
+ spdk_bdev_close(info->desc);
+ info->desc = NULL;
+ info->bdev = NULL;
+
+ assert(raid_bdev->num_base_bdevs_discovered);
+ raid_bdev->num_base_bdevs_discovered--;
+}
+
+/*
+ * brief:
+ * raid_bdev_destruct is the destruct function table pointer for raid bdev
+ * params:
+ * ctxt - pointer to raid_bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_destruct(void *ctxt)
+{
+ struct raid_bdev *raid_bdev = ctxt;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n");
+
+ raid_bdev->destruct_called = true;
+ for (uint16_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ /*
+ * Close all base bdev descriptors for which call has come from below
+ * layers. Also close the descriptors if we have started shutdown.
+ */
+ if (g_shutdown_started ||
+ ((raid_bdev->base_bdev_info[i].remove_scheduled == true) &&
+ (raid_bdev->base_bdev_info[i].bdev != NULL))) {
+ raid_bdev_free_base_bdev_resource(raid_bdev, i);
+ }
+ }
+
+ if (g_shutdown_started) {
+ TAILQ_REMOVE(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link);
+ raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+ TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link);
+ spdk_io_device_unregister(raid_bdev, NULL);
+ }
+
+ if (raid_bdev->num_base_bdevs_discovered == 0) {
+ /* Free raid_bdev when there are no base bdevs left */
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n");
+ raid_bdev_cleanup(raid_bdev);
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_io_completion function is called by lower layers to notify raid
+ * module that particular bdev_io is completed.
+ * params:
+ * bdev_io - pointer to bdev io submitted to lower layers, like child io
+ * success - bdev_io status
+ * cb_arg - function callback context, like parent io pointer
+ * returns:
+ * none
+ */
+static void
+raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *parent_io = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (success) {
+ spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ } else {
+ spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_submit_rw_request function is used to submit I/O to the correct
+ * member disk
+ * params:
+ * bdev_io - parent bdev io
+ * start_strip - start strip number of this io
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip)
+{
+ struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+ struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
+ struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
+ uint64_t pd_strip;
+ uint32_t offset_in_strip;
+ uint64_t pd_lba;
+ uint64_t pd_blocks;
+ uint32_t pd_idx;
+ int ret = 0;
+
+ pd_strip = start_strip / raid_bdev->num_base_bdevs;
+ pd_idx = start_strip % raid_bdev->num_base_bdevs;
+ offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
+ pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
+ pd_blocks = bdev_io->u.bdev.num_blocks;
+ if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
+ SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
+ assert(0);
+ }
+
+ /*
+ * Submit child io to bdev layer with using base bdev descriptors, base
+ * bdev lba, base bdev child io length in blocks, buffer, completion
+ * function and function callback context
+ */
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
+ raid_ch->base_channel[pd_idx],
+ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ pd_lba, pd_blocks, raid_bdev_io_completion,
+ bdev_io);
+ } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+ ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
+ raid_ch->base_channel[pd_idx],
+ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ pd_lba, pd_blocks, raid_bdev_io_completion,
+ bdev_io);
+ } else {
+ SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
+ assert(0);
+ }
+
+ return ret;
+}
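+
+/*
+ * Worked example of the mapping above (values are illustrative): with
+ * num_base_bdevs = 2 and strip_size = 128 blocks (strip_size_shift = 7), an
+ * io at offset_blocks = 300 has start_strip = 300 >> 7 = 2, so
+ * pd_strip = 2 / 2 = 1, pd_idx = 2 % 2 = 0, offset_in_strip = 300 & 127 = 44
+ * and pd_lba = (1 << 7) + 44 = 172 on base bdev 0.
+ */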
+
+/*
+ * brief:
+ * get_curr_base_bdev_index function calculates the base bdev index
+ * params:
+ * raid_bdev - pointer to pooled bdev
+ * raid_io - pointer to parent io context
+ * returns:
+ * base bdev index
+ */
+static uint8_t
+get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_io)
+{
+ struct spdk_bdev_io *bdev_io;
+ uint64_t start_strip;
+
+ bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx);
+ start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
+
+ return (start_strip % raid_bdev->num_base_bdevs);
+}
+
+/*
+ * brief:
+ * raid_bdev_io_submit_fail_process function processes an IO which failed to
+ * submit. On -ENOMEM it stores the context and queues the IO via the bdev
+ * wait queue logic; any other error completes the IO as failed.
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * bdev_io - pointer to bdev_io
+ * raid_io - pointer to raid bdev io
+ * ret - return code
+ * returns:
+ * none
+ */
+static void
+raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io,
+ struct raid_bdev_io *raid_io, int ret)
+{
+ struct raid_bdev_io_channel *raid_ch;
+ uint8_t pd_idx;
+
+ if (ret != -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ } else {
+ /* Queue the IO to bdev layer wait queue */
+ pd_idx = get_curr_base_bdev_index(raid_bdev, raid_io);
+ raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev;
+ raid_io->waitq_entry.cb_fn = raid_bdev_waitq_io_process;
+ raid_io->waitq_entry.cb_arg = raid_io;
+ raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
+ if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev,
+ raid_ch->base_channel[pd_idx],
+ &raid_io->waitq_entry) != 0) {
+ SPDK_ERRLOG("bdev io waitq error, it should not happen\n");
+ assert(0);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_waitq_io_process function is the callback function
+ * registered by raid bdev module to bdev when bdev_io was unavailable.
+ * params:
+ * ctx - pointer to raid_bdev_io
+ * returns:
+ * none
+ */
+static void
+raid_bdev_waitq_io_process(void *ctx)
+{
+ struct raid_bdev_io *raid_io = ctx;
+ struct spdk_bdev_io *bdev_io;
+ struct raid_bdev *raid_bdev;
+ int ret;
+ uint64_t start_strip;
+
+ bdev_io = SPDK_CONTAINEROF(raid_io, struct spdk_bdev_io, driver_ctx);
+ /*
+ * Try to submit the children of the parent bdev io. If a submission fails
+ * due to a resource shortage, stop and don't try to process other queued IOs.
+ */
+ raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
+ start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
+ ret = raid_bdev_submit_rw_request(bdev_io, start_strip);
+ if (ret != 0) {
+ raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret);
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_start_rw_request function is the submit_request function for
+ * read/write requests
+ * params:
+ * ch - pointer to raid bdev io channel
+ * bdev_io - pointer to parent bdev_io on raid bdev device
+ * returns:
+ * none
+ */
+static void
+raid_bdev_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct raid_bdev_io *raid_io;
+ struct raid_bdev *raid_bdev;
+ uint64_t start_strip = 0;
+ uint64_t end_strip = 0;
+ int ret;
+
+ raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
+ raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+ raid_io->ch = ch;
+ start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
+ end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
+ raid_bdev->strip_size_shift;
+ if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
+ assert(false);
+ SPDK_ERRLOG("I/O spans strip boundary!\n");
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+ ret = raid_bdev_submit_rw_request(bdev_io, start_strip);
+ if (ret != 0) {
+ raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_io, ret);
+ }
+}
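+
+/*
+ * For example (values are illustrative): with strip_size = 128 blocks, an io
+ * at offset_blocks = 120 with num_blocks = 16 has start_strip = 0 and
+ * end_strip = (120 + 16 - 1) >> 7 = 1, so it spans a strip boundary and is
+ * failed above. The bdev layer normally prevents this by splitting on
+ * optimal_io_boundary, which raid_bdev_configure sets to the strip size.
+ */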
+
+/*
+ * brief:
+ * raid_bdev_reset_completion is the completion callback for member disk resets
+ * params:
+ * bdev_io - pointer to member disk reset bdev_io
+ * success - true if reset was successful, false if unsuccessful
+ * cb_arg - callback argument (parent reset bdev_io)
+ * returns:
+ * none
+ */
+static void
+raid_bdev_reset_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *parent_io = cb_arg;
+ struct raid_bdev *raid_bdev = (struct raid_bdev *)parent_io->bdev->ctxt;
+ struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ raid_io->base_bdev_reset_status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ raid_io->base_bdev_reset_completed++;
+ if (raid_io->base_bdev_reset_completed == raid_bdev->num_base_bdevs) {
+ spdk_bdev_io_complete(parent_io, raid_io->base_bdev_reset_status);
+ }
+}
+
+/*
+ * brief:
+ * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests
+ * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
+ * which case it will queue it for later submission
+ * params:
+ * bdev_io - pointer to parent bdev_io on raid bdev device
+ * returns:
+ * none
+ */
+static void
+_raid_bdev_submit_reset_request_next(void *_bdev_io)
+{
+ struct spdk_bdev_io *bdev_io = _bdev_io;
+ struct raid_bdev_io *raid_io;
+ struct raid_bdev *raid_bdev;
+ struct raid_bdev_io_channel *raid_ch;
+ int ret;
+ uint8_t i;
+
+ raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
+ raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+ raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
+
+ while (raid_io->base_bdev_reset_submitted < raid_bdev->num_base_bdevs) {
+ i = raid_io->base_bdev_reset_submitted;
+ ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc,
+ raid_ch->base_channel[i],
+ raid_bdev_reset_completion, bdev_io);
+ if (ret == 0) {
+ raid_io->base_bdev_reset_submitted++;
+ } else if (ret == -ENOMEM) {
+ raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[i].bdev;
+ raid_io->waitq_entry.cb_fn = _raid_bdev_submit_reset_request_next;
+ raid_io->waitq_entry.cb_arg = bdev_io;
+ spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[i].bdev,
+ raid_ch->base_channel[i],
+ &raid_io->waitq_entry);
+ return;
+ } else {
+ assert(false);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+ }
+}
+
+/*
+ * brief:
+ * _raid_bdev_submit_reset_request function is the submit_request function for
+ * reset requests
+ * params:
+ * ch - pointer to raid bdev io channel
+ * bdev_io - pointer to parent bdev_io on raid bdev device
+ * returns:
+ * none
+ */
+static void
+_raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct raid_bdev_io *raid_io;
+
+ raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+ raid_io->ch = ch;
+ raid_io->base_bdev_reset_submitted = 0;
+ raid_io->base_bdev_reset_completed = 0;
+ raid_io->base_bdev_reset_status = SPDK_BDEV_IO_STATUS_SUCCESS;
+ _raid_bdev_submit_reset_request_next(bdev_io);
+}
+
+/*
+ * brief:
+ * raid_bdev_submit_request function is the submit_request function pointer of
+ * raid bdev function table. This is used to submit the io on raid_bdev to below
+ * layers.
+ * params:
+ * ch - pointer to raid bdev io channel
+ * bdev_io - pointer to parent bdev_io on raid bdev device
+ * returns:
+ * none
+ */
+static void
+raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
+ spdk_bdev_io_get_buf(bdev_io, raid_bdev_start_rw_request,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ } else {
+ /* Just call it directly if iov_base is already populated. */
+ raid_bdev_start_rw_request(ch, bdev_io);
+ }
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ raid_bdev_start_rw_request(ch, bdev_io);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ // TODO: support flush if requirement comes
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ _raid_bdev_submit_reset_request(ch, bdev_io);
+ break;
+
+ default:
+ SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ break;
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_io_type_supported is the io_supported function for bdev function
+ * table which returns whether the particular io type is supported or not by
+ * raid bdev module
+ * params:
+ * ctx - pointer to raid bdev context
+ * type - io type
+ * returns:
+ * true - io_type is supported
+ * false - io_type is not supported
+ */
+static bool
+raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+/*
+ * brief:
+ * raid_bdev_get_io_channel is the get_io_channel function table pointer for
+ * raid bdev. This is used to return the io channel for this raid bdev
+ * params:
+ * ctxt - pointer to raid_bdev
+ * returns:
+ * pointer to io channel for raid bdev
+ */
+static struct spdk_io_channel *
+raid_bdev_get_io_channel(void *ctxt)
+{
+ struct raid_bdev *raid_bdev = ctxt;
+
+ return spdk_get_io_channel(raid_bdev);
+}
+
+/*
+ * brief:
+ * raid_bdev_dump_info_json is the dump_info_json function table pointer for
+ * raid bdev; it dumps the raid bdev state as JSON
+ * params:
+ * ctx - pointer to raid_bdev
+ * w - pointer to json context
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct raid_bdev *raid_bdev = ctx;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n");
+ assert(raid_bdev != NULL);
+
+ /* Dump the raid bdev configuration related information */
+ spdk_json_write_name(w, "raid");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size);
+ spdk_json_write_named_uint32(w, "state", raid_bdev->state);
+ spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level);
+ spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
+ spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
+ spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
+ spdk_json_write_name(w, "base_bdevs_list");
+ spdk_json_write_array_begin(w);
+ for (uint16_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ if (raid_bdev->base_bdev_info[i].bdev) {
+ spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name);
+ } else {
+ spdk_json_write_null(w);
+ }
+ }
+ spdk_json_write_array_end(w);
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_write_config_json is the write_config_json function table pointer
+ * for raid bdev; it writes the JSON config needed to recreate this raid bdev
+ * params:
+ * bdev - pointer to spdk_bdev
+ * w - pointer to json context
+ * returns:
+ * none
+ */
+static void
+raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct raid_bdev *raid_bdev = bdev->ctxt;
+ struct spdk_bdev *base;
+ uint16_t i;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_raid_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size);
+ spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level);
+
+ spdk_json_write_named_array_begin(w, "base_bdevs");
+ for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ base = raid_bdev->base_bdev_info[i].bdev;
+ if (base) {
+ spdk_json_write_string(w, base->name);
+ }
+ }
+ spdk_json_write_array_end(w);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
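+
+/*
+ * Example of the config emitted above (values are illustrative):
+ * {"method": "construct_raid_bdev",
+ *  "params": {"name": "raid1", "strip_size": 64, "raid_level": 0,
+ *             "base_bdevs": ["Nvme0n1", "Nvme1n1"]}}
+ */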
+
+/* g_raid_bdev_fn_table is the function table for raid bdev */
+static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
+ .destruct = raid_bdev_destruct,
+ .submit_request = raid_bdev_submit_request,
+ .io_type_supported = raid_bdev_io_type_supported,
+ .get_io_channel = raid_bdev_get_io_channel,
+ .dump_info_json = raid_bdev_dump_info_json,
+ .write_config_json = raid_bdev_write_config_json,
+};
+
+/*
+ * brief:
+ * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
+ * params:
+ * raid_cfg - pointer to raid_bdev_config structure
+ * returns:
+ * none
+ */
+void
+raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
+{
+ uint32_t i;
+
+ TAILQ_REMOVE(&g_spdk_raid_config.raid_bdev_config_head, raid_cfg, link);
+ g_spdk_raid_config.total_raid_bdev--;
+
+ if (raid_cfg->base_bdev) {
+ for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
+ free(raid_cfg->base_bdev[i].name);
+ }
+ free(raid_cfg->base_bdev);
+ }
+ free(raid_cfg->name);
+ free(raid_cfg);
+}
+
+/*
+ * brief:
+ * raid_bdev_free frees all raid bdev configurations. It is called on the
+ * module exit path
+ * params:
+ * none
+ * returns:
+ * none
+ */
+static void
+raid_bdev_free(void)
+{
+ struct raid_bdev_config *raid_cfg, *tmp;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n");
+ TAILQ_FOREACH_SAFE(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link, tmp) {
+ raid_bdev_config_cleanup(raid_cfg);
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_config_find_by_name is a helper function to find a raid bdev
+ * config by name.
+ * params:
+ * raid_name - name of the raid bdev.
+ * returns:
+ * pointer to the matching raid bdev config, or NULL if none is found.
+ */
+struct raid_bdev_config *
+raid_bdev_config_find_by_name(const char *raid_name)
+{
+ struct raid_bdev_config *raid_cfg;
+
+ TAILQ_FOREACH(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link) {
+ if (!strcmp(raid_cfg->name, raid_name)) {
+ return raid_cfg;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * brief
+ * raid_bdev_config_add function adds config for newly created raid bdev.
+ *
+ * params:
+ * raid_name - name for raid bdev.
+ * strip_size - strip size in KB
+ * num_base_bdevs - number of base bdevs.
+ * raid_level - raid level, only raid level 0 is supported.
+ * _raid_cfg - Pointer to newly added configuration
+ */
+int
+raid_bdev_config_add(const char *raid_name, int strip_size, int num_base_bdevs,
+ int raid_level, struct raid_bdev_config **_raid_cfg)
+{
+ struct raid_bdev_config *raid_cfg;
+
+ raid_cfg = raid_bdev_config_find_by_name(raid_name);
+ if (raid_cfg != NULL) {
+ SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
+ raid_name);
+ return -EEXIST;
+ }
+
+ if (spdk_u32_is_pow2(strip_size) == false) {
+ SPDK_ERRLOG("Invalid strip size %d\n", strip_size);
+ return -EINVAL;
+ }
+
+ if (num_base_bdevs <= 0) {
+ SPDK_ERRLOG("Invalid base device count %d\n", num_base_bdevs);
+ return -EINVAL;
+ }
+
+ if (raid_level != 0) {
+ SPDK_ERRLOG("invalid raid level %d, only raid level 0 is supported\n",
+ raid_level);
+ return -EINVAL;
+ }
+
+ raid_cfg = calloc(1, sizeof(*raid_cfg));
+ if (raid_cfg == NULL) {
+ SPDK_ERRLOG("unable to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ raid_cfg->name = strdup(raid_name);
+ if (!raid_cfg->name) {
+ free(raid_cfg);
+ SPDK_ERRLOG("unable to allocate memory\n");
+ return -ENOMEM;
+ }
+ raid_cfg->strip_size = strip_size;
+ raid_cfg->num_base_bdevs = num_base_bdevs;
+ raid_cfg->raid_level = raid_level;
+
+ raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
+ if (raid_cfg->base_bdev == NULL) {
+ free(raid_cfg->name);
+ free(raid_cfg);
+ SPDK_ERRLOG("unable to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&g_spdk_raid_config.raid_bdev_config_head, raid_cfg, link);
+ g_spdk_raid_config.total_raid_bdev++;
+
+ *_raid_cfg = raid_cfg;
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
+ *
+ * params:
+ * raid_cfg - pointer to raid bdev configuration
+ * base_bdev_name - name of base bdev
+ * slot - Position to add base bdev
+ */
+int
+raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
+ uint32_t slot)
+{
+ uint32_t i;
+ struct raid_bdev_config *tmp;
+
+ if (slot >= raid_cfg->num_base_bdevs) {
+ return -EINVAL;
+ }
+
+ TAILQ_FOREACH(tmp, &g_spdk_raid_config.raid_bdev_config_head, link) {
+ for (i = 0; i < tmp->num_base_bdevs; i++) {
+ if (tmp->base_bdev[i].name != NULL) {
+ if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
+ SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
+ base_bdev_name);
+ return -EEXIST;
+ }
+ }
+ }
+ }
+
+ raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
+ if (raid_cfg->base_bdev[slot].name == NULL) {
+ SPDK_ERRLOG("unable to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_parse_raid is used to parse the raid bdev from config file based on
+ * pre-defined raid bdev format in config file.
+ * Format of config file:
+ * [RAID1]
+ * Name raid1
+ * StripSize 64
+ * NumDevices 2
+ * RaidLevel 0
+ * Devices Nvme0n1 Nvme1n1
+ *
+ * [RAID2]
+ * Name raid2
+ * StripSize 64
+ * NumDevices 3
+ * RaidLevel 0
+ * Devices Nvme2n1 Nvme3n1 Nvme4n1
+ *
+ * params:
+ * conf_section - pointer to config section
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_parse_raid(struct spdk_conf_section *conf_section)
+{
+ const char *raid_name;
+ int strip_size;
+ int i, num_base_bdevs;
+ int raid_level;
+ const char *base_bdev_name;
+ struct raid_bdev_config *raid_cfg;
+ int rc;
+
+ raid_name = spdk_conf_section_get_val(conf_section, "Name");
+ if (raid_name == NULL) {
+ SPDK_ERRLOG("raid_name %s is null\n", raid_name);
+ return -EINVAL;
+ }
+
+ strip_size = spdk_conf_section_get_intval(conf_section, "StripSize");
+ num_base_bdevs = spdk_conf_section_get_intval(conf_section, "NumDevices");
+ raid_level = spdk_conf_section_get_intval(conf_section, "RaidLevel");
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %d %d %d\n", raid_name, strip_size, num_base_bdevs,
+ raid_level);
+
+ rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, raid_level,
+ &raid_cfg);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to add raid bdev config\n");
+ return rc;
+ }
+
+ for (i = 0; true; i++) {
+ base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i);
+ if (base_bdev_name == NULL) {
+ break;
+ }
+ if (i >= num_base_bdevs) {
+ raid_bdev_config_cleanup(raid_cfg);
+ SPDK_ERRLOG("Number of devices mentioned is more than count\n");
+ return -EINVAL;
+ }
+
+ rc = raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i);
+ if (rc != 0) {
+ raid_bdev_config_cleanup(raid_cfg);
+ SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n");
+ return rc;
+ }
+ }
+
+ if (i != raid_cfg->num_base_bdevs) {
+ raid_bdev_config_cleanup(raid_cfg);
+ SPDK_ERRLOG("Number of devices mentioned is less than count\n");
+ return -EINVAL;
+ }
+
+ rc = raid_bdev_create(raid_cfg);
+ if (rc != 0) {
+ raid_bdev_config_cleanup(raid_cfg);
+ SPDK_ERRLOG("Failed to create raid bdev\n");
+ return rc;
+ }
+
+ rc = raid_bdev_add_base_devices(raid_cfg);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n");
+ /* Config is not removed in this case. */
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_parse_config is used to find the raid bdev config sections and
+ * parse them.
+ * params:
+ * none
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_parse_config(void)
+{
+ int ret;
+ struct spdk_conf_section *conf_section;
+
+ conf_section = spdk_conf_first_section(NULL);
+ while (conf_section != NULL) {
+ if (spdk_conf_section_match_prefix(conf_section, "RAID")) {
+ ret = raid_bdev_parse_raid(conf_section);
+ if (ret < 0) {
+ SPDK_ERRLOG("Unable to parse raid bdev section\n");
+ return ret;
+ }
+ }
+ conf_section = spdk_conf_next_section(conf_section);
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_fini_start is called when bdev layer is starting the
+ * shutdown process
+ * params:
+ * none
+ * returns:
+ * none
+ */
+static void
+raid_bdev_fini_start(void)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n");
+ g_shutdown_started = true;
+}
+
+/*
+ * brief:
+ * raid_bdev_exit is called on raid bdev module exit time by bdev layer
+ * params:
+ * none
+ * returns:
+ * none
+ */
+static void
+raid_bdev_exit(void)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n");
+ raid_bdev_free();
+}
+
+/*
+ * brief:
+ * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
+ * module
+ * params:
+ * none
+ * returns:
+ * size of spdk_bdev_io context for raid
+ */
+static int
+raid_bdev_get_ctx_size(void)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n");
+ return sizeof(struct raid_bdev_io);
+}
+
+/*
+ * brief:
+ * raid_bdev_get_running_config writes the running configuration in config
+ * file format.
+ *
+ * params:
+ * fp - pointer to the file to which the configuration options are written.
+ * returns:
+ * none
+ */
+static void
+raid_bdev_get_running_config(FILE *fp)
+{
+ struct raid_bdev *raid_bdev;
+ struct spdk_bdev *base;
+ int index = 1;
+ uint16_t i;
+
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configured_list, state_link) {
+ fprintf(fp,
+ "\n"
+ "[RAID%d]\n"
+ " Name %s\n"
+ " StripSize %" PRIu32 "\n"
+ " NumDevices %hu\n"
+ " RaidLevel %hhu\n",
+ index, raid_bdev->bdev.name, raid_bdev->strip_size,
+ raid_bdev->num_base_bdevs, raid_bdev->raid_level);
+ fprintf(fp,
+ " Devices ");
+ for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ base = raid_bdev->base_bdev_info[i].bdev;
+ if (base) {
+ fprintf(fp,
+ "%s ",
+ base->name);
+ }
+ }
+ fprintf(fp,
+ "\n");
+ index++;
+ }
+}
+
+/*
+ * brief:
+ * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
+ * claimed by raid bdev or not.
+ * params:
+ * bdev_name - represents base bdev name
+ * _raid_cfg - pointer to raid bdev config parsed from config file
+ * base_bdev_slot - if the bdev can be claimed, the correct slot for the base
+ * bdev. This field is only valid if the return value of this function is true
+ * returns:
+ * true - if bdev can be claimed
+ * false - if bdev can't be claimed
+ */
+static bool
+raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
+ uint32_t *base_bdev_slot)
+{
+ struct raid_bdev_config *raid_cfg;
+ uint32_t i;
+
+ TAILQ_FOREACH(raid_cfg, &g_spdk_raid_config.raid_bdev_config_head, link) {
+ for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
+ /*
+ * Check if the base bdev name is part of raid bdev configuration.
+ * If match is found then return true and the slot information where
+ * this base bdev should be inserted in raid bdev
+ */
+ if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
+ *_raid_cfg = raid_cfg;
+ *base_bdev_slot = i;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+static struct spdk_bdev_module g_raid_if = {
+ .name = "raid",
+ .module_init = raid_bdev_init,
+ .fini_start = raid_bdev_fini_start,
+ .module_fini = raid_bdev_exit,
+ .get_ctx_size = raid_bdev_get_ctx_size,
+ .examine_config = raid_bdev_examine,
+ .config_text = raid_bdev_get_running_config,
+ .async_init = false,
+ .async_fini = false,
+};
+SPDK_BDEV_MODULE_REGISTER(&g_raid_if)
+
+/*
+ * brief:
+ * raid_bdev_init is the initialization function for raid bdev module
+ * params:
+ * none
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_init(void)
+{
+ int ret;
+
+ TAILQ_INIT(&g_spdk_raid_bdev_configured_list);
+ TAILQ_INIT(&g_spdk_raid_bdev_configuring_list);
+ TAILQ_INIT(&g_spdk_raid_bdev_list);
+ TAILQ_INIT(&g_spdk_raid_bdev_offline_list);
+
+ /* Parse config file for raids */
+ ret = raid_bdev_parse_config();
+ if (ret < 0) {
+ SPDK_ERRLOG("raid bdev init failed parsing\n");
+ raid_bdev_free();
+ return ret;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n");
+
+ return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_create allocates raid bdev based on passed configuration
+ * params:
+ * raid_cfg - configuration of raid bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+int
+raid_bdev_create(struct raid_bdev_config *raid_cfg)
+{
+ struct raid_bdev *raid_bdev;
+ struct spdk_bdev *raid_bdev_gen;
+
+ raid_bdev = calloc(1, sizeof(*raid_bdev));
+ if (!raid_bdev) {
+ SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
+ return -ENOMEM;
+ }
+
+ assert(raid_cfg->num_base_bdevs != 0);
+ raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
+ raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
+ sizeof(struct raid_base_bdev_info));
+ if (!raid_bdev->base_bdev_info) {
+ SPDK_ERRLOG("Unable able to allocate base bdev info\n");
+ free(raid_bdev);
+ return -ENOMEM;
+ }
+
+ raid_bdev->strip_size = raid_cfg->strip_size;
+ raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
+ raid_bdev->config = raid_cfg;
+
+ raid_bdev_gen = &raid_bdev->bdev;
+
+ raid_bdev_gen->name = strdup(raid_cfg->name);
+ if (!raid_bdev_gen->name) {
+ SPDK_ERRLOG("Unable to allocate name for raid\n");
+ free(raid_bdev->base_bdev_info);
+ free(raid_bdev);
+ return -ENOMEM;
+ }
+
+ raid_bdev_gen->product_name = "Pooled Device";
+ raid_bdev_gen->ctxt = raid_bdev;
+ raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
+ raid_bdev_gen->module = &g_raid_if;
+
+ TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link);
+ TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_list, raid_bdev, global_link);
+
+ raid_cfg->raid_bdev = raid_bdev;
+
+ return 0;
+}
+
+/*
+ * brief
+ * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * bdev - pointer to base bdev
+ * base_bdev_slot - position to add base bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev,
+ uint32_t base_bdev_slot)
+{
+ struct spdk_bdev_desc *desc;
+ int rc;
+
+ rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc);
+ if (rc != 0) {
+ SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name);
+ return rc;
+ }
+
+ rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
+ if (rc != 0) {
+ SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
+ spdk_bdev_close(desc);
+ return rc;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name);
+
+ assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
+ assert(base_bdev_slot < raid_bdev->num_base_bdevs);
+
+ raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
+ raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
+ raid_bdev->num_base_bdevs_discovered++;
+ assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
+
+ return 0;
+}
+
+/*
+ * brief:
+ * Register the raid bdev with the bdev layer only once the raid bdev config
+ * is complete, then move this raid bdev from the configuring list to the
+ * configured list
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_configure(struct raid_bdev *raid_bdev)
+{
+ uint32_t blocklen;
+ uint64_t min_blockcnt;
+ struct spdk_bdev *raid_bdev_gen;
+ int rc = 0;
+
+ blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen;
+ min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt;
+ for (uint32_t i = 1; i < raid_bdev->num_base_bdevs; i++) {
+ /* Calculate minimum block count from all base bdevs */
+ if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) {
+ min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt;
+ }
+
+ /* Check that the blocklen is the same for all base bdevs */
+ if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) {
+ /*
+ * Assumption is that all the base bdevs for any raid bdev should
+ * have same blocklen
+ */
+ SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
+ return -EINVAL;
+ }
+ }
+
+ raid_bdev_gen = &raid_bdev->bdev;
+ raid_bdev_gen->write_cache = 0;
+ raid_bdev_gen->blocklen = blocklen;
+ raid_bdev_gen->ctxt = raid_bdev;
+ raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
+ raid_bdev_gen->module = &g_raid_if;
+ raid_bdev->strip_size = (raid_bdev->strip_size * 1024) / blocklen;
+ raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
+ raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
+ if (raid_bdev->num_base_bdevs > 1) {
+ raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size;
+ raid_bdev_gen->split_on_optimal_io_boundary = true;
+ } else {
+ /* Do not need to split reads/writes on single bdev RAID modules. */
+ raid_bdev_gen->optimal_io_boundary = 0;
+ raid_bdev_gen->split_on_optimal_io_boundary = false;
+ }
+
+ /*
+ * RAID bdev logic is striping, so take the minimum-block-count approach:
+ * the total block count of the raid bdev is the number of base bdevs times
+ * the minimum block count of any base bdev, rounded down to a strip multiple
+ */
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n",
+ min_blockcnt,
+ raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
+ raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) <<
+ raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs;
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev);
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt,
+ raid_bdev_gen->blocklen);
+ if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
+ raid_bdev->state = RAID_BDEV_STATE_ONLINE;
+ spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
+ sizeof(struct raid_bdev_io_channel),
+ raid_bdev->bdev.name);
+ rc = spdk_bdev_register(raid_bdev_gen);
+ if (rc != 0) {
+ SPDK_ERRLOG("Unable to register pooled bdev and stay at configuring state\n");
+ spdk_io_device_unregister(raid_bdev, NULL);
+ raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
+ return rc;
+ }
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen);
+ TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, state_link);
+ TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link);
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n",
+ raid_bdev_gen->name, raid_bdev);
+ }
+
+ return 0;
+}
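+
+/*
+ * Worked example of the sizing above (values are illustrative): a configured
+ * strip_size of 64 (KB) with blocklen = 512 gives strip_size = 64 * 1024 /
+ * 512 = 128 blocks and strip_size_shift = 7. With two base bdevs whose
+ * smallest blockcnt is 1000, blockcnt = ((1000 >> 7) << 7) * 2 = 1792: the
+ * minimum block count rounded down to a strip multiple, times the number of
+ * base bdevs.
+ */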
+
+/*
+ * brief:
+ * If raid bdev is online and registered, change the bdev state to
+ * configuring and unregister this raid device. Queue this raid device
+ * in configuring list
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * returns:
+ * none
+ */
+static void
+raid_bdev_deconfigure(struct raid_bdev *raid_bdev)
+{
+ if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
+ return;
+ }
+
+ assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
+ TAILQ_REMOVE(&g_spdk_raid_bdev_configured_list, raid_bdev, state_link);
+ raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+ assert(raid_bdev->num_base_bdevs_discovered);
+ TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, state_link);
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");
+
+ spdk_io_device_unregister(raid_bdev, NULL);
+ spdk_bdev_unregister(&raid_bdev->bdev, NULL, NULL);
+}
+
+/*
+ * brief:
+ * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
+ * is removed. This function checks if this base bdev is part of any raid bdev
+ * or not. If yes, it takes necessary action on that particular raid bdev.
+ * params:
+ * ctx - pointer to base bdev pointer which got removed
+ * returns:
+ * none
+ */
+void
+raid_bdev_remove_base_bdev(void *ctx)
+{
+ struct spdk_bdev *base_bdev = ctx;
+ struct raid_bdev *raid_bdev;
+ uint16_t i;
+ bool found = false;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n");
+
+ /* Find the raid_bdev which has claimed this base_bdev */
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, global_link) {
+ for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
+ if (raid_bdev->base_bdev_info[i].bdev == base_bdev) {
+ found = true;
+ break;
+ }
+ }
+ if (found == true) {
+ break;
+ }
+ }
+
+ if (found == false) {
+ SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
+ return;
+ }
+
+ assert(raid_bdev != NULL);
+ assert(raid_bdev->base_bdev_info[i].bdev);
+ assert(raid_bdev->base_bdev_info[i].desc);
+ raid_bdev->base_bdev_info[i].remove_scheduled = true;
+
+ if ((raid_bdev->destruct_called == true ||
+ raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) &&
+ raid_bdev->base_bdev_info[i].bdev != NULL) {
+ /*
+ * As the raid bdev is not registered yet or is already unregistered,
+ * cleanup should be done here
+ */
+ raid_bdev_free_base_bdev_resource(raid_bdev, i);
+ if (raid_bdev->num_base_bdevs_discovered == 0) {
+ /* There is no base bdev left for this raid, so free the raid device */
+ raid_bdev_cleanup(raid_bdev);
+ return;
+ }
+ }
+
+ raid_bdev_deconfigure(raid_bdev);
+}
+
+/*
+ * brief:
+ * raid_bdev_add_base_device function adds the base device to the existing
+ * raid bdev, claims it and keeps the open descriptor. Once all base bdevs
+ * are discovered, it configures the raid bdev.
+ * params:
+ * raid_cfg - pointer to raid bdev config
+ * bdev - pointer to base bdev
+ * base_bdev_slot - position to add base bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev,
+ uint32_t base_bdev_slot)
+{
+ struct raid_bdev *raid_bdev;
+ int rc;
+
+ raid_bdev = raid_cfg->raid_bdev;
+ if (!raid_bdev) {
+ SPDK_ERRLOG("Raid bdev is not created yet '%s'\n", bdev->name);
+ return -ENODEV;
+ }
+
+ rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name);
+ return rc;
+ }
+
+ assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
+
+ if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
+ rc = raid_bdev_configure(raid_bdev);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to configure raid bdev\n");
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * brief:
+ * Add base bdevs to the raid bdev one by one. Skip any base bdev which doesn't
+ * exist or fails to add. If all base bdevs are successfully added, the raid bdev
+ * moves to the configured state and becomes available. Otherwise, the raid bdev
+ * stays at the configuring state with added base bdevs.
+ * params:
+ * raid_cfg - pointer to raid bdev config
+ * returns:
+ * 0 - The raid bdev moves to the configured state or stays at the configuring
+ * state with added base bdevs due to any nonexistent base bdev.
+ * non zero - Failed to add any base bdev and stays at the configuring state with
+ * added base bdevs.
+ */
+int
+raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
+{
+ struct spdk_bdev *base_bdev;
+ uint8_t i;
+ int rc = 0, _rc;
+
+ for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
+ base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name);
+ if (base_bdev == NULL) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n",
+ raid_cfg->base_bdev[i].name);
+ continue;
+ }
+
+ _rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i);
+ if (_rc != 0) {
+ SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
+ raid_cfg->base_bdev[i].name, raid_cfg->name,
+ spdk_strerror(-_rc));
+ if (rc == 0) {
+ rc = _rc;
+ }
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * brief:
+ * raid_bdev_examine function is the examine function called by the below
+ * layers, like the bdev_nvme layer. It checks whether this base bdev can be
+ * claimed by this raid bdev or not.
+ * params:
+ * bdev - pointer to base bdev
+ * returns:
+ * none
+ */
+static void
+raid_bdev_examine(struct spdk_bdev *bdev)
+{
+ struct raid_bdev_config *raid_cfg;
+ uint32_t base_bdev_slot;
+
+ if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
+ raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot);
+ } else {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n",
+ bdev->name);
+ }
+
+ spdk_bdev_module_examine_done(&g_raid_if);
+}
+
+/* Log component for bdev raid bdev module */
+SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID)
diff --git a/src/spdk/lib/bdev/raid/bdev_raid.h b/src/spdk/lib/bdev/raid/bdev_raid.h
new file mode 100644
index 00000000..39f055ed
--- /dev/null
+++ b/src/spdk/lib/bdev/raid/bdev_raid.h
@@ -0,0 +1,225 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_RAID_INTERNAL_H
+#define SPDK_BDEV_RAID_INTERNAL_H
+
+#include "spdk/bdev_module.h"
+
+/*
+ * Raid state describes the state of the raid. This raid bdev can be either in
+ * configured list or configuring list
+ */
+enum raid_bdev_state {
+ /* raid bdev is ready and is seen by upper layers */
+ RAID_BDEV_STATE_ONLINE,
+
+ /*
+ * raid bdev is configuring; not all underlying bdevs are present yet,
+ * and it can't be seen by upper layers.
+ */
+ RAID_BDEV_STATE_CONFIGURING,
+
+ /*
+ * In offline state, raid bdev layer will complete all incoming commands without
+ * submitting to underlying base nvme bdevs
+ */
+ RAID_BDEV_STATE_OFFLINE,
+
+ /* raid bdev max, new states should be added before this */
+ RAID_BDEV_MAX
+};
+
+/*
+ * raid_base_bdev_info contains information for the base bdevs which are part of some
+ * raid. This structure contains the per base bdev information. Whatever is
+ * required per base device for raid bdev will be kept here
+ */
+struct raid_base_bdev_info {
+ /* pointer to base spdk bdev */
+ struct spdk_bdev *bdev;
+
+ /* pointer to base bdev descriptor opened by raid bdev */
+ struct spdk_bdev_desc *desc;
+
+ /*
+ * When underlying base device calls the hot plug function on drive removal,
+ * this flag will be set and later after doing some processing, base device
+ * descriptor will be closed
+ */
+ bool remove_scheduled;
+};
+
+/*
+ * raid_bdev is the single entity structure which contains SPDK block device
+ * and the information related to any raid bdev either configured or
+ * in configuring list. io device is created on this.
+ */
+struct raid_bdev {
+ /* raid bdev device, this will get registered in bdev layer */
+ struct spdk_bdev bdev;
+
+ /* link of raid bdev to link it to configured, configuring or offline list */
+ TAILQ_ENTRY(raid_bdev) state_link;
+
+ /* link of raid bdev to link it to global raid bdev list */
+ TAILQ_ENTRY(raid_bdev) global_link;
+
+ /* pointer to config file entry */
+ struct raid_bdev_config *config;
+
+ /* array of base bdev info */
+ struct raid_base_bdev_info *base_bdev_info;
+
+ /* strip size of raid bdev in blocks */
+ uint32_t strip_size;
+
+ /* strip size bit shift for optimized calculation */
+ uint32_t strip_size_shift;
+
+ /* block length bit shift for optimized calculation */
+ uint32_t blocklen_shift;
+
+ /* state of raid bdev */
+ enum raid_bdev_state state;
+
+ /* number of base bdevs comprising raid bdev */
+ uint16_t num_base_bdevs;
+
+ /* number of base bdevs discovered */
+ uint16_t num_base_bdevs_discovered;
+
+ /* Raid Level of this raid bdev */
+ uint8_t raid_level;
+
+ /* Set to true if destruct is called for this raid bdev */
+ bool destruct_called;
+};
+
+/*
+ * raid_bdev_io is the context part of bdev_io. It contains the information
+ * related to bdev_io for a pooled bdev
+ */
+struct raid_bdev_io {
+ /* WaitQ entry, used only in waitq logic */
+ struct spdk_bdev_io_wait_entry waitq_entry;
+
+ /* Original channel for this IO, used in queuing logic */
+ struct spdk_io_channel *ch;
+
+ /* Used for tracking progress on resets sent to member disks. */
+ uint8_t base_bdev_reset_submitted;
+ uint8_t base_bdev_reset_completed;
+ uint8_t base_bdev_reset_status;
+};
+
+/*
+ * raid_base_bdev_config is the per base bdev data structure which contains
+ * information w.r.t to per base bdev during parsing config
+ */
+struct raid_base_bdev_config {
+ /* base bdev name from config file */
+ char *name;
+};
+
+/*
+ * raid_bdev_config contains the raid bdev config related information after
+ * parsing the config file
+ */
+struct raid_bdev_config {
+ /* base bdev config per underlying bdev */
+ struct raid_base_bdev_config *base_bdev;
+
+ /* Points to already created raid bdev */
+ struct raid_bdev *raid_bdev;
+
+ char *name;
+
+ /* strip size of this raid bdev in kilobytes */
+ uint32_t strip_size;
+
+ /* number of base bdevs */
+ uint8_t num_base_bdevs;
+
+ /* raid level */
+ uint8_t raid_level;
+
+ TAILQ_ENTRY(raid_bdev_config) link;
+};
+
+/*
+ * raid_config is the top level structure representing the raid bdev config as read
+ * from config file for all raids
+ */
+struct raid_config {
+ /* raid bdev context from config file */
+ TAILQ_HEAD(, raid_bdev_config) raid_bdev_config_head;
+
+ /* total raid bdev from config file */
+ uint8_t total_raid_bdev;
+};
+
+/*
+ * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It
+ * contains the relationship of raid bdev io channel with base bdev io channels.
+ */
+struct raid_bdev_io_channel {
+ /* Array of IO channels of base bdevs */
+ struct spdk_io_channel **base_channel;
+};
+
+/* TAIL heads for various raid bdev lists */
+TAILQ_HEAD(spdk_raid_configured_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_configuring_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_all_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_offline_tailq, raid_bdev);
+
+extern struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list;
+extern struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list;
+extern struct spdk_raid_all_tailq g_spdk_raid_bdev_list;
+extern struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list;
+extern struct raid_config g_spdk_raid_config;
+
+int raid_bdev_create(struct raid_bdev_config *raid_cfg);
+void raid_bdev_remove_base_bdev(void *ctx);
+int raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg);
+void raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint32_t slot);
+void raid_bdev_cleanup(struct raid_bdev *raid_bdev);
+int raid_bdev_config_add(const char *raid_name, int strip_size, int num_base_bdevs,
+ int raid_level, struct raid_bdev_config **_raid_cfg);
+int raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg,
+ const char *base_bdev_name, uint32_t slot);
+void raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg);
+struct raid_bdev_config *raid_bdev_config_find_by_name(const char *raid_name);
+
+#endif // SPDK_BDEV_RAID_INTERNAL_H
diff --git a/src/spdk/lib/bdev/raid/bdev_raid_rpc.c b/src/spdk/lib/bdev/raid/bdev_raid_rpc.c
new file mode 100644
index 00000000..00b3bc9d
--- /dev/null
+++ b/src/spdk/lib/bdev/raid/bdev_raid_rpc.c
@@ -0,0 +1,408 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/bdev.h"
+#include "bdev_raid.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+#include "spdk/env.h"
+
+#define RPC_MAX_BASE_BDEVS 255
+
+SPDK_LOG_REGISTER_COMPONENT("raidrpc", SPDK_LOG_RAID_RPC)
+
+/*
+ * Input structure for get_raid_bdevs RPC
+ */
+struct rpc_get_raid_bdevs {
+ /* category - all or online or configuring or offline */
+ char *category;
+};
+
+/*
+ * brief:
+ * free_rpc_get_raid_bdevs function frees RPC get_raid_bdevs related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_get_raid_bdevs(struct rpc_get_raid_bdevs *req)
+{
+ free(req->category);
+}
+
+/*
+ * Decoder object for RPC get_raid_bdevs
+ */
+static const struct spdk_json_object_decoder rpc_get_raid_bdevs_decoders[] = {
+ {"category", offsetof(struct rpc_get_raid_bdevs, category), spdk_json_decode_string},
+};
+
+/*
+ * brief:
+ * spdk_rpc_get_raid_bdevs function is the RPC for get_raid_bdevs. It is used to list
+ * all the raid bdev names based on the input category requested. Category must be
+ * one of "all", "online", "configuring" or "offline". "all" means all the raids,
+ * whether they are online, configuring or offline. "online" is a raid bdev which
+ * is registered with the bdev layer. "configuring" is a raid bdev which does not
+ * have its full configuration discovered yet. "offline" is a raid bdev which is
+ * not currently registered with the bdev layer because it has encountered an
+ * error or the user has requested to take it offline.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
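+/*
+ * Illustrative request for this RPC (the category value is one of the four
+ * listed above; the raid bdev name in the reply is hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "get_raid_bdevs", "id": 1,
+ *    "params": {"category": "online"}}
+ *
+ * The reply is a JSON array of matching raid bdev names, e.g. ["Raid0"].
+ */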
+static void
+spdk_rpc_get_raid_bdevs(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ struct rpc_get_raid_bdevs req = {};
+ struct spdk_json_write_ctx *w;
+ struct raid_bdev *raid_bdev;
+
+ if (spdk_json_decode_object(params, rpc_get_raid_bdevs_decoders,
+ SPDK_COUNTOF(rpc_get_raid_bdevs_decoders),
+ &req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_get_raid_bdevs(&req);
+ return;
+ }
+
+ if (!(strcmp(req.category, "all") == 0 ||
+ strcmp(req.category, "online") == 0 ||
+ strcmp(req.category, "configuring") == 0 ||
+ strcmp(req.category, "offline") == 0)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_get_raid_bdevs(&req);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_get_raid_bdevs(&req);
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+
+ /* Get raid bdev list based on the category requested */
+ if (strcmp(req.category, "all") == 0) {
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, global_link) {
+ spdk_json_write_string(w, raid_bdev->bdev.name);
+ }
+ } else if (strcmp(req.category, "online") == 0) {
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configured_list, state_link) {
+ spdk_json_write_string(w, raid_bdev->bdev.name);
+ }
+ } else if (strcmp(req.category, "configuring") == 0) {
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configuring_list, state_link) {
+ spdk_json_write_string(w, raid_bdev->bdev.name);
+ }
+ } else {
+ TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_offline_list, state_link) {
+ spdk_json_write_string(w, raid_bdev->bdev.name);
+ }
+ }
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_get_raid_bdevs(&req);
+}
+SPDK_RPC_REGISTER("get_raid_bdevs", spdk_rpc_get_raid_bdevs, SPDK_RPC_RUNTIME)
+
+/*
+ * Base bdevs in RPC construct_raid_bdev
+ */
+struct rpc_construct_raid_base_bdevs {
+ /* Number of base bdevs */
+ size_t num_base_bdevs;
+
+ /* List of base bdevs names */
+ char *base_bdevs[RPC_MAX_BASE_BDEVS];
+};
+
+/*
+ * Input structure for RPC construct_raid_bdev
+ */
+struct rpc_construct_raid_bdev {
+ /* Raid bdev name */
+ char *name;
+
+ /* RAID strip size */
+ uint32_t strip_size;
+
+ /* RAID level (decoded as a JSON uint32 below, so stored as uint32_t) */
+ uint32_t raid_level;
+
+ /* Base bdevs information */
+ struct rpc_construct_raid_base_bdevs base_bdevs;
+};
+
+/*
+ * brief:
+ * free_rpc_construct_raid_bdev function frees RPC construct_raid_bdev related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_construct_raid_bdev(struct rpc_construct_raid_bdev *req)
+{
+ free(req->name);
+ for (size_t i = 0; i < req->base_bdevs.num_base_bdevs; i++) {
+ free(req->base_bdevs.base_bdevs[i]);
+ }
+}
+
+/*
+ * Decoder function for RPC construct_raid_bdev to decode base bdevs list
+ */
+static int
+decode_base_bdevs(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_construct_raid_base_bdevs *base_bdevs = out;
+ return spdk_json_decode_array(val, spdk_json_decode_string, base_bdevs->base_bdevs,
+ RPC_MAX_BASE_BDEVS, &base_bdevs->num_base_bdevs, sizeof(char *));
+}
+
+/*
+ * Decoder object for RPC construct_raid_bdev
+ */
+static const struct spdk_json_object_decoder rpc_construct_raid_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_construct_raid_bdev, name), spdk_json_decode_string},
+ {"strip_size", offsetof(struct rpc_construct_raid_bdev, strip_size), spdk_json_decode_uint32},
+ {"raid_level", offsetof(struct rpc_construct_raid_bdev, raid_level), spdk_json_decode_uint32},
+ {"base_bdevs", offsetof(struct rpc_construct_raid_bdev, base_bdevs), decode_base_bdevs},
+};
+
+/*
+ * brief:
+ * spdk_rpc_construct_raid_bdev function is the RPC for construct_raid_bdev. It takes
+ * as input the raid bdev name, raid level, strip size in KB and a list of base bdev names.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
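+/*
+ * Illustrative request for this RPC (the raid and base bdev names are
+ * hypothetical; strip_size is in KB):
+ *
+ *   {"jsonrpc": "2.0", "method": "construct_raid_bdev", "id": 1,
+ *    "params": {"name": "Raid0", "raid_level": 0, "strip_size": 64,
+ *               "base_bdevs": ["Nvme0n1", "Nvme1n1"]}}
+ */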
+static void
+spdk_rpc_construct_raid_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_raid_bdev req = {};
+ struct spdk_json_write_ctx *w;
+ struct raid_bdev_config *raid_cfg;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_raid_bdev_decoders,
+ SPDK_COUNTOF(rpc_construct_raid_bdev_decoders),
+ &req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ free_rpc_construct_raid_bdev(&req);
+ return;
+ }
+
+ rc = raid_bdev_config_add(req.name, req.strip_size, req.base_bdevs.num_base_bdevs, req.raid_level,
+ &raid_cfg);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to add RAID bdev config %s: %s",
+ req.name, spdk_strerror(-rc));
+ free_rpc_construct_raid_bdev(&req);
+ return;
+ }
+
+ for (size_t i = 0; i < req.base_bdevs.num_base_bdevs; i++) {
+ rc = raid_bdev_config_add_base_bdev(raid_cfg, req.base_bdevs.base_bdevs[i], i);
+ if (rc != 0) {
+ raid_bdev_config_cleanup(raid_cfg);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to add base bdev %s to RAID bdev config %s: %s",
+ req.base_bdevs.base_bdevs[i], req.name,
+ spdk_strerror(-rc));
+ free_rpc_construct_raid_bdev(&req);
+ return;
+ }
+ }
+
+ rc = raid_bdev_create(raid_cfg);
+ if (rc != 0) {
+ raid_bdev_config_cleanup(raid_cfg);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to create RAID bdev %s: %s",
+ req.name, spdk_strerror(-rc));
+ free_rpc_construct_raid_bdev(&req);
+ return;
+ }
+
+ rc = raid_bdev_add_base_devices(raid_cfg);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to add any base bdev to RAID bdev %s: %s",
+ req.name, spdk_strerror(-rc));
+ free_rpc_construct_raid_bdev(&req);
+ return;
+ }
+
+ free_rpc_construct_raid_bdev(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+SPDK_RPC_REGISTER("construct_raid_bdev", spdk_rpc_construct_raid_bdev, SPDK_RPC_RUNTIME)
+
+/*
+ * Input structure for RPC destroy_raid_bdev
+ */
+struct rpc_destroy_raid_bdev {
+ /* raid bdev name */
+ char *name;
+};
+
+/*
+ * brief:
+ * free_rpc_destroy_raid_bdev function frees RPC destroy_raid_bdev related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_destroy_raid_bdev(struct rpc_destroy_raid_bdev *req)
+{
+ free(req->name);
+}
+
+/*
+ * Decoder object for RPC destroy_raid_bdev
+ */
+static const struct spdk_json_object_decoder rpc_destroy_raid_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_destroy_raid_bdev, name), spdk_json_decode_string},
+};
+
+/*
+ * brief:
+ * Destroying a raid bdev is an asynchronous operation, so this function checks
+ * if the raid bdev still exists. If it does, the function re-queues itself as an
+ * event and checks again later; otherwise it proceeds with cleanup
+ * params:
+ * arg - pointer to raid bdev cfg
+ * returns:
+ * none
+ */
+static void
+raid_bdev_config_destroy(void *arg)
+{
+ struct raid_bdev_config *raid_cfg = arg;
+
+ assert(raid_cfg != NULL);
+ if (raid_cfg->raid_bdev != NULL) {
+ /*
+ * If raid bdev exists for this config, wait for raid bdev to get
+ * destroyed and come back later
+ */
+ spdk_thread_send_msg(spdk_get_thread(), raid_bdev_config_destroy,
+ raid_cfg);
+ } else {
+ raid_bdev_config_cleanup(raid_cfg);
+ }
+}
+
+/*
+ * brief:
+ * spdk_rpc_destroy_raid_bdev function is the RPC for destroy_raid_bdev. It takes the
+ * raid bdev name as input and destroys that raid bdev, including freeing the base
+ * bdev resources.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
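+/*
+ * Illustrative request for this RPC (the raid bdev name is hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "destroy_raid_bdev", "id": 1,
+ *    "params": {"name": "Raid0"}}
+ */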
+static void
+spdk_rpc_destroy_raid_bdev(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ struct rpc_destroy_raid_bdev req = {};
+ struct spdk_json_write_ctx *w;
+ struct raid_bdev_config *raid_cfg = NULL;
+ struct spdk_bdev *base_bdev;
+
+ if (spdk_json_decode_object(params, rpc_destroy_raid_bdev_decoders,
+ SPDK_COUNTOF(rpc_destroy_raid_bdev_decoders),
+ &req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid parameters");
+ free_rpc_destroy_raid_bdev(&req);
+ return;
+ }
+
+ raid_cfg = raid_bdev_config_find_by_name(req.name);
+ if (raid_cfg == NULL) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "raid bdev %s is not found in config", req.name);
+ free_rpc_destroy_raid_bdev(&req);
+ return;
+ }
+
+ /* Remove all the base bdevs from this raid bdev before destroying the raid bdev */
+ for (uint32_t i = 0; i < raid_cfg->num_base_bdevs; i++) {
+ base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name);
+ if (base_bdev != NULL) {
+ raid_bdev_remove_base_bdev(base_bdev);
+ }
+ }
+
+ raid_bdev_config_destroy(raid_cfg);
+
+ free_rpc_destroy_raid_bdev(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+SPDK_RPC_REGISTER("destroy_raid_bdev", spdk_rpc_destroy_raid_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/rbd/Makefile b/src/spdk/lib/bdev/rbd/Makefile
new file mode 100644
index 00000000..e7c97aca
--- /dev/null
+++ b/src/spdk/lib/bdev/rbd/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_rbd.c bdev_rbd_rpc.c
+LIBNAME = bdev_rbd
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd.c b/src/spdk/lib/bdev/rbd/bdev_rbd.c
new file mode 100644
index 00000000..34c2466b
--- /dev/null
+++ b/src/spdk/lib/bdev/rbd/bdev_rbd.c
@@ -0,0 +1,740 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_rbd.h"
+
+#include <rbd/librbd.h>
+#include <rados/librados.h>
+#include <sys/eventfd.h>
+
+#include "spdk/conf.h"
+#include "spdk/env.h"
+#include "spdk/bdev.h"
+#include "spdk/thread.h"
+#include "spdk/json.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#define SPDK_RBD_QUEUE_DEPTH 128
+
+static int bdev_rbd_count = 0;
+
+#define BDEV_RBD_POLL_US 50
+
+struct bdev_rbd {
+ struct spdk_bdev disk;
+ char *rbd_name;
+ char *pool_name;
+ rbd_image_info_t info;
+ TAILQ_ENTRY(bdev_rbd) tailq;
+ struct spdk_poller *reset_timer;
+ struct spdk_bdev_io *reset_bdev_io;
+};
+
+struct bdev_rbd_io_channel {
+ rados_ioctx_t io_ctx;
+ rados_t cluster;
+ struct pollfd pfd;
+ rbd_image_t image;
+ struct bdev_rbd *disk;
+ struct spdk_poller *poller;
+};
+
+struct bdev_rbd_io {
+ uint64_t remaining_len;
+ int num_segments;
+ bool failed;
+};
+
+static void
+bdev_rbd_free(struct bdev_rbd *rbd)
+{
+ if (!rbd) {
+ return;
+ }
+
+ free(rbd->disk.name);
+ free(rbd->rbd_name);
+ free(rbd->pool_name);
+ free(rbd);
+}
+
+static int
+bdev_rados_context_init(const char *rbd_pool_name, rados_t *cluster,
+ rados_ioctx_t *io_ctx)
+{
+ int ret;
+
+ ret = rados_create(cluster, NULL);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to create rados_t struct\n");
+ return -1;
+ }
+
+ ret = rados_conf_read_file(*cluster, NULL);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to read conf file\n");
+ rados_shutdown(*cluster);
+ return -1;
+ }
+
+ ret = rados_connect(*cluster);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to connect to rbd_pool\n");
+ rados_shutdown(*cluster);
+ return -1;
+ }
+
+ ret = rados_ioctx_create(*cluster, rbd_pool_name, io_ctx);
+
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to create ioctx\n");
+ rados_shutdown(*cluster);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+bdev_rbd_init(const char *rbd_pool_name, const char *rbd_name, rbd_image_info_t *info)
+{
+ int ret;
+ rados_t cluster = NULL;
+ rados_ioctx_t io_ctx = NULL;
+ rbd_image_t image = NULL;
+
+ ret = bdev_rados_context_init(rbd_pool_name, &cluster, &io_ctx);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to create rados context for rbd_pool=%s\n",
+ rbd_pool_name);
+ return -1;
+ }
+
+ ret = rbd_open(io_ctx, rbd_name, &image, NULL);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to open specified rbd device\n");
+ goto err;
+ }
+ ret = rbd_stat(image, info, sizeof(*info));
+ rbd_close(image);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to stat specified rbd device\n");
+ goto err;
+ }
+
+ rados_ioctx_destroy(io_ctx);
+ return 0;
+err:
+ rados_ioctx_destroy(io_ctx);
+ rados_shutdown(cluster);
+ return -1;
+}
+
+static void
+bdev_rbd_exit(rbd_image_t image)
+{
+ rbd_flush(image);
+ rbd_close(image);
+}
+
+static void
+bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
+{
+ /* Nothing to do here; completions are reaped by the bdev_rbd_io_poll() poller. */
+}
+
+static int
+bdev_rbd_start_aio(rbd_image_t image, struct spdk_bdev_io *bdev_io,
+ void *buf, uint64_t offset, size_t len)
+{
+ int ret;
+ rbd_completion_t comp;
+
+ ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
+ &comp);
+ if (ret < 0) {
+ return -1;
+ }
+
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ ret = rbd_aio_read(image, offset, len,
+ buf, comp);
+ } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+ ret = rbd_aio_write(image, offset, len,
+ buf, comp);
+ } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
+ ret = rbd_aio_flush(image, comp);
+ }
+
+ if (ret < 0) {
+ rbd_aio_release(comp);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int bdev_rbd_library_init(void);
+
+static int
+bdev_rbd_get_ctx_size(void)
+{
+ return sizeof(struct bdev_rbd_io);
+}
+
+static struct spdk_bdev_module rbd_if = {
+ .name = "rbd",
+ .module_init = bdev_rbd_library_init,
+ .get_ctx_size = bdev_rbd_get_ctx_size,
+
+};
+SPDK_BDEV_MODULE_REGISTER(&rbd_if)
+
+static int64_t
+bdev_rbd_rw(struct bdev_rbd *disk, struct spdk_io_channel *ch,
+ struct spdk_bdev_io *bdev_io, struct iovec *iov,
+ int iovcnt, size_t len, uint64_t offset)
+{
+ struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
+ struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch);
+ size_t remaining = len;
+ int i, rc;
+
+ rbd_io->remaining_len = 0;
+ rbd_io->num_segments = 0;
+ rbd_io->failed = false;
+
+ for (i = 0; i < iovcnt && remaining > 0; i++) {
+ size_t seg_len = spdk_min(remaining, iov[i].iov_len);
+
+ rc = bdev_rbd_start_aio(rbdio_ch->image, bdev_io, iov[i].iov_base, offset, seg_len);
+ if (rc) {
+ /*
+ * This bdev_rbd_start_aio() call failed, but if any previous ones were
+ * submitted, we need to wait for them to finish.
+ */
+ if (rbd_io->num_segments == 0) {
+ /* No previous I/O submitted - return error code immediately. */
+ return rc;
+ }
+
+ /* Return and wait for outstanding I/O to complete. */
+ rbd_io->failed = true;
+ return 0;
+ }
+
+ rbd_io->num_segments++;
+ rbd_io->remaining_len += seg_len;
+
+ offset += seg_len;
+ remaining -= seg_len;
+ }
+
+ return 0;
+}
+
+static int64_t
+bdev_rbd_flush(struct bdev_rbd *disk, struct spdk_io_channel *ch,
+ struct spdk_bdev_io *bdev_io, uint64_t offset, uint64_t nbytes)
+{
+ struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch);
+
+ return bdev_rbd_start_aio(rbdio_ch->image, bdev_io, NULL, offset, nbytes);
+}
+
+static int
+bdev_rbd_reset_timer(void *arg)
+{
+ struct bdev_rbd *disk = arg;
+
+ /*
+ * TODO: This should check if any I/O is still in flight before completing the reset.
+ * For now, just complete after the timer expires.
+ */
+ spdk_bdev_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ spdk_poller_unregister(&disk->reset_timer);
+ disk->reset_bdev_io = NULL;
+
+ return -1;
+}
+
+static int
+bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io)
+{
+ /*
+ * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a
+ * timer to wait for in-flight I/O to complete.
+ */
+ assert(disk->reset_bdev_io == NULL);
+ disk->reset_bdev_io = bdev_io;
+ disk->reset_timer = spdk_poller_register(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000);
+
+ return 0;
+}
+
+static int
+bdev_rbd_destruct(void *ctx)
+{
+ struct bdev_rbd *rbd = ctx;
+
+ spdk_io_device_unregister(rbd, NULL);
+
+ bdev_rbd_free(rbd);
+ return 0;
+}
+
+static void bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ int ret;
+
+ ret = bdev_rbd_rw(bdev_io->bdev->ctxt,
+ ch,
+ bdev_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+
+ if (ret != 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static int _bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ return bdev_rbd_rw((struct bdev_rbd *)bdev_io->bdev->ctxt,
+ ch,
+ bdev_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ return bdev_rbd_flush((struct bdev_rbd *)bdev_io->bdev->ctxt,
+ ch,
+ bdev_io,
+ bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt,
+ bdev_io);
+
+ default:
+ return -1;
+ }
+}
+
+static void bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ if (_bdev_rbd_submit_request(ch, bdev_io) < 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static int
+bdev_rbd_io_poll(void *arg)
+{
+ struct bdev_rbd_io_channel *ch = arg;
+ int i, io_status, rc;
+ rbd_completion_t comps[SPDK_RBD_QUEUE_DEPTH];
+ struct spdk_bdev_io *bdev_io;
+ struct bdev_rbd_io *rbd_io;
+
+ rc = poll(&ch->pfd, 1, 0);
+
+ /* check the return value of poll since we have only one fd for each channel */
+ if (rc != 1) {
+ return 0;
+ }
+
+ rc = rbd_poll_io_events(ch->image, comps, SPDK_RBD_QUEUE_DEPTH);
+ for (i = 0; i < rc; i++) {
+ bdev_io = rbd_aio_get_arg(comps[i]);
+ rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
+ io_status = rbd_aio_get_return_value(comps[i]);
+
+ assert(rbd_io->num_segments > 0);
+ rbd_io->num_segments--;
+
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ if (io_status > 0) {
+ /* For reads, io_status is the length */
+ rbd_io->remaining_len -= io_status;
+ }
+
+ if (rbd_io->num_segments == 0 && rbd_io->remaining_len != 0) {
+ rbd_io->failed = true;
+ }
+ } else {
+ /* For others, 0 means success */
+ if (io_status != 0) {
+ rbd_io->failed = true;
+ }
+ }
+
+ rbd_aio_release(comps[i]);
+
+ if (rbd_io->num_segments == 0) {
+ spdk_bdev_io_complete(bdev_io,
+ rbd_io->failed ? SPDK_BDEV_IO_STATUS_FAILED : SPDK_BDEV_IO_STATUS_SUCCESS);
+ }
+ }
+
+ return rc;
+}
+
+static void
+bdev_rbd_free_channel(struct bdev_rbd_io_channel *ch)
+{
+ if (!ch) {
+ return;
+ }
+
+ if (ch->image) {
+ bdev_rbd_exit(ch->image);
+ }
+
+ if (ch->io_ctx) {
+ rados_ioctx_destroy(ch->io_ctx);
+ }
+
+ if (ch->cluster) {
+ rados_shutdown(ch->cluster);
+ }
+
+ if (ch->pfd.fd >= 0) {
+ close(ch->pfd.fd);
+ }
+}
+
+static void *
+bdev_rbd_handle(void *arg)
+{
+ struct bdev_rbd_io_channel *ch = arg;
+ void *ret = arg;
+
+ if (rbd_open(ch->io_ctx, ch->disk->rbd_name, &ch->image, NULL) < 0) {
+ SPDK_ERRLOG("Failed to open specified rbd device\n");
+ ret = NULL;
+ }
+
+ return ret;
+}
+
+static int
+bdev_rbd_create_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_rbd_io_channel *ch = ctx_buf;
+ int ret;
+
+ ch->disk = io_device;
+ ch->image = NULL;
+ ch->io_ctx = NULL;
+ ch->pfd.fd = -1;
+
+ ret = bdev_rados_context_init(ch->disk->pool_name, &ch->cluster, &ch->io_ctx);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to create rados context for rbd_pool=%s\n",
+ ch->disk->pool_name);
+ goto err;
+ }
+
+ if (spdk_call_unaffinitized(bdev_rbd_handle, ch) == NULL) {
+ goto err;
+ }
+
+ ch->pfd.fd = eventfd(0, EFD_NONBLOCK);
+ if (ch->pfd.fd < 0) {
+ SPDK_ERRLOG("Failed to get eventfd\n");
+ goto err;
+ }
+
+ ch->pfd.events = POLLIN;
+ ret = rbd_set_image_notification(ch->image, ch->pfd.fd, EVENT_TYPE_EVENTFD);
+ if (ret < 0) {
+ SPDK_ERRLOG("Failed to set rbd image notification\n");
+ goto err;
+ }
+
+ ch->poller = spdk_poller_register(bdev_rbd_io_poll, ch, BDEV_RBD_POLL_US);
+
+ return 0;
+
+err:
+ bdev_rbd_free_channel(ch);
+ return -1;
+}
+
+static void
+bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_rbd_io_channel *io_channel = ctx_buf;
+
+ bdev_rbd_free_channel(io_channel);
+
+ spdk_poller_unregister(&io_channel->poller);
+}
+
+static struct spdk_io_channel *
+bdev_rbd_get_io_channel(void *ctx)
+{
+ struct bdev_rbd *rbd_bdev = ctx;
+
+ return spdk_get_io_channel(rbd_bdev);
+}
+
+static int
+bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct bdev_rbd *rbd_bdev = ctx;
+
+ spdk_json_write_name(w, "rbd");
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "pool_name");
+ spdk_json_write_string(w, rbd_bdev->pool_name);
+
+ spdk_json_write_name(w, "rbd_name");
+ spdk_json_write_string(w, rbd_bdev->rbd_name);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct bdev_rbd *rbd = bdev->ctxt;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_rbd_bdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bdev->name);
+ spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
+ spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
+ spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table rbd_fn_table = {
+ .destruct = bdev_rbd_destruct,
+ .submit_request = bdev_rbd_submit_request,
+ .io_type_supported = bdev_rbd_io_type_supported,
+ .get_io_channel = bdev_rbd_get_io_channel,
+ .dump_info_json = bdev_rbd_dump_info_json,
+ .write_config_json = bdev_rbd_write_config_json,
+};
+
+struct spdk_bdev *
+spdk_bdev_rbd_create(const char *name, const char *pool_name, const char *rbd_name,
+ uint32_t block_size)
+{
+ struct bdev_rbd *rbd;
+ int ret;
+
+ if ((pool_name == NULL) || (rbd_name == NULL)) {
+ return NULL;
+ }
+
+ rbd = calloc(1, sizeof(struct bdev_rbd));
+ if (rbd == NULL) {
+ SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
+ return NULL;
+ }
+
+ rbd->rbd_name = strdup(rbd_name);
+ if (!rbd->rbd_name) {
+ bdev_rbd_free(rbd);
+ return NULL;
+ }
+
+ rbd->pool_name = strdup(pool_name);
+ if (!rbd->pool_name) {
+ bdev_rbd_free(rbd);
+ return NULL;
+ }
+
+ ret = bdev_rbd_init(rbd->pool_name, rbd_name, &rbd->info);
+ if (ret < 0) {
+ bdev_rbd_free(rbd);
+ SPDK_ERRLOG("Failed to init rbd device\n");
+ return NULL;
+ }
+
+ if (name) {
+ rbd->disk.name = strdup(name);
+ } else {
+ rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
+ }
+ if (!rbd->disk.name) {
+ bdev_rbd_free(rbd);
+ return NULL;
+ }
+ rbd->disk.product_name = "Ceph Rbd Disk";
+ bdev_rbd_count++;
+
+ rbd->disk.write_cache = 0;
+ rbd->disk.blocklen = block_size;
+ rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
+ rbd->disk.ctxt = rbd;
+ rbd->disk.fn_table = &rbd_fn_table;
+ rbd->disk.module = &rbd_if;
+
+ SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);
+
+ spdk_io_device_register(rbd, bdev_rbd_create_cb,
+ bdev_rbd_destroy_cb,
+ sizeof(struct bdev_rbd_io_channel),
+ rbd_name);
+ ret = spdk_bdev_register(&rbd->disk);
+ if (ret) {
+ spdk_io_device_unregister(rbd, NULL);
+ bdev_rbd_free(rbd);
+ return NULL;
+ }
+
+ return &rbd->disk;
+}
+
+void
+spdk_bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, void *cb_arg)
+{
+ if (!bdev || bdev->module != &rbd_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
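+/*
+ * Sketch of the legacy configuration file section parsed below. The pool and
+ * image names are illustrative; the trailing block size is optional and
+ * defaults to 512:
+ *
+ *   [Ceph]
+ *     Ceph rbd disk01 512
+ */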
+static int
+bdev_rbd_library_init(void)
+{
+ int i, rc = 0;
+ const char *val;
+ const char *pool_name;
+ const char *rbd_name;
+ uint32_t block_size;
+
+ struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Ceph");
+
+ if (sp == NULL) {
+ /*
+ * Ceph section not found. Do not initialize any rbd LUNS.
+ */
+ goto end;
+ }
+
+ /* Init rbd block devices */
+ for (i = 0; ; i++) {
+ val = spdk_conf_section_get_nval(sp, "Ceph", i);
+ if (val == NULL) {
+ break;
+ }
+
+ /* get the rbd pool name */
+ pool_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 0);
+ if (pool_name == NULL) {
+ SPDK_ERRLOG("Ceph%d: rbd pool name needs to be provided\n", i);
+ rc = -1;
+ goto end;
+ }
+
+ rbd_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 1);
+ if (rbd_name == NULL) {
+ SPDK_ERRLOG("Ceph%d: format error\n", i);
+ rc = -1;
+ goto end;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, "Ceph", i, 2);
+
+ if (val == NULL) {
+ block_size = 512; /* default value */
+ } else {
+ block_size = (int)strtol(val, NULL, 10);
+ if (block_size & 0x1ff) {
+ SPDK_ERRLOG("current block_size = %d, it should be multiple of 512\n",
+ block_size);
+ rc = -1;
+ goto end;
+ }
+ }
+
+ if (spdk_bdev_rbd_create(NULL, pool_name, rbd_name, block_size) == NULL) {
+ rc = -1;
+ goto end;
+ }
+ }
+
+end:
+ return rc;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_rbd", SPDK_LOG_BDEV_RBD)
diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd.h b/src/spdk/lib/bdev/rbd/bdev_rbd.h
new file mode 100644
index 00000000..dd2448e1
--- /dev/null
+++ b/src/spdk/lib/bdev/rbd/bdev_rbd.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_RBD_H
+#define SPDK_BDEV_RBD_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+
+typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno);
+
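+/**
+ * Create an rbd bdev backed by a Ceph rbd image.
+ *
+ * \param name Name for the new bdev, or NULL to auto-generate one ("Ceph<N>").
+ * \param pool_name Ceph pool containing the rbd image.
+ * \param rbd_name Name of the rbd image.
+ * \param block_size Logical block size of the bdev in bytes.
+ *
+ * \return Pointer to the created bdev on success, NULL on failure.
+ */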
+struct spdk_bdev *spdk_bdev_rbd_create(const char *name, const char *pool_name,
+ const char *rbd_name, uint32_t block_size);
+/**
+ * Delete rbd bdev.
+ *
+ * \param bdev Pointer to rbd bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void spdk_bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn,
+ void *cb_arg);
+
+#endif // SPDK_BDEV_RBD_H
diff --git a/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c b/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c
new file mode 100644
index 00000000..745a90ed
--- /dev/null
+++ b/src/spdk/lib/bdev/rbd/bdev_rbd_rpc.c
@@ -0,0 +1,157 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_rbd.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+struct rpc_construct_rbd {
+ char *name;
+ char *pool_name;
+ char *rbd_name;
+ uint32_t block_size;
+};
+
+static void
+free_rpc_construct_rbd(struct rpc_construct_rbd *req)
+{
+ free(req->name);
+ free(req->pool_name);
+ free(req->rbd_name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_rbd_decoders[] = {
+ {"name", offsetof(struct rpc_construct_rbd, name), spdk_json_decode_string, true},
+ {"pool_name", offsetof(struct rpc_construct_rbd, pool_name), spdk_json_decode_string},
+ {"rbd_name", offsetof(struct rpc_construct_rbd, rbd_name), spdk_json_decode_string},
+ {"block_size", offsetof(struct rpc_construct_rbd, block_size), spdk_json_decode_uint32},
+};
+
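+/*
+ * Illustrative request for this RPC (the pool and image names are
+ * hypothetical; "name" is optional and a "Ceph<N>" name is generated when
+ * it is omitted):
+ *
+ *   {"jsonrpc": "2.0", "method": "construct_rbd_bdev", "id": 1,
+ *    "params": {"pool_name": "rbd", "rbd_name": "disk01", "block_size": 512}}
+ *
+ * The reply is the name of the created bdev.
+ */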
+static void
+spdk_rpc_construct_rbd_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_rbd req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev;
+
+ if (spdk_json_decode_object(params, rpc_construct_rbd_decoders,
+ SPDK_COUNTOF(rpc_construct_rbd_decoders),
+ &req)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_RBD, "spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_rbd_create(req.name, req.pool_name, req.rbd_name, req.block_size);
+ if (bdev == NULL) {
+ goto invalid;
+ }
+
+ free_rpc_construct_rbd(&req);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_construct_rbd(&req);
+}
+SPDK_RPC_REGISTER("construct_rbd_bdev", spdk_rpc_construct_rbd_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_rbd {
+ char *name;
+};
+
+static void
+free_rpc_delete_rbd(struct rpc_delete_rbd *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_rbd_decoders[] = {
+ {"name", offsetof(struct rpc_delete_rbd, name), spdk_json_decode_string},
+};
+
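+/*
+ * Illustrative request for this RPC (the bdev name is hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "delete_rbd_bdev", "id": 1,
+ *    "params": {"name": "Ceph0"}}
+ */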
+static void
+_spdk_rpc_delete_rbd_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_rbd_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_rbd req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_delete_rbd_decoders,
+ SPDK_COUNTOF(rpc_delete_rbd_decoders),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ rc = -ENODEV;
+ goto invalid;
+ }
+
+ spdk_bdev_rbd_delete(bdev, _spdk_rpc_delete_rbd_bdev_cb, request);
+ free_rpc_delete_rbd(&req);
+ return;
+
+invalid:
+ free_rpc_delete_rbd(&req);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("delete_rbd_bdev", spdk_rpc_delete_rbd_bdev, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/lib/bdev/rpc/Makefile b/src/spdk/lib/bdev/rpc/Makefile
new file mode 100644
index 00000000..4c1fcc0c
--- /dev/null
+++ b/src/spdk/lib/bdev/rpc/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_rpc.c
+LIBNAME = bdev_rpc
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/rpc/bdev_rpc.c b/src/spdk/lib/bdev/rpc/bdev_rpc.c
new file mode 100644
index 00000000..1989f6d2
--- /dev/null
+++ b/src/spdk/lib/bdev/rpc/bdev_rpc.c
@@ -0,0 +1,587 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/env.h"
+#include "spdk/log.h"
+#include "spdk/rpc.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+
+struct rpc_get_bdevs_iostat_ctx {
+ int bdev_count;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_json_write_ctx *w;
+};
+
+static void
+spdk_rpc_get_bdevs_iostat_cb(struct spdk_bdev *bdev,
+ struct spdk_bdev_io_stat *stat, void *cb_arg, int rc)
+{
+ struct rpc_get_bdevs_iostat_ctx *ctx = cb_arg;
+ struct spdk_json_write_ctx *w = ctx->w;
+ const char *bdev_name;
+
+ if (rc != 0) {
+ goto done;
+ }
+
+ bdev_name = spdk_bdev_get_name(bdev);
+ if (bdev_name != NULL) {
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "name");
+ spdk_json_write_string(w, bdev_name);
+
+ spdk_json_write_name(w, "bytes_read");
+ spdk_json_write_uint64(w, stat->bytes_read);
+
+ spdk_json_write_name(w, "num_read_ops");
+ spdk_json_write_uint64(w, stat->num_read_ops);
+
+ spdk_json_write_name(w, "bytes_written");
+ spdk_json_write_uint64(w, stat->bytes_written);
+
+ spdk_json_write_name(w, "num_write_ops");
+ spdk_json_write_uint64(w, stat->num_write_ops);
+
+ spdk_json_write_name(w, "read_latency_ticks");
+ spdk_json_write_uint64(w, stat->read_latency_ticks);
+
+ spdk_json_write_name(w, "write_latency_ticks");
+ spdk_json_write_uint64(w, stat->write_latency_ticks);
+
+ if (spdk_bdev_get_qd_sampling_period(bdev)) {
+ spdk_json_write_name(w, "queue_depth_polling_period");
+ spdk_json_write_uint64(w, spdk_bdev_get_qd_sampling_period(bdev));
+
+ spdk_json_write_name(w, "queue_depth");
+ spdk_json_write_uint64(w, spdk_bdev_get_qd(bdev));
+
+ spdk_json_write_name(w, "io_time");
+ spdk_json_write_uint64(w, spdk_bdev_get_io_time(bdev));
+
+ spdk_json_write_name(w, "weighted_io_time");
+ spdk_json_write_uint64(w, spdk_bdev_get_weighted_io_time(bdev));
+ }
+
+ spdk_json_write_object_end(w);
+ }
+
+done:
+ free(stat);
+ if (--ctx->bdev_count == 0) {
+ spdk_json_write_array_end(ctx->w);
+ spdk_jsonrpc_end_result(ctx->request, ctx->w);
+ free(ctx);
+ }
+}
+
+struct rpc_get_bdevs_iostat {
+ char *name;
+};
+
+static void
+free_rpc_get_bdevs_iostat(struct rpc_get_bdevs_iostat *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_get_bdevs_iostat_decoders[] = {
+ {"name", offsetof(struct rpc_get_bdevs_iostat, name), spdk_json_decode_string, true},
+};
+
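+/*
+ * Illustrative request for this RPC ("name" is optional; when it is omitted,
+ * statistics for every registered bdev are returned; the bdev name below is
+ * hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "get_bdevs_iostat", "id": 1,
+ *    "params": {"name": "Nvme0n1"}}
+ */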
+static void
+spdk_rpc_get_bdevs_iostat(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_bdevs_iostat req = {};
+ struct spdk_bdev *bdev = NULL;
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev_io_stat *stat;
+ struct rpc_get_bdevs_iostat_ctx *ctx;
+
+ if (params != NULL) {
+ if (spdk_json_decode_object(params, rpc_get_bdevs_iostat_decoders,
+ SPDK_COUNTOF(rpc_get_bdevs_iostat_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.name) {
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ goto invalid;
+ }
+ }
+ }
+
+ free_rpc_get_bdevs_iostat(&req);
+
+ ctx = calloc(1, sizeof(struct rpc_get_bdevs_iostat_ctx));
+ if (ctx == NULL) {
+ SPDK_ERRLOG("Failed to allocate rpc_get_bdevs_iostat_ctx struct\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "No memory left");
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free(ctx);
+ return;
+ }
+
+ /*
+ * Increment initial bdev_count so that it will never reach 0 in the middle
+ * of iterating.
+ */
+ ctx->bdev_count++;
+ ctx->request = request;
+ ctx->w = w;
+
+ spdk_json_write_array_begin(w);
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_name(w, "tick_rate");
+ spdk_json_write_uint64(w, spdk_get_ticks_hz());
+ spdk_json_write_object_end(w);
+
+ if (bdev != NULL) {
+ stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
+ if (stat == NULL) {
+ SPDK_ERRLOG("Failed to allocate rpc_get_bdevs_iostat_ctx struct\n");
+ } else {
+ ctx->bdev_count++;
+ spdk_bdev_get_device_stat(bdev, stat, spdk_rpc_get_bdevs_iostat_cb, ctx);
+ }
+ } else {
+ for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
+ stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
+ if (stat == NULL) {
+ SPDK_ERRLOG("Failed to allocate spdk_bdev_io_stat struct\n");
+ break;
+ }
+ ctx->bdev_count++;
+ spdk_bdev_get_device_stat(bdev, stat, spdk_rpc_get_bdevs_iostat_cb, ctx);
+ }
+ }
+
+ if (--ctx->bdev_count == 0) {
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+ free(ctx);
+ }
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+
+ free_rpc_get_bdevs_iostat(&req);
+}
+SPDK_RPC_REGISTER("get_bdevs_iostat", spdk_rpc_get_bdevs_iostat, SPDK_RPC_RUNTIME)
+
+static void
+spdk_rpc_dump_bdev_info(struct spdk_json_write_ctx *w,
+ struct spdk_bdev *bdev)
+{
+ struct spdk_bdev_alias *tmp;
+ uint64_t qos_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+ int i;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "name");
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+
+ spdk_json_write_name(w, "aliases");
+ spdk_json_write_array_begin(w);
+
+ TAILQ_FOREACH(tmp, spdk_bdev_get_aliases(bdev), tailq) {
+ spdk_json_write_string(w, tmp->alias);
+ }
+
+ spdk_json_write_array_end(w);
+
+ spdk_json_write_name(w, "product_name");
+ spdk_json_write_string(w, spdk_bdev_get_product_name(bdev));
+
+ spdk_json_write_name(w, "block_size");
+ spdk_json_write_uint32(w, spdk_bdev_get_block_size(bdev));
+
+ spdk_json_write_name(w, "num_blocks");
+ spdk_json_write_uint64(w, spdk_bdev_get_num_blocks(bdev));
+
+ if (!spdk_mem_all_zero(&bdev->uuid, sizeof(bdev->uuid))) {
+ char uuid_str[SPDK_UUID_STRING_LEN];
+
+ spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
+ spdk_json_write_named_string(w, "uuid", uuid_str);
+ }
+
+ spdk_json_write_name(w, "assigned_rate_limits");
+ spdk_json_write_object_begin(w);
+ spdk_bdev_get_qos_rate_limits(bdev, qos_limits);
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ spdk_json_write_name(w, spdk_bdev_get_qos_rpc_type(i));
+ spdk_json_write_uint64(w, qos_limits[i]);
+ }
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_name(w, "claimed");
+ spdk_json_write_bool(w, (bdev->internal.claim_module != NULL));
+
+ spdk_json_write_name(w, "supported_io_types");
+ spdk_json_write_object_begin(w);
+ spdk_json_write_name(w, "read");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ));
+ spdk_json_write_name(w, "write");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE));
+ spdk_json_write_name(w, "unmap");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP));
+ spdk_json_write_name(w, "write_zeroes");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES));
+ spdk_json_write_name(w, "flush");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH));
+ spdk_json_write_name(w, "reset");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_RESET));
+ spdk_json_write_name(w, "nvme_admin");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN));
+ spdk_json_write_name(w, "nvme_io");
+ spdk_json_write_bool(w, spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO));
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_name(w, "driver_specific");
+ spdk_json_write_object_begin(w);
+ spdk_bdev_dump_info_json(bdev, w);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+struct rpc_get_bdevs {
+ char *name;
+};
+
+static void
+free_rpc_get_bdevs(struct rpc_get_bdevs *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_get_bdevs_decoders[] = {
+ {"name", offsetof(struct rpc_get_bdevs, name), spdk_json_decode_string, true},
+};
+
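+/*
+ * Illustrative request for this RPC ("name" is optional; when it is omitted,
+ * all registered bdevs are listed; the bdev name below is hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "get_bdevs", "id": 1,
+ *    "params": {"name": "Nvme0n1"}}
+ */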
+static void
+spdk_rpc_get_bdevs(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_get_bdevs req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev = NULL;
+
+ if (params && spdk_json_decode_object(params, rpc_get_bdevs_decoders,
+ SPDK_COUNTOF(rpc_get_bdevs_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.name) {
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ goto invalid;
+ }
+ }
+
+ free_rpc_get_bdevs(&req);
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_array_begin(w);
+
+ if (bdev != NULL) {
+ spdk_rpc_dump_bdev_info(w, bdev);
+ } else {
+ for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
+ spdk_rpc_dump_bdev_info(w, bdev);
+ }
+ }
+
+ spdk_json_write_array_end(w);
+
+ spdk_jsonrpc_end_result(request, w);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+
+ free_rpc_get_bdevs(&req);
+}
+SPDK_RPC_REGISTER("get_bdevs", spdk_rpc_get_bdevs, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_bdev {
+ char *name;
+};
+
+static void
+free_rpc_delete_bdev(struct rpc_delete_bdev *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_delete_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_delete_bdev, name), spdk_json_decode_string},
+};
+
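+/*
+ * Illustrative request for this RPC (the bdev name is hypothetical):
+ *
+ *   {"jsonrpc": "2.0", "method": "delete_bdev", "id": 1,
+ *    "params": {"name": "Nvme0n1"}}
+ */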
+static void
+_spdk_rpc_delete_bdev_cb(void *cb_arg, int bdeverrno)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, bdeverrno == 0);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_delete_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_delete_bdev req = {};
+ struct spdk_bdev *bdev;
+
+ if (spdk_json_decode_object(params, rpc_delete_bdev_decoders,
+ SPDK_COUNTOF(rpc_delete_bdev_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.name == NULL) {
+ SPDK_ERRLOG("missing name param\n");
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ goto invalid;
+ }
+
+ spdk_bdev_unregister(bdev, _spdk_rpc_delete_bdev_cb, request);
+
+ free_rpc_delete_bdev(&req);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_delete_bdev(&req);
+}
+SPDK_RPC_REGISTER("delete_bdev", spdk_rpc_delete_bdev, SPDK_RPC_RUNTIME)
+
+struct rpc_set_bdev_qd_sampling_period {
+ char *name;
+ uint64_t period;
+};
+
+static void
+free_rpc_set_bdev_qd_sampling_period(struct rpc_set_bdev_qd_sampling_period *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder
+ rpc_set_bdev_qd_sampling_period_decoders[] = {
+ {"name", offsetof(struct rpc_set_bdev_qd_sampling_period, name), spdk_json_decode_string},
+ {"period", offsetof(struct rpc_set_bdev_qd_sampling_period, period), spdk_json_decode_uint64},
+};
+
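+/*
+ * Illustrative request for this RPC (the bdev name and period value are
+ * hypothetical; the period is in microseconds and 0 disables sampling):
+ *
+ *   {"jsonrpc": "2.0", "method": "set_bdev_qd_sampling_period", "id": 1,
+ *    "params": {"name": "Nvme0n1", "period": 20}}
+ */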
+static void
+spdk_rpc_set_bdev_qd_sampling_period(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_set_bdev_qd_sampling_period req = {0};
+ struct spdk_bdev *bdev;
+ struct spdk_json_write_ctx *w;
+
+ req.period = UINT64_MAX;
+
+ if (spdk_json_decode_object(params, rpc_set_bdev_qd_sampling_period_decoders,
+ SPDK_COUNTOF(rpc_set_bdev_qd_sampling_period_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ if (req.name) {
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ goto invalid;
+ }
+ } else {
+ SPDK_ERRLOG("Missing name param\n");
+ goto invalid;
+ }
+
+ if (req.period == UINT64_MAX) {
+ SPDK_ERRLOG("Missing period param\n");
+ goto invalid;
+ }
+
+ spdk_bdev_set_qd_sampling_period(bdev, req.period);
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ free_rpc_set_bdev_qd_sampling_period(&req);
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+ free_rpc_set_bdev_qd_sampling_period(&req);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ free_rpc_set_bdev_qd_sampling_period(&req);
+ return;
+}
+SPDK_RPC_REGISTER("set_bdev_qd_sampling_period",
+ spdk_rpc_set_bdev_qd_sampling_period,
+ SPDK_RPC_RUNTIME)
+
+struct rpc_set_bdev_qos_limit {
+ char *name;
+ uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
+};
+
+static void
+free_rpc_set_bdev_qos_limit(struct rpc_set_bdev_qos_limit *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_set_bdev_qos_limit_decoders[] = {
+ {"name", offsetof(struct rpc_set_bdev_qos_limit, name), spdk_json_decode_string},
+ {
+ "rw_ios_per_sec", offsetof(struct rpc_set_bdev_qos_limit,
+ limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT]),
+ spdk_json_decode_uint64, true
+ },
+ {
+ "rw_mbytes_per_sec", offsetof(struct rpc_set_bdev_qos_limit,
+ limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT]),
+ spdk_json_decode_uint64, true
+ },
+};
+
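+/*
+ * Illustrative request for this RPC (the bdev name and limit value are
+ * hypothetical; a limit of 0 removes the corresponding rate limit):
+ *
+ *   {"jsonrpc": "2.0", "method": "set_bdev_qos_limit", "id": 1,
+ *    "params": {"name": "Nvme0n1", "rw_ios_per_sec": 20000}}
+ */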
+static void
+spdk_rpc_set_bdev_qos_limit_complete(void *cb_arg, int status)
+{
+ struct spdk_jsonrpc_request *request = cb_arg;
+ struct spdk_json_write_ctx *w;
+
+ if (status != 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Failed to configure rate limit: %s",
+ spdk_strerror(-status));
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_set_bdev_qos_limit(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_set_bdev_qos_limit req = {NULL, {UINT64_MAX, UINT64_MAX}};
+ struct spdk_bdev *bdev;
+ bool valid_limit = false;
+ int i;
+
+ if (spdk_json_decode_object(params, rpc_set_bdev_qos_limit_decoders,
+ SPDK_COUNTOF(rpc_set_bdev_qos_limit_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ goto invalid;
+ }
+
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Bdev does not exist");
+ goto exit;
+ }
+
+ for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
+ if (req.limits[i] != UINT64_MAX) {
+ valid_limit = true;
+ }
+ }
+
+ if (valid_limit == false) {
+ SPDK_ERRLOG("no rate limits specified\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "No rate limits specified");
+ goto exit;
+ }
+
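+	/* Note: free_rpc_set_bdev_qos_limit() only frees req.name; req.limits
+	 * is an embedded array passed by value below and remains valid. */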
+ free_rpc_set_bdev_qos_limit(&req);
+ spdk_bdev_set_qos_rate_limits(bdev, req.limits, spdk_rpc_set_bdev_qos_limit_complete, request);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+exit:
+ free_rpc_set_bdev_qos_limit(&req);
+}
+
+SPDK_RPC_REGISTER("set_bdev_qos_limit", spdk_rpc_set_bdev_qos_limit, SPDK_RPC_RUNTIME)
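+
+/* Illustrative sketch: driving the same QoS path without JSON-RPC. The bdev
+ * name and limit value are hypothetical; following the decoders above,
+ * UINT64_MAX in a slot means "leave that limit unchanged". */
+static void
+example_qos_done(void *cb_arg, int status)
+{
+	SPDK_NOTICELOG("QoS rate limits applied, status=%d\n", status);
+}
+
+static void
+example_set_qos_limits(void)
+{
+	struct spdk_bdev *bdev = spdk_bdev_get_by_name("Malloc0");
+	uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {UINT64_MAX, UINT64_MAX};
+
+	if (bdev == NULL) {
+		return;
+	}
+
+	/* Cap read/write IOPS at 10000 and leave the bandwidth limit as-is. */
+	limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000;
+	spdk_bdev_set_qos_rate_limits(bdev, limits, example_qos_done, NULL);
+}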
diff --git a/src/spdk/lib/bdev/scsi_nvme.c b/src/spdk/lib/bdev/scsi_nvme.c
new file mode 100644
index 00000000..385b9036
--- /dev/null
+++ b/src/spdk/lib/bdev/scsi_nvme.c
@@ -0,0 +1,261 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016 FUJITSU LIMITED, All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/bdev_module.h"
+
+#include "spdk/nvme_spec.h"
+
+void
+spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io, int *sc, int *sk,
+ int *asc, int *ascq)
+{
+ int nvme_sct = bdev_io->internal.error.nvme.sct;
+ int nvme_sc = bdev_io->internal.error.nvme.sc;
+
+ switch (nvme_sct) {
+ case SPDK_NVME_SCT_GENERIC:
+ switch (nvme_sc) {
+ case SPDK_NVME_SC_SUCCESS:
+ *sc = SPDK_SCSI_STATUS_GOOD;
+ *sk = SPDK_SCSI_SENSE_NO_SENSE;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_INVALID_OPCODE:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_INVALID_COMMAND_OPERATION_CODE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_INVALID_FIELD:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_DATA_TRANSFER_ERROR:
+ case SPDK_NVME_SC_CAPACITY_EXCEEDED:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_ABORTED_POWER_LOSS:
+ *sc = SPDK_SCSI_STATUS_TASK_ABORTED;
+ *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
+ *asc = SPDK_SCSI_ASC_WARNING;
+ *ascq = SPDK_SCSI_ASCQ_POWER_LOSS_EXPECTED;
+ break;
+ case SPDK_NVME_SC_INTERNAL_DEVICE_ERROR:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_HARDWARE_ERROR;
+ *asc = SPDK_SCSI_ASC_INTERNAL_TARGET_FAILURE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_ABORTED_BY_REQUEST:
+ case SPDK_NVME_SC_ABORTED_SQ_DELETION:
+ case SPDK_NVME_SC_ABORTED_FAILED_FUSED:
+ case SPDK_NVME_SC_ABORTED_MISSING_FUSED:
+ *sc = SPDK_SCSI_STATUS_TASK_ABORTED;
+ *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_ACCESS_DENIED;
+ *ascq = SPDK_SCSI_ASCQ_INVALID_LU_IDENTIFIER;
+ break;
+ case SPDK_NVME_SC_LBA_OUT_OF_RANGE:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_NAMESPACE_NOT_READY:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_NOT_READY;
+ *asc = SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_RESERVATION_CONFLICT:
+ *sc = SPDK_SCSI_STATUS_RESERVATION_CONFLICT;
+ *sk = SPDK_SCSI_SENSE_NO_SENSE;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_COMMAND_ID_CONFLICT:
+ case SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR:
+ case SPDK_NVME_SC_INVALID_SGL_SEG_DESCRIPTOR:
+ case SPDK_NVME_SC_INVALID_NUM_SGL_DESCIRPTORS:
+ case SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID:
+ case SPDK_NVME_SC_METADATA_SGL_LENGTH_INVALID:
+ case SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID:
+ case SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF:
+ case SPDK_NVME_SC_INVALID_PRP_OFFSET:
+ case SPDK_NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED:
+ case SPDK_NVME_SC_INVALID_SGL_OFFSET:
+ case SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT:
+ case SPDK_NVME_SC_KEEP_ALIVE_EXPIRED:
+ case SPDK_NVME_SC_KEEP_ALIVE_INVALID:
+ case SPDK_NVME_SC_FORMAT_IN_PROGRESS:
+ default:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ }
+ break;
+ case SPDK_NVME_SCT_COMMAND_SPECIFIC:
+ switch (nvme_sc) {
+ case SPDK_NVME_SC_COMPLETION_QUEUE_INVALID:
+ case SPDK_NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_INVALID_FORMAT:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_FORMAT_COMMAND_FAILED;
+ *ascq = SPDK_SCSI_ASCQ_FORMAT_COMMAND_FAILED;
+ break;
+ case SPDK_NVME_SC_CONFLICTING_ATTRIBUTES:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_DATA_PROTECT;
+ *asc = SPDK_SCSI_ASC_WRITE_PROTECTED;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER:
+ case SPDK_NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED:
+ case SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED:
+ case SPDK_NVME_SC_INVALID_FIRMWARE_SLOT:
+ case SPDK_NVME_SC_INVALID_FIRMWARE_IMAGE:
+ case SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR:
+ case SPDK_NVME_SC_INVALID_LOG_PAGE:
+ case SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET:
+ case SPDK_NVME_SC_INVALID_QUEUE_DELETION:
+ case SPDK_NVME_SC_FEATURE_ID_NOT_SAVEABLE:
+ case SPDK_NVME_SC_FEATURE_NOT_CHANGEABLE:
+ case SPDK_NVME_SC_FEATURE_NOT_NAMESPACE_SPECIFIC:
+ case SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET:
+ case SPDK_NVME_SC_FIRMWARE_REQ_RESET:
+ case SPDK_NVME_SC_FIRMWARE_REQ_MAX_TIME_VIOLATION:
+ case SPDK_NVME_SC_FIRMWARE_ACTIVATION_PROHIBITED:
+ case SPDK_NVME_SC_OVERLAPPING_RANGE:
+ case SPDK_NVME_SC_NAMESPACE_INSUFFICIENT_CAPACITY:
+ case SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE:
+ case SPDK_NVME_SC_NAMESPACE_ALREADY_ATTACHED:
+ case SPDK_NVME_SC_NAMESPACE_IS_PRIVATE:
+ case SPDK_NVME_SC_NAMESPACE_NOT_ATTACHED:
+ case SPDK_NVME_SC_THINPROVISIONING_NOT_SUPPORTED:
+ case SPDK_NVME_SC_CONTROLLER_LIST_INVALID:
+ case SPDK_NVME_SC_INVALID_PROTECTION_INFO:
+ default:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ }
+ break;
+ case SPDK_NVME_SCT_MEDIA_ERROR:
+ switch (nvme_sc) {
+ case SPDK_NVME_SC_WRITE_FAULTS:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_UNRECOVERED_READ_ERROR:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_UNRECOVERED_READ_ERROR;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_GUARD_CHECK_ERROR:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+ *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+ break;
+ case SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+ *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+ break;
+ case SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MEDIUM_ERROR;
+ *asc = SPDK_SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+ *ascq = SPDK_SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+ break;
+ case SPDK_NVME_SC_COMPARE_FAILURE:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_MISCOMPARE;
+ *asc = SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ case SPDK_NVME_SC_ACCESS_DENIED:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_DATA_PROTECT;
+ *asc = SPDK_SCSI_ASC_ACCESS_DENIED;
+ *ascq = SPDK_SCSI_ASCQ_NO_ACCESS_RIGHTS;
+ break;
+ case SPDK_NVME_SC_DEALLOCATED_OR_UNWRITTEN_BLOCK:
+ default:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ }
+ break;
+ case SPDK_NVME_SCT_VENDOR_SPECIFIC:
+ default:
+ *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
+ *sk = SPDK_SCSI_SENSE_ILLEGAL_REQUEST;
+ *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
+ *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+ break;
+ }
+}
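+
+/* Illustrative usage sketch: feeding a failed bdev_io's NVMe status through
+ * the translation table above. The bdev_io is assumed to come from a
+ * completion callback with NVMe error status recorded in internal.error.nvme. */
+static void
+example_translate(const struct spdk_bdev_io *bdev_io)
+{
+	int sc, sk, asc, ascq;
+
+	spdk_scsi_nvme_translate(bdev_io, &sc, &sk, &asc, &ascq);
+	/* sc/sk/asc/ascq can now be packed into a SCSI sense data buffer. */
+}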
diff --git a/src/spdk/lib/bdev/split/Makefile b/src/spdk/lib/bdev/split/Makefile
new file mode 100644
index 00000000..46edf89a
--- /dev/null
+++ b/src/spdk/lib/bdev/split/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = vbdev_split.c vbdev_split_rpc.c
+LIBNAME = vbdev_split
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/split/vbdev_split.c b/src/spdk/lib/bdev/split/vbdev_split.c
new file mode 100644
index 00000000..97f11984
--- /dev/null
+++ b/src/spdk/lib/bdev/split/vbdev_split.c
@@ -0,0 +1,565 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is a simple example of a virtual block device that takes a single
+ * bdev and slices it into multiple smaller bdevs.
+ */
+
+#include "vbdev_split.h"
+
+#include "spdk/rpc.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/string.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+struct spdk_vbdev_split_config {
+ char *base_bdev;
+ unsigned split_count;
+ uint64_t split_size_mb;
+
+ struct spdk_bdev_part_base *split_base;
+ bool removed;
+
+ TAILQ_ENTRY(spdk_vbdev_split_config) tailq;
+};
+
+static TAILQ_HEAD(, spdk_vbdev_split_config) g_split_config = TAILQ_HEAD_INITIALIZER(
+ g_split_config);
+static SPDK_BDEV_PART_TAILQ g_split_disks = TAILQ_HEAD_INITIALIZER(g_split_disks);
+
+struct vbdev_split_channel {
+ struct spdk_bdev_part_channel part_ch;
+};
+
+struct vbdev_split_bdev_io {
+ struct spdk_io_channel *ch;
+ struct spdk_bdev_io *bdev_io;
+
+ /* for bdev_io_wait */
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+static void vbdev_split_del_config(struct spdk_vbdev_split_config *cfg);
+
+static int vbdev_split_init(void);
+static void vbdev_split_fini(void);
+static void vbdev_split_examine(struct spdk_bdev *bdev);
+static int vbdev_split_config_json(struct spdk_json_write_ctx *w);
+static int vbdev_split_get_ctx_size(void);
+
+static void
+vbdev_split_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io);
+
+static struct spdk_bdev_module split_if = {
+ .name = "split",
+ .module_init = vbdev_split_init,
+ .module_fini = vbdev_split_fini,
+ .get_ctx_size = vbdev_split_get_ctx_size,
+ .examine_config = vbdev_split_examine,
+ .config_json = vbdev_split_config_json,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&split_if)
+
+static void
+vbdev_split_base_free(void *ctx)
+{
+ struct spdk_vbdev_split_config *cfg = ctx;
+
+ cfg->split_base = NULL;
+ if (cfg->removed) {
+ vbdev_split_del_config(cfg);
+ }
+}
+
+static int
+vbdev_split_destruct(void *ctx)
+{
+ struct spdk_bdev_part *part = ctx;
+
+ return spdk_bdev_part_free(part);
+}
+
+static void
+vbdev_split_base_bdev_hotremove_cb(void *_base_bdev)
+{
+ spdk_bdev_part_base_hotremove(_base_bdev, &g_split_disks);
+}
+
+static void
+vbdev_split_resubmit_io(void *arg)
+{
+ struct vbdev_split_bdev_io *split_io = (struct vbdev_split_bdev_io *)arg;
+
+ vbdev_split_submit_request(split_io->ch, split_io->bdev_io);
+}
+
+static void
+vbdev_split_queue_io(struct vbdev_split_bdev_io *split_io)
+{
+ int rc;
+
+ split_io->bdev_io_wait.bdev = split_io->bdev_io->bdev;
+ split_io->bdev_io_wait.cb_fn = vbdev_split_resubmit_io;
+ split_io->bdev_io_wait.cb_arg = split_io;
+
+ rc = spdk_bdev_queue_io_wait(split_io->bdev_io->bdev,
+ split_io->ch, &split_io->bdev_io_wait);
+ if (rc != 0) {
+ SPDK_ERRLOG("Queue io failed in vbdev_split_queue_io, rc=%d\n", rc);
+ spdk_bdev_io_complete(split_io->bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static void
+vbdev_split_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_split_channel *ch = spdk_io_channel_get_ctx(_ch);
+ struct vbdev_split_bdev_io *io_ctx = (struct vbdev_split_bdev_io *)bdev_io->driver_ctx;
+ int rc;
+
+ rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
+ if (rc) {
+ if (rc == -ENOMEM) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "split: no memory, queue io.\n");
+ io_ctx->ch = _ch;
+ io_ctx->bdev_io = bdev_io;
+ vbdev_split_queue_io(io_ctx);
+ } else {
+ SPDK_ERRLOG("split: error on io submission, rc=%d.\n", rc);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
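+
+/* Note on the -ENOMEM path above: rather than failing the I/O, the request
+ * is parked on the bdev layer's io_wait queue via vbdev_split_queue_io() and
+ * replayed by vbdev_split_resubmit_io() once the base bdev can allocate a
+ * bdev_io again. */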
+
+static int
+vbdev_split_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct spdk_bdev_part *part = ctx;
+ struct spdk_bdev *split_base_bdev = spdk_bdev_part_get_base_bdev(part);
+ uint64_t offset_blocks = spdk_bdev_part_get_offset_blocks(part);
+
+ spdk_json_write_name(w, "split");
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "base_bdev");
+ spdk_json_write_string(w, spdk_bdev_get_name(split_base_bdev));
+ spdk_json_write_name(w, "offset_blocks");
+ spdk_json_write_uint64(w, offset_blocks);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+vbdev_split_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* No config per bdev needed */
+}
+
+static struct spdk_bdev_fn_table vbdev_split_fn_table = {
+ .destruct = vbdev_split_destruct,
+ .submit_request = vbdev_split_submit_request,
+ .dump_info_json = vbdev_split_dump_info_json,
+ .write_config_json = vbdev_split_write_config_json
+};
+
+static int
+vbdev_split_create(struct spdk_vbdev_split_config *cfg)
+{
+ uint64_t split_size_blocks, offset_blocks;
+ uint64_t split_count, max_split_count;
+ uint64_t mb = 1024 * 1024;
+ uint64_t i;
+ int rc;
+ char *name;
+ struct spdk_bdev *base_bdev;
+ struct spdk_bdev *split_base_bdev;
+ struct bdev_part_tailq *split_base_tailq;
+
+ assert(cfg->split_count > 0);
+
+ base_bdev = spdk_bdev_get_by_name(cfg->base_bdev);
+ if (!base_bdev) {
+ return -ENODEV;
+ }
+
+ if (cfg->split_size_mb) {
+ if (((cfg->split_size_mb * mb) % base_bdev->blocklen) != 0) {
+ SPDK_ERRLOG("Split size %" PRIu64 " MB is not possible with block size "
+ "%" PRIu32 "\n",
+ cfg->split_size_mb, base_bdev->blocklen);
+ return -EINVAL;
+ }
+ split_size_blocks = (cfg->split_size_mb * mb) / base_bdev->blocklen;
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "Split size %" PRIu64 " MB specified by user\n",
+ cfg->split_size_mb);
+ } else {
+ split_size_blocks = base_bdev->blockcnt / cfg->split_count;
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "Split size not specified by user\n");
+ }
+
+ max_split_count = base_bdev->blockcnt / split_size_blocks;
+ split_count = cfg->split_count;
+ if (split_count > max_split_count) {
+ SPDK_WARNLOG("Split count %" PRIu64 " is greater than maximum possible split count "
+ "%" PRIu64 " - clamping\n", split_count, max_split_count);
+ split_count = max_split_count;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_SPLIT, "base_bdev: %s split_count: %" PRIu64
+ " split_size_blocks: %" PRIu64 "\n",
+ spdk_bdev_get_name(base_bdev), split_count, split_size_blocks);
+
+ cfg->split_base = spdk_bdev_part_base_construct(base_bdev,
+ vbdev_split_base_bdev_hotremove_cb,
+ &split_if, &vbdev_split_fn_table,
+ &g_split_disks, vbdev_split_base_free, cfg,
+ sizeof(struct vbdev_split_channel), NULL, NULL);
+ if (!cfg->split_base) {
+ SPDK_ERRLOG("Cannot construct bdev part base\n");
+ return -ENOMEM;
+ }
+
+ offset_blocks = 0;
+ for (i = 0; i < split_count; i++) {
+ struct spdk_bdev_part *d;
+
+ d = calloc(1, sizeof(*d));
+ if (d == NULL) {
+ SPDK_ERRLOG("could not allocate bdev part\n");
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ name = spdk_sprintf_alloc("%sp%" PRIu64, cfg->base_bdev, i);
+ if (!name) {
+ SPDK_ERRLOG("could not allocate name\n");
+ free(d);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = spdk_bdev_part_construct(d, cfg->split_base, name, offset_blocks, split_size_blocks,
+ "Split Disk");
+ free(name);
+ if (rc) {
+ SPDK_ERRLOG("could not construct bdev part\n");
+			/* name was already freed above; only d is left to clean up here */
+ free(d);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ offset_blocks += split_size_blocks;
+ }
+
+ return 0;
+err:
+ split_base_bdev = spdk_bdev_part_base_get_bdev(cfg->split_base);
+ split_base_tailq = spdk_bdev_part_base_get_tailq(cfg->split_base);
+ cfg->removed = true;
+ spdk_bdev_part_base_hotremove(split_base_bdev, split_base_tailq);
+ return rc;
+}
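+
+/* Worked example of the sizing math above (hypothetical values): a base
+ * bdev with blockcnt = 2097152 and blocklen = 512 (1 GiB) split with
+ * split_size_mb = 64 gives
+ *   split_size_blocks = 64 * 1048576 / 512 = 131072
+ *   max_split_count   = 2097152 / 131072  = 16
+ * so a requested split_count of 32 would be clamped to 16. */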
+
+static void
+vbdev_split_del_config(struct spdk_vbdev_split_config *cfg)
+{
+ TAILQ_REMOVE(&g_split_config, cfg, tailq);
+ free(cfg->base_bdev);
+ free(cfg);
+}
+
+static void
+vbdev_split_destruct_config(struct spdk_vbdev_split_config *cfg)
+{
+ struct spdk_bdev *split_base_bdev;
+ struct bdev_part_tailq *split_base_tailq;
+
+ cfg->removed = true;
+ if (cfg->split_base != NULL) {
+ split_base_bdev = spdk_bdev_part_base_get_bdev(cfg->split_base);
+ split_base_tailq = spdk_bdev_part_base_get_tailq(cfg->split_base);
+ spdk_bdev_part_base_hotremove(split_base_bdev, split_base_tailq);
+ } else {
+ vbdev_split_del_config(cfg);
+ }
+}
+
+static void
+vbdev_split_clear_config(void)
+{
+ struct spdk_vbdev_split_config *cfg, *tmp_cfg;
+
+ TAILQ_FOREACH_SAFE(cfg, &g_split_config, tailq, tmp_cfg) {
+ vbdev_split_destruct_config(cfg);
+ }
+}
+
+static struct spdk_vbdev_split_config *
+vbdev_split_config_find_by_base_name(const char *base_bdev_name)
+{
+ struct spdk_vbdev_split_config *cfg;
+
+ TAILQ_FOREACH(cfg, &g_split_config, tailq) {
+ if (strcmp(cfg->base_bdev, base_bdev_name) == 0) {
+ return cfg;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+vbdev_split_add_config(const char *base_bdev_name, unsigned split_count, uint64_t split_size,
+ struct spdk_vbdev_split_config **config)
+{
+ struct spdk_vbdev_split_config *cfg;
+ assert(base_bdev_name);
+
+ if (base_bdev_name == NULL) {
+		SPDK_ERRLOG("Split bdev config: no base bdev provided.\n");
+ return -EINVAL;
+ }
+
+ if (split_count == 0) {
+		SPDK_ERRLOG("Split bdev config: split_count can't be 0.\n");
+ return -EINVAL;
+ }
+
+ /* Check if we already have 'base_bdev_name' registered in config */
+ cfg = vbdev_split_config_find_by_base_name(base_bdev_name);
+ if (cfg) {
+		SPDK_ERRLOG("Split bdev config for base bdev '%s' already exists.\n", base_bdev_name);
+ return -EEXIST;
+ }
+
+ cfg = calloc(1, sizeof(*cfg));
+ if (!cfg) {
+		SPDK_ERRLOG("calloc(): Out of memory\n");
+ return -ENOMEM;
+ }
+
+ cfg->base_bdev = strdup(base_bdev_name);
+ if (!cfg->base_bdev) {
+		SPDK_ERRLOG("strdup(): Out of memory\n");
+ free(cfg);
+ return -ENOMEM;
+ }
+
+ cfg->split_count = split_count;
+ cfg->split_size_mb = split_size;
+ TAILQ_INSERT_TAIL(&g_split_config, cfg, tailq);
+ if (config) {
+ *config = cfg;
+ }
+
+ return 0;
+}
+
+static int
+vbdev_split_init(void)
+{
+ struct spdk_conf_section *sp;
+ const char *base_bdev_name;
+ const char *split_count_str;
+ const char *split_size_str;
+ int rc, i, split_count, split_size;
+
+ sp = spdk_conf_find_section(NULL, "Split");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ for (i = 0; ; i++) {
+ if (!spdk_conf_section_get_nval(sp, "Split", i)) {
+ break;
+ }
+
+ base_bdev_name = spdk_conf_section_get_nmval(sp, "Split", i, 0);
+ if (!base_bdev_name) {
+ SPDK_ERRLOG("Split configuration missing bdev name\n");
+ rc = -EINVAL;
+ goto err;
+ }
+
+ split_count_str = spdk_conf_section_get_nmval(sp, "Split", i, 1);
+ if (!split_count_str) {
+ SPDK_ERRLOG("Split configuration missing split count\n");
+ rc = -EINVAL;
+ goto err;
+ }
+
+ split_count = atoi(split_count_str);
+ if (split_count < 1) {
+ SPDK_ERRLOG("Invalid Split count %d\n", split_count);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ /* Optional split size in MB */
+ split_size = 0;
+ split_size_str = spdk_conf_section_get_nmval(sp, "Split", i, 2);
+ if (split_size_str) {
+ split_size = atoi(split_size_str);
+ if (split_size <= 0) {
+ SPDK_ERRLOG("Invalid Split size %d\n", split_size);
+ rc = -EINVAL;
+ goto err;
+ }
+ }
+
+ rc = vbdev_split_add_config(base_bdev_name, split_count, split_size, NULL);
+ if (rc != 0) {
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ vbdev_split_clear_config();
+ return rc;
+}
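+
+/* Example of the legacy config section parsed above; the column layout
+ * follows the spdk_conf_section_get_nmval() calls and the bdev names are
+ * hypothetical:
+ *
+ *   [Split]
+ *     # Split <base_bdev> <split_count> [<split_size_mb>]
+ *     Split Malloc0 4
+ *     Split Nvme0n1 8 64
+ */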
+
+static void
+vbdev_split_fini(void)
+{
+ vbdev_split_clear_config();
+}
+
+static void
+vbdev_split_examine(struct spdk_bdev *bdev)
+{
+ struct spdk_vbdev_split_config *cfg = vbdev_split_config_find_by_base_name(bdev->name);
+
+ if (cfg != NULL && cfg->removed == false) {
+ assert(cfg->split_base == NULL);
+
+ if (vbdev_split_create(cfg)) {
+ SPDK_ERRLOG("could not split bdev %s\n", bdev->name);
+ }
+ }
+ spdk_bdev_module_examine_done(&split_if);
+}
+
+static int
+vbdev_split_config_json(struct spdk_json_write_ctx *w)
+{
+ struct spdk_vbdev_split_config *cfg;
+
+ TAILQ_FOREACH(cfg, &g_split_config, tailq) {
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_split_vbdev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "base_bdev", cfg->base_bdev);
+ spdk_json_write_named_uint32(w, "split_count", cfg->split_count);
+ spdk_json_write_named_uint64(w, "split_size_mb", cfg->split_size_mb);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ }
+
+ return 0;
+}
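+
+/* For reference, each entry emitted above has the shape (values
+ * hypothetical):
+ *   {"method": "construct_split_vbdev",
+ *    "params": {"base_bdev": "Malloc0", "split_count": 4, "split_size_mb": 0}}
+ */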
+
+int
+create_vbdev_split(const char *base_bdev_name, unsigned split_count, uint64_t split_size_mb)
+{
+ int rc;
+ struct spdk_vbdev_split_config *cfg;
+
+ rc = vbdev_split_add_config(base_bdev_name, split_count, split_size_mb, &cfg);
+ if (rc) {
+ return rc;
+ }
+
+ rc = vbdev_split_create(cfg);
+ if (rc == -ENODEV) {
+ /* It is ok if base bdev does not exist yet. */
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int
+spdk_vbdev_split_destruct(const char *base_bdev_name)
+{
+ struct spdk_vbdev_split_config *cfg = vbdev_split_config_find_by_base_name(base_bdev_name);
+
+ if (!cfg) {
+ SPDK_ERRLOG("Split configuration for '%s' not found\n", base_bdev_name);
+ return -ENOENT;
+ }
+
+ vbdev_split_destruct_config(cfg);
+ return 0;
+}
+
+struct spdk_bdev_part_base *
+spdk_vbdev_split_get_part_base(struct spdk_bdev *bdev)
+{
+ struct spdk_vbdev_split_config *cfg;
+
+ cfg = vbdev_split_config_find_by_base_name(spdk_bdev_get_name(bdev));
+
+ if (cfg == NULL) {
+ return NULL;
+ }
+
+ return cfg->split_base;
+}
+
+/*
+ * During init we'll be asked how much memory we'd like passed to us
+ * in bdev_io structures as context. Here's where we specify how
+ * much context we want per IO.
+ */
+static int
+vbdev_split_get_ctx_size(void)
+{
+ return sizeof(struct vbdev_split_bdev_io);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_split", SPDK_LOG_VBDEV_SPLIT)
diff --git a/src/spdk/lib/bdev/split/vbdev_split.h b/src/spdk/lib/bdev/split/vbdev_split.h
new file mode 100644
index 00000000..4231d443
--- /dev/null
+++ b/src/spdk/lib/bdev/split/vbdev_split.h
@@ -0,0 +1,68 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VBDEV_SPLIT_H
+#define SPDK_VBDEV_SPLIT_H
+
+#include "spdk/bdev_module.h"
+
+/**
+ * Add the given disk name to the split config. If a bdev named
+ * \c base_bdev_name already exists, the split bdevs are created right away;
+ * otherwise they are created once the base bdev becomes available (during
+ * the examination process).
+ *
+ * \param base_bdev_name Base bdev name
+ * \param split_count number of splits to be created.
+ * \param split_size_mb size of each split bdev in MB. If 0, use base bdev size / split_count.
+ * \return 0 on success. Negative errno code on error.
+ */
+int create_vbdev_split(const char *base_bdev_name, unsigned split_count, uint64_t split_size_mb);
+
+/**
+ * Remove all created split bdevs and split config.
+ *
+ * \param base_bdev_name base bdev name
+ * \return 0 on success or negative errno value.
+ */
+int spdk_vbdev_split_destruct(const char *base_bdev_name);
+
+/**
+ * Get the spdk_bdev_part_base associated with the given split base_bdev.
+ *
+ * \param base_bdev Bdev to get the part_base from
+ * \return pointer to the associated spdk_bdev_part_base
+ * \return NULL if the base_bdev is not being split by the split module
+ */
+struct spdk_bdev_part_base *spdk_vbdev_split_get_part_base(struct spdk_bdev *base_bdev);
+
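+/*
+ * Illustrative usage (bdev name hypothetical):
+ *
+ *   if (create_vbdev_split("Malloc0", 4, 0) == 0) {
+ *       // Malloc0p0..Malloc0p3 appear once Malloc0 has been examined.
+ *   }
+ *   ...
+ *   spdk_vbdev_split_destruct("Malloc0");
+ */
+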
+#endif // SPDK_VBDEV_SPLIT_H
diff --git a/src/spdk/lib/bdev/split/vbdev_split_rpc.c b/src/spdk/lib/bdev/split/vbdev_split_rpc.c
new file mode 100644
index 00000000..fe70346f
--- /dev/null
+++ b/src/spdk/lib/bdev/split/vbdev_split_rpc.c
@@ -0,0 +1,151 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+
+#include "vbdev_split.h"
+#include "spdk_internal/log.h"
+
+struct rpc_construct_split {
+ char *base_bdev;
+ uint32_t split_count;
+ uint64_t split_size_mb;
+};
+
+static const struct spdk_json_object_decoder rpc_construct_split_decoders[] = {
+ {"base_bdev", offsetof(struct rpc_construct_split, base_bdev), spdk_json_decode_string},
+ {"split_count", offsetof(struct rpc_construct_split, split_count), spdk_json_decode_uint32},
+ {"split_size_mb", offsetof(struct rpc_construct_split, split_size_mb), spdk_json_decode_uint64, true},
+};
+
+static void
+spdk_rpc_construct_split_vbdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_split req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *base_bdev;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_construct_split_decoders,
+ SPDK_COUNTOF(rpc_construct_split_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = create_vbdev_split(req.base_bdev, req.split_count, req.split_size_mb);
+ if (rc < 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Failed to create %"PRIu32" split bdevs from '%s': %s",
+ req.split_count, req.base_bdev, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ goto out;
+ }
+
+ spdk_json_write_array_begin(w);
+
+ base_bdev = spdk_bdev_get_by_name(req.base_bdev);
+ if (base_bdev != NULL) {
+ struct spdk_bdev_part_base *split_base;
+ struct bdev_part_tailq *split_base_tailq;
+ struct spdk_bdev_part *split_part;
+ struct spdk_bdev *split_bdev;
+
+ split_base = spdk_vbdev_split_get_part_base(base_bdev);
+
+ assert(split_base != NULL);
+
+ split_base_tailq = spdk_bdev_part_base_get_tailq(split_base);
+ TAILQ_FOREACH(split_part, split_base_tailq, tailq) {
+ split_bdev = spdk_bdev_part_get_bdev(split_part);
+ spdk_json_write_string(w, spdk_bdev_get_name(split_bdev));
+ }
+ }
+
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free(req.base_bdev);
+}
+SPDK_RPC_REGISTER("construct_split_vbdev", spdk_rpc_construct_split_vbdev, SPDK_RPC_RUNTIME)
+
+struct rpc_destruct_split {
+ char *base_bdev;
+};
+
+static const struct spdk_json_object_decoder rpc_destruct_split_decoders[] = {
+ {"base_bdev", offsetof(struct rpc_destruct_split, base_bdev), spdk_json_decode_string},
+};
+
+static void
+spdk_rpc_destruct_split(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_destruct_split req = {};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_destruct_split_decoders,
+ SPDK_COUNTOF(rpc_destruct_split_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = spdk_vbdev_split_destruct(req.base_bdev);
+ if (rc < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ goto out;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+out:
+ free(req.base_bdev);
+}
+SPDK_RPC_REGISTER("destruct_split_vbdev", spdk_rpc_destruct_split, SPDK_RPC_RUNTIME)
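+
+/* Example exchange for the construct RPC above; parameter names come from
+ * the decoders and the bdev names are hypothetical:
+ *   request:  {"jsonrpc": "2.0", "id": 1, "method": "construct_split_vbdev",
+ *              "params": {"base_bdev": "Malloc0", "split_count": 4}}
+ *   response: {"jsonrpc": "2.0", "id": 1,
+ *              "result": ["Malloc0p0", "Malloc0p1", "Malloc0p2", "Malloc0p3"]}
+ */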
diff --git a/src/spdk/lib/bdev/virtio/Makefile b/src/spdk/lib/bdev/virtio/Makefile
new file mode 100644
index 00000000..fabe2b9f
--- /dev/null
+++ b/src/spdk/lib/bdev/virtio/Makefile
@@ -0,0 +1,40 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+C_SRCS = bdev_virtio_scsi.c bdev_virtio_blk.c bdev_virtio_rpc.c
+LIBNAME = bdev_virtio
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio.h b/src/spdk/lib/bdev/virtio/bdev_virtio.h
new file mode 100644
index 00000000..538fab8f
--- /dev/null
+++ b/src/spdk/lib/bdev/virtio/bdev_virtio.h
@@ -0,0 +1,164 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_VIRTIO_H
+#define SPDK_BDEV_VIRTIO_H
+
+#include "spdk/bdev.h"
+#include "spdk/env.h"
+
+/**
+ * Callback for creating virtio bdevs.
+ *
+ * \param ctx opaque context set by the user
+ * \param errnum error code. 0 on success, negative errno on error.
+ * \param bdevs contiguous array of created bdevs
+ * \param bdev_cnt number of bdevs in the `bdevs` array
+ */
+typedef void (*bdev_virtio_create_cb)(void *ctx, int errnum,
+ struct spdk_bdev **bdevs, size_t bdev_cnt);
+
+/**
+ * Callback for removing virtio devices.
+ *
+ * \param ctx opaque context set by the user
+ * \param errnum error code. 0 on success, negative errno on error.
+ */
+typedef void (*bdev_virtio_remove_cb)(void *ctx, int errnum);
+
+/**
+ * Connect to a vhost-user Unix domain socket and create a Virtio SCSI device.
+ * If the connection is successful, the device will be automatically scanned.
+ * The scan consists of probing the targets on the device and will result in
+ * creating possibly multiple Virtio SCSI bdevs - one for each target. Currently
+ * only one LUN per target is detected - LUN0. Note that the bdev creation is
+ * run asynchronously in the background. After it's finished, the `cb_fn`
+ * callback is called.
+ *
+ * \param name name for the virtio device. It will be inherited by all created
+ * bdevs, which are named in the following format: <name>t<target_id>
+ * \param path path to the socket
+ * \param num_queues max number of request virtqueues to use. `vdev` will be
+ * started successfully even if the host device supports fewer queues than requested.
+ * \param queue_size depth of each queue
+ * \param cb_fn function to be called after scanning all targets on the virtio
+ * device. It's optional, can be NULL. See \c bdev_virtio_create_cb.
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success (device scan is started) or negative error code.
+ * In case of error the \c cb_fn is not called.
+ */
+int bdev_virtio_user_scsi_dev_create(const char *name, const char *path,
+ unsigned num_queues, unsigned queue_size,
+ bdev_virtio_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Attach virtio-pci device. This creates a Virtio SCSI device with the same
+ * capabilities as the vhost-user equivalent. The device will be automatically
+ * scanned for exposed SCSI targets. This will result in creating possibly multiple
+ * Virtio SCSI bdevs - one for each target. Currently only one LUN per target is
+ * detected - LUN0. Note that the bdev creation is run asynchronously in the
+ * background. After it's finished, the `cb_fn` callback is called.
+ *
+ * \param name name for the virtio device. It will be inherited by all created
+ * bdevs, which are named in the following format: <name>t<target_id>
+ * \param pci_addr PCI address of the device to attach
+ * \param cb_fn function to be called after scanning all targets on the virtio
+ * device. It's optional, can be NULL. See \c bdev_virtio_create_cb.
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success (device scan is started) or negative error code.
+ * In case of error the \c cb_fn is not called.
+ */
+int bdev_virtio_pci_scsi_dev_create(const char *name, struct spdk_pci_addr *pci_addr,
+ bdev_virtio_create_cb cb_fn, void *cb_arg);
+
+/**
+ * Remove a Virtio device with given name. This will destroy all bdevs exposed
+ * by this device.
+ *
+ * \param name virtio device name
+ * \param cb_fn function to be called after the device is removed. It's
+ * optional, can be NULL. See \c bdev_virtio_remove_cb. Possible
+ * error codes are:
+ * * ENODEV - couldn't find device with given name
+ * * EBUSY - device is already being removed
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success or -ENODEV if scsi dev does not exist
+ */
+int bdev_virtio_scsi_dev_remove(const char *name,
+ bdev_virtio_remove_cb cb_fn, void *cb_arg);
+
+/**
+ * Remove a Virtio blk device with the given name.
+ *
+ * \param name virtio blk device name
+ * \param cb_fn function to be called after removing bdev
+ * \param cb_arg argument for the `cb_fn`
+ * \return zero on success, -ENODEV if no bdev with 'name' exists or if the
+ * named bdev is not a virtio blk device.
+ */
+int bdev_virtio_blk_dev_remove(const char *name,
+ bdev_virtio_remove_cb cb_fn, void *cb_arg);
+
+/**
+ * List all created Virtio-SCSI devices.
+ *
+ * \param write_ctx JSON context to write into
+ */
+void bdev_virtio_scsi_dev_list(struct spdk_json_write_ctx *write_ctx);
+
+/**
+ * Connect to a vhost-user Unix domain socket and create a Virtio BLK bdev.
+ *
+ * \param name name for the virtio bdev
+ * \param path path to the socket
+ * \param num_queues max number of request virtqueues to use. `vdev` will be
+ * started successfully even if the host device supports fewer queues than requested.
+ * \param queue_size depth of each queue
+ * \return virtio-blk bdev or NULL
+ */
+struct spdk_bdev *bdev_virtio_user_blk_dev_create(const char *name, const char *path,
+ unsigned num_queues, unsigned queue_size);
+
+/**
+ * Attach virtio-pci device. This creates a Virtio BLK device with the same
+ * capabilities as the vhost-user equivalent.
+ *
+ * \param name name for the virtio device and the resulting blk bdev
+ * \param pci_addr PCI address of the device to attach
+ * \return virtio-blk bdev or NULL
+ */
+struct spdk_bdev *bdev_virtio_pci_blk_dev_create(const char *name,
+ struct spdk_pci_addr *pci_addr);
+
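+/*
+ * Illustrative usage of the vhost-user SCSI path declared above (socket
+ * path and names are hypothetical):
+ *
+ *   static void
+ *   scan_done(void *ctx, int errnum, struct spdk_bdev **bdevs, size_t bdev_cnt)
+ *   {
+ *       size_t i;
+ *
+ *       for (i = 0; errnum == 0 && i < bdev_cnt; i++) {
+ *           printf("created %s\n", spdk_bdev_get_name(bdevs[i]));
+ *       }
+ *   }
+ *
+ *   bdev_virtio_user_scsi_dev_create("VirtioScsi0", "/var/tmp/vhost.0",
+ *                                    4, 128, scan_done, NULL);
+ */
+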
+#endif /* SPDK_BDEV_VIRTIO_H */
diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c b/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c
new file mode 100644
index 00000000..598f7f15
--- /dev/null
+++ b/src/spdk/lib/bdev/virtio/bdev_virtio_blk.c
@@ -0,0 +1,707 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/json.h"
+
+#include "spdk_internal/assert.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/virtio.h"
+
+#include <linux/virtio_blk.h>
+
+#include "bdev_virtio.h"
+
+struct virtio_blk_dev {
+ struct virtio_dev vdev;
+ struct spdk_bdev bdev;
+ bool readonly;
+};
+
+struct virtio_blk_io_ctx {
+ struct iovec iov_req;
+ struct iovec iov_resp;
+ struct virtio_blk_outhdr req;
+ uint8_t resp;
+};
+
+struct bdev_virtio_blk_io_channel {
+ struct virtio_dev *vdev;
+
+ /** Virtqueue exclusively assigned to this channel. */
+ struct virtqueue *vq;
+
+ /** Virtio response poller. */
+ struct spdk_poller *poller;
+};
+
+/* Features desired/implemented by this driver. */
+#define VIRTIO_BLK_DEV_SUPPORTED_FEATURES \
+ (1ULL << VIRTIO_BLK_F_BLK_SIZE | \
+ 1ULL << VIRTIO_BLK_F_TOPOLOGY | \
+ 1ULL << VIRTIO_BLK_F_MQ | \
+ 1ULL << VIRTIO_BLK_F_RO | \
+ 1ULL << VIRTIO_RING_F_EVENT_IDX | \
+ 1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
+
+static int bdev_virtio_initialize(void);
+static int bdev_virtio_blk_get_ctx_size(void);
+
+static struct spdk_bdev_module virtio_blk_if = {
+ .name = "virtio_blk",
+ .module_init = bdev_virtio_initialize,
+ .get_ctx_size = bdev_virtio_blk_get_ctx_size,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&virtio_blk_if)
+
+static int bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf);
+static void bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf);
+
+static struct virtio_blk_io_ctx *
+bdev_virtio_blk_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_blk_outhdr *req;
+ uint8_t *resp;
+ struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
+
+ req = &io_ctx->req;
+ resp = &io_ctx->resp;
+
+ io_ctx->iov_req.iov_base = req;
+ io_ctx->iov_req.iov_len = sizeof(*req);
+
+ io_ctx->iov_resp.iov_base = resp;
+ io_ctx->iov_resp.iov_len = sizeof(*resp);
+
+ memset(req, 0, sizeof(*req));
+ return io_ctx;
+}
+
+static void
+bdev_virtio_blk_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct bdev_virtio_blk_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch);
+ struct virtqueue *vq = virtio_channel->vq;
+ struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
+ int rc;
+
+ rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2);
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ return;
+ } else if (rc != 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
+ virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ bdev_io->type == SPDK_BDEV_IO_TYPE_READ ?
+ SPDK_VIRTIO_DESC_WR : SPDK_VIRTIO_DESC_RO);
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
+
+ virtqueue_req_flush(vq);
+}
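+
+/* The chain built above follows the standard virtio-blk request layout: a
+ * device-readable outhdr descriptor, the data buffers (device-writable for
+ * reads, device-readable for writes), and a final device-writable one-byte
+ * status. */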
+
+static void
+bdev_virtio_rw(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_blk_io_ctx *io_ctx = bdev_virtio_blk_init_io_vreq(ch, bdev_io);
+ struct virtio_blk_outhdr *req = &io_ctx->req;
+
+ req->type = bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE ?
+ VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
+
+ req->sector = bdev_io->u.bdev.offset_blocks *
+ spdk_bdev_get_block_size(bdev_io->bdev) / 512;
+
+ bdev_virtio_blk_send_io(ch, bdev_io);
+}
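+
+/* Worked example of the sector math above: a request at offset_blocks = 10
+ * on a 4096-byte-block bdev starts at virtio sector 10 * 4096 / 512 = 80,
+ * since virtio-blk sectors are always 512 bytes. */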
+
+static int
+_bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_blk_dev *bvdev = bdev_io->bdev->ctxt;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_virtio_rw,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ if (bvdev->readonly) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ } else {
+ bdev_virtio_rw(ch, bdev_io);
+ }
+ return 0;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ default:
+ return -1;
+ }
+
+ SPDK_UNREACHABLE();
+}
+
+static void
+bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ if (_bdev_virtio_submit_request(ch, bdev_io) < 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct virtio_blk_dev *bvdev = ctx;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ return !bvdev->readonly;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ default:
+ return false;
+ }
+}
+
+static struct spdk_io_channel *
+bdev_virtio_get_io_channel(void *ctx)
+{
+ struct virtio_blk_dev *bvdev = ctx;
+
+ return spdk_get_io_channel(bvdev);
+}
+
+static void
+virtio_blk_dev_unregister_cb(void *io_device)
+{
+ struct virtio_blk_dev *bvdev = io_device;
+ struct virtio_dev *vdev = &bvdev->vdev;
+
+ virtio_dev_stop(vdev);
+ virtio_dev_destruct(vdev);
+ spdk_bdev_destruct_done(&bvdev->bdev, 0);
+ free(bvdev);
+}
+
+static int
+bdev_virtio_disk_destruct(void *ctx)
+{
+ struct virtio_blk_dev *bvdev = ctx;
+
+ spdk_io_device_unregister(bvdev, virtio_blk_dev_unregister_cb);
+ return 1;
+}
+
+int
+bdev_virtio_blk_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg)
+{
+ struct spdk_bdev *bdev;
+
+ bdev = spdk_bdev_get_by_name(name);
+ if (bdev == NULL) {
+ return -ENODEV;
+ }
+
+ if (bdev->module != &virtio_blk_if) {
+ return -ENODEV;
+ }
+
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+
+ return 0;
+}
+
+static int
+bdev_virtio_dump_json_config(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct virtio_blk_dev *bvdev = ctx;
+
+ virtio_dev_dump_json_info(&bvdev->vdev, w);
+ return 0;
+}
+
+static void
+bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ struct virtio_blk_dev *bvdev = bdev->ctxt;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_virtio_dev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", bvdev->vdev.name);
+ spdk_json_write_named_string(w, "dev_type", "blk");
+
+ /* Write transport specific parameters. */
+ bvdev->vdev.backend_ops->write_json_config(&bvdev->vdev, w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+static const struct spdk_bdev_fn_table virtio_fn_table = {
+ .destruct = bdev_virtio_disk_destruct,
+ .submit_request = bdev_virtio_submit_request,
+ .io_type_supported = bdev_virtio_io_type_supported,
+ .get_io_channel = bdev_virtio_get_io_channel,
+ .dump_info_json = bdev_virtio_dump_json_config,
+ .write_config_json = bdev_virtio_write_config_json,
+};
+
+static void
+bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_blk_io_ctx *io_ctx = (struct virtio_blk_io_ctx *)bdev_io->driver_ctx;
+
+ spdk_bdev_io_complete(bdev_io, io_ctx->resp == VIRTIO_BLK_S_OK ?
+ SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+static int
+bdev_virtio_poll(void *arg)
+{
+ struct bdev_virtio_blk_io_channel *ch = arg;
+ void *io[32];
+ uint32_t io_len[32];
+ uint16_t i, cnt;
+
+ cnt = virtio_recv_pkts(ch->vq, io, io_len, SPDK_COUNTOF(io));
+ for (i = 0; i < cnt; ++i) {
+ bdev_virtio_io_cpl(io[i]);
+ }
+
+ return cnt;
+}
+
+static int
+bdev_virtio_blk_ch_create_cb(void *io_device, void *ctx_buf)
+{
+ struct virtio_blk_dev *bvdev = io_device;
+ struct virtio_dev *vdev = &bvdev->vdev;
+ struct bdev_virtio_blk_io_channel *ch = ctx_buf;
+ struct virtqueue *vq;
+ int32_t queue_idx;
+
+ queue_idx = virtio_dev_find_and_acquire_queue(vdev, 0);
+ if (queue_idx < 0) {
+ SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n");
+ return -1;
+ }
+
+ vq = vdev->vqs[queue_idx];
+
+ ch->vdev = vdev;
+ ch->vq = vq;
+
+ ch->poller = spdk_poller_register(bdev_virtio_poll, ch, 0);
+ return 0;
+}
+
+static void
+bdev_virtio_blk_ch_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct virtio_blk_dev *bvdev = io_device;
+ struct virtio_dev *vdev = &bvdev->vdev;
+ struct bdev_virtio_blk_io_channel *ch = ctx_buf;
+ struct virtqueue *vq = ch->vq;
+
+ spdk_poller_unregister(&ch->poller);
+ virtio_dev_release_queue(vdev, vq->vq_queue_index);
+}
+
+static int
+virtio_blk_dev_init(struct virtio_blk_dev *bvdev, uint16_t max_queues)
+{
+ struct virtio_dev *vdev = &bvdev->vdev;
+ struct spdk_bdev *bdev = &bvdev->bdev;
+ uint64_t capacity, num_blocks;
+ uint32_t block_size;
+ uint16_t host_max_queues;
+ int rc;
+
+ if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) {
+ rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, blk_size),
+ &block_size, sizeof(block_size));
+ if (rc) {
+ SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
+ return rc;
+ }
+
+ if (block_size == 0 || block_size % 512 != 0) {
+ SPDK_ERRLOG("%s: invalid block size (%"PRIu32"). Must be "
+ "a multiple of 512.\n", vdev->name, block_size);
+ return -EIO;
+ }
+ } else {
+ block_size = 512;
+ }
+
+ rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, capacity),
+ &capacity, sizeof(capacity));
+ if (rc) {
+ SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
+ return rc;
+ }
+
+ /* `capacity` is a number of 512-byte sectors. */
+ num_blocks = capacity * 512 / block_size;
+ if (num_blocks == 0) {
+ SPDK_ERRLOG("%s: size too small (size: %"PRIu64", blocksize: %"PRIu32").\n",
+ vdev->name, capacity * 512, block_size);
+ return -EIO;
+ }
+
+ if ((capacity * 512) % block_size != 0) {
+ SPDK_WARNLOG("%s: size has been rounded down to the nearest block size boundary. "
+ "(block size: %"PRIu32", previous size: %"PRIu64", new size: %"PRIu64")\n",
+ vdev->name, block_size, capacity * 512, num_blocks * block_size);
+ }
+
+ if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
+ rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
+ &host_max_queues, sizeof(host_max_queues));
+ if (rc) {
+ SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
+ return rc;
+ }
+ } else {
+ host_max_queues = 1;
+ }
+
+ if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_RO)) {
+ bvdev->readonly = true;
+ }
+
+ if (max_queues == 0) {
+ SPDK_ERRLOG("%s: requested 0 request queues (%"PRIu16" available).\n",
+ vdev->name, host_max_queues);
+ return -EINVAL;
+ }
+
+ if (max_queues > host_max_queues) {
+ SPDK_WARNLOG("%s: requested %"PRIu16" request queues "
+ "but only %"PRIu16" available.\n",
+ vdev->name, max_queues, host_max_queues);
+ max_queues = host_max_queues;
+ }
+
+	/* The bdev is tied to the virtio device; we can reuse its name. */
+ bdev->name = vdev->name;
+ rc = virtio_dev_start(vdev, max_queues, 0);
+ if (rc != 0) {
+ return rc;
+ }
+
+ bdev->product_name = "VirtioBlk Disk";
+ bdev->write_cache = 0;
+ bdev->blocklen = block_size;
+ bdev->blockcnt = num_blocks;
+
+ bdev->ctxt = bvdev;
+ bdev->fn_table = &virtio_fn_table;
+ bdev->module = &virtio_blk_if;
+
+ spdk_io_device_register(bvdev, bdev_virtio_blk_ch_create_cb,
+ bdev_virtio_blk_ch_destroy_cb,
+ sizeof(struct bdev_virtio_blk_io_channel),
+ vdev->name);
+
+ rc = spdk_bdev_register(bdev);
+ if (rc) {
+ SPDK_ERRLOG("Failed to register bdev name=%s\n", bdev->name);
+ spdk_io_device_unregister(bvdev, NULL);
+ virtio_dev_stop(vdev);
+ return rc;
+ }
+
+ return 0;
+}
+
+static struct virtio_blk_dev *
+virtio_pci_blk_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx)
+{
+ static int pci_dev_counter = 0;
+ struct virtio_blk_dev *bvdev;
+ struct virtio_dev *vdev;
+ char *default_name = NULL;
+ uint16_t num_queues;
+ int rc;
+
+ bvdev = calloc(1, sizeof(*bvdev));
+ if (bvdev == NULL) {
+ SPDK_ERRLOG("virtio device calloc failed\n");
+ return NULL;
+ }
+ vdev = &bvdev->vdev;
+
+ if (name == NULL) {
+ default_name = spdk_sprintf_alloc("VirtioBlk%"PRIu32, pci_dev_counter++);
+ if (default_name == NULL) {
+			free(bvdev);
+ return NULL;
+ }
+ name = default_name;
+ }
+
+ rc = virtio_pci_dev_init(vdev, name, pci_ctx);
+ free(default_name);
+
+ if (rc != 0) {
+ free(bvdev);
+ return NULL;
+ }
+
+ rc = virtio_dev_reset(vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
+ if (rc != 0) {
+ virtio_dev_destruct(vdev);
+ free(bvdev);
+ return NULL;
+ }
+
+ /* TODO: add a way to limit usable virtqueues */
+ if (virtio_dev_has_feature(vdev, VIRTIO_BLK_F_MQ)) {
+ rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_blk_config, num_queues),
+ &num_queues, sizeof(num_queues));
+ if (rc) {
+ SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
+ virtio_dev_destruct(vdev);
+ free(bvdev);
+ return NULL;
+ }
+ } else {
+ num_queues = 1;
+ }
+
+ rc = virtio_blk_dev_init(bvdev, num_queues);
+ if (rc != 0) {
+ virtio_dev_destruct(vdev);
+ free(bvdev);
+ return NULL;
+ }
+
+ return bvdev;
+}
+
+static struct virtio_blk_dev *
+virtio_user_blk_dev_create(const char *name, const char *path,
+ uint16_t num_queues, uint32_t queue_size)
+{
+ struct virtio_blk_dev *bvdev;
+ int rc;
+
+ bvdev = calloc(1, sizeof(*bvdev));
+ if (bvdev == NULL) {
+ SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path);
+ return NULL;
+ }
+
+ rc = virtio_user_dev_init(&bvdev->vdev, name, path, queue_size);
+ if (rc != 0) {
+		SPDK_ERRLOG("Failed to create virtio device %s: %s\n", name, path);
+ free(bvdev);
+ return NULL;
+ }
+
+ rc = virtio_dev_reset(&bvdev->vdev, VIRTIO_BLK_DEV_SUPPORTED_FEATURES);
+ if (rc != 0) {
+ virtio_dev_destruct(&bvdev->vdev);
+ free(bvdev);
+ return NULL;
+ }
+
+ rc = virtio_blk_dev_init(bvdev, num_queues);
+ if (rc != 0) {
+ virtio_dev_destruct(&bvdev->vdev);
+ free(bvdev);
+ return NULL;
+ }
+
+ return bvdev;
+}
+
+struct bdev_virtio_pci_dev_create_ctx {
+ const char *name;
+ struct virtio_blk_dev *ret;
+};
+
+static int
+bdev_virtio_pci_blk_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
+{
+ struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx;
+
+ create_ctx->ret = virtio_pci_blk_dev_create(create_ctx->name, pci_ctx);
+ if (create_ctx->ret == NULL) {
+ return -1;
+ }
+
+ return 0;
+}
+
+struct spdk_bdev *
+bdev_virtio_pci_blk_dev_create(const char *name, struct spdk_pci_addr *pci_addr)
+{
+ struct bdev_virtio_pci_dev_create_ctx create_ctx;
+
+ create_ctx.name = name;
+ create_ctx.ret = NULL;
+
+ virtio_pci_dev_attach(bdev_virtio_pci_blk_dev_create_cb, &create_ctx,
+ PCI_DEVICE_ID_VIRTIO_BLK_MODERN, pci_addr);
+
+ if (create_ctx.ret == NULL) {
+ return NULL;
+ }
+
+ return &create_ctx.ret->bdev;
+}
+
+static int
+virtio_pci_blk_dev_enumerate_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
+{
+ struct virtio_blk_dev *bvdev;
+
+ bvdev = virtio_pci_blk_dev_create(NULL, pci_ctx);
+ return bvdev == NULL ? -1 : 0;
+}
+
+static int
+bdev_virtio_initialize(void)
+{
+ struct spdk_conf_section *sp;
+ struct virtio_blk_dev *bvdev;
+ char *default_name = NULL;
+ char *path, *type, *name;
+ unsigned vdev_num;
+ int num_queues;
+ bool enable_pci;
+ int rc = 0;
+
+ for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) {
+ if (!spdk_conf_section_match_prefix(sp, "VirtioUser")) {
+ continue;
+ }
+
+ if (sscanf(spdk_conf_section_get_name(sp), "VirtioUser%u", &vdev_num) != 1) {
+ SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n",
+ spdk_conf_section_get_name(sp));
+ return -1;
+ }
+
+ path = spdk_conf_section_get_val(sp, "Path");
+ if (path == NULL) {
+			SPDK_ERRLOG("VirtioUser%u: missing Path\n", vdev_num);
+ return -1;
+ }
+
+ type = spdk_conf_section_get_val(sp, "Type");
+ if (type == NULL || strcmp(type, "Blk") != 0) {
+ continue;
+ }
+
+ num_queues = spdk_conf_section_get_intval(sp, "Queues");
+ if (num_queues < 1) {
+ num_queues = 1;
+ }
+
+ name = spdk_conf_section_get_val(sp, "Name");
+ if (name == NULL) {
+ default_name = spdk_sprintf_alloc("VirtioBlk%u", vdev_num);
+ name = default_name;
+ }
+
+ bvdev = virtio_user_blk_dev_create(name, path, num_queues, 512);
+ free(default_name);
+ default_name = NULL;
+
+ if (bvdev == NULL) {
+ return -1;
+ }
+ }
+
+ sp = spdk_conf_find_section(NULL, "VirtioPci");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ enable_pci = spdk_conf_section_get_boolval(sp, "Enable", false);
+ if (enable_pci) {
+ rc = virtio_pci_dev_enumerate(virtio_pci_blk_dev_enumerate_cb, NULL,
+ PCI_DEVICE_ID_VIRTIO_BLK_MODERN);
+ }
+
+ return rc;
+}
+
+struct spdk_bdev *
+bdev_virtio_user_blk_dev_create(const char *name, const char *path,
+ unsigned num_queues, unsigned queue_size)
+{
+ struct virtio_blk_dev *bvdev;
+
+ bvdev = virtio_user_blk_dev_create(name, path, num_queues, queue_size);
+ if (bvdev == NULL) {
+ return NULL;
+ }
+
+ return &bvdev->bdev;
+}
+
+static int
+bdev_virtio_blk_get_ctx_size(void)
+{
+ return sizeof(struct virtio_blk_io_ctx);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("virtio_blk", SPDK_LOG_VIRTIO_BLK)
diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c b/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c
new file mode 100644
index 00000000..e96fb42a
--- /dev/null
+++ b/src/spdk/lib/bdev/virtio/bdev_virtio_rpc.c
@@ -0,0 +1,613 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_virtio.h"
+
+#define SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT 1
+#define SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE 512
+
+struct rpc_construct_virtio_scsi_dev {
+ char *path;
+ char *pci_address;
+ char *name;
+ uint32_t vq_count;
+ uint32_t vq_size;
+ struct spdk_jsonrpc_request *request;
+};
+
+static const struct spdk_json_object_decoder rpc_construct_virtio_user_scsi_dev[] = {
+ {"path", offsetof(struct rpc_construct_virtio_scsi_dev, path), spdk_json_decode_string },
+ {"name", offsetof(struct rpc_construct_virtio_scsi_dev, name), spdk_json_decode_string },
+	{"vq_count", offsetof(struct rpc_construct_virtio_scsi_dev, vq_count), spdk_json_decode_uint32, true },
+ {"vq_size", offsetof(struct rpc_construct_virtio_scsi_dev, vq_size), spdk_json_decode_uint32, true },
+};
+
+static void
+free_rpc_construct_virtio_scsi_dev(struct rpc_construct_virtio_scsi_dev *req)
+{
+ if (!req) {
+ return;
+ }
+
+ free(req->path);
+ free(req->pci_address);
+ free(req->name);
+ free(req);
+}
+
+static void
+rpc_construct_virtio_scsi_dev_cb(void *ctx, int result, struct spdk_bdev **bdevs, size_t cnt)
+{
+ struct rpc_construct_virtio_scsi_dev *req = ctx;
+ struct spdk_json_write_ctx *w;
+ size_t i;
+
+ if (result) {
+ spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-result));
+ free_rpc_construct_virtio_scsi_dev(req);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(req->request);
+ if (w) {
+ spdk_json_write_array_begin(w);
+
+ for (i = 0; i < cnt; i++) {
+ spdk_json_write_string(w, spdk_bdev_get_name(bdevs[i]));
+ }
+
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(req->request, w);
+ }
+
+ free_rpc_construct_virtio_scsi_dev(ctx);
+}
+
+static void
+spdk_rpc_create_virtio_user_scsi_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_virtio_scsi_dev *req;
+ int rc;
+
+ SPDK_WARNLOG("construct_virtio_user_scsi_bdev command has been deprecated and will be removed "
+		     "in a subsequent release. Please use construct_virtio_dev instead.\n");
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ rc = -ENOMEM;
+ goto invalid;
+ }
+
+ req->pci_address = NULL;
+ req->vq_count = SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT;
+ req->vq_size = SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE;
+
+ if (spdk_json_decode_object(params, rpc_construct_virtio_user_scsi_dev,
+ SPDK_COUNTOF(rpc_construct_virtio_user_scsi_dev),
+ req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ req->request = request;
+ rc = bdev_virtio_user_scsi_dev_create(req->name, req->path, req->vq_count, req->vq_size,
+ rpc_construct_virtio_scsi_dev_cb, req);
+ if (rc < 0) {
+ goto invalid;
+ }
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_construct_virtio_scsi_dev(req);
+}
+SPDK_RPC_REGISTER("construct_virtio_user_scsi_bdev", spdk_rpc_create_virtio_user_scsi_bdev,
+ SPDK_RPC_RUNTIME);
+
+static const struct spdk_json_object_decoder rpc_construct_virtio_pci_scsi_dev[] = {
+ {"pci_address", offsetof(struct rpc_construct_virtio_scsi_dev, pci_address), spdk_json_decode_string },
+ {"name", offsetof(struct rpc_construct_virtio_scsi_dev, name), spdk_json_decode_string },
+};
+
+static void
+spdk_rpc_construct_virtio_pci_scsi_dev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_virtio_scsi_dev *req;
+ struct spdk_pci_addr pci_addr;
+ int rc;
+
+ SPDK_WARNLOG("construct_virtio_pci_scsi_bdev command has been deprecated and will be removed "
+		     "in a subsequent release. Please use construct_virtio_dev instead.\n");
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ rc = -ENOMEM;
+ goto invalid;
+ }
+
+ req->path = NULL;
+
+ if (spdk_json_decode_object(params, rpc_construct_virtio_pci_scsi_dev,
+ SPDK_COUNTOF(rpc_construct_virtio_pci_scsi_dev),
+ req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ if (spdk_pci_addr_parse(&pci_addr, req->pci_address) != 0) {
+ SPDK_ERRLOG("Invalid PCI address '%s'\n", req->pci_address);
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ req->request = request;
+ rc = bdev_virtio_pci_scsi_dev_create(req->name, &pci_addr,
+ rpc_construct_virtio_scsi_dev_cb, req);
+ if (rc < 0) {
+ goto invalid;
+ }
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free_rpc_construct_virtio_scsi_dev(req);
+}
+SPDK_RPC_REGISTER("construct_virtio_pci_scsi_bdev", spdk_rpc_construct_virtio_pci_scsi_dev,
+ SPDK_RPC_RUNTIME);
+
+struct rpc_remove_virtio_dev {
+ char *name;
+};
+
+static const struct spdk_json_object_decoder rpc_remove_virtio_dev[] = {
+ {"name", offsetof(struct rpc_remove_virtio_dev, name), spdk_json_decode_string },
+};
+
+static void
+spdk_rpc_remove_virtio_scsi_bdev_cb(void *ctx, int errnum)
+{
+ struct spdk_jsonrpc_request *request = ctx;
+ struct spdk_json_write_ctx *w;
+
+ if (errnum != 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-errnum));
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_remove_virtio_scsi_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_remove_virtio_dev req = {NULL};
+ int rc;
+
+ SPDK_WARNLOG("remove_virtio_scsi_bdev command has been deprecated and will be removed "
+		     "in a subsequent release. Please use remove_virtio_bdev instead.\n");
+
+ if (spdk_json_decode_object(params, rpc_remove_virtio_dev,
+ SPDK_COUNTOF(rpc_remove_virtio_dev),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = bdev_virtio_scsi_dev_remove(req.name, spdk_rpc_remove_virtio_scsi_bdev_cb, request);
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ free(req.name);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free(req.name);
+}
+SPDK_RPC_REGISTER("remove_virtio_scsi_bdev", spdk_rpc_remove_virtio_scsi_bdev, SPDK_RPC_RUNTIME);
+
+static void
+spdk_rpc_remove_virtio_bdev_cb(void *ctx, int errnum)
+{
+ struct spdk_jsonrpc_request *request = ctx;
+ struct spdk_json_write_ctx *w;
+
+ if (errnum != 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-errnum));
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+spdk_rpc_remove_virtio_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_remove_virtio_dev req = {NULL};
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_remove_virtio_dev,
+ SPDK_COUNTOF(rpc_remove_virtio_dev),
+ &req)) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = bdev_virtio_blk_dev_remove(req.name, spdk_rpc_remove_virtio_bdev_cb, request);
+ if (rc == -ENODEV) {
+ rc = bdev_virtio_scsi_dev_remove(req.name, spdk_rpc_remove_virtio_bdev_cb, request);
+ }
+
+ if (rc != 0) {
+ goto invalid;
+ }
+
+ free(req.name);
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+ free(req.name);
+}
+SPDK_RPC_REGISTER("remove_virtio_bdev", spdk_rpc_remove_virtio_bdev, SPDK_RPC_RUNTIME);
+
+static void
+spdk_rpc_get_virtio_scsi_devs(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct spdk_json_write_ctx *w;
+
+ if (params != NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "get_virtio_scsi_devs requires no parameters");
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ bdev_virtio_scsi_dev_list(w);
+ spdk_jsonrpc_end_result(request, w);
+}
+SPDK_RPC_REGISTER("get_virtio_scsi_devs", spdk_rpc_get_virtio_scsi_devs, SPDK_RPC_RUNTIME)
+
+struct rpc_construct_virtio_blk_dev {
+ char *path;
+ char *pci_address;
+ char *name;
+ uint32_t vq_count;
+ uint32_t vq_size;
+};
+
+static void
+free_rpc_construct_virtio_blk_dev(struct rpc_construct_virtio_blk_dev *req)
+{
+ free(req->path);
+ free(req->pci_address);
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_construct_virtio_user_blk_dev[] = {
+ {"path", offsetof(struct rpc_construct_virtio_blk_dev, path), spdk_json_decode_string },
+ {"name", offsetof(struct rpc_construct_virtio_blk_dev, name), spdk_json_decode_string },
+ {"vq_count", offsetof(struct rpc_construct_virtio_blk_dev, vq_count), spdk_json_decode_uint32, true },
+ {"vq_size", offsetof(struct rpc_construct_virtio_blk_dev, vq_size), spdk_json_decode_uint32, true },
+};
+
+static void
+spdk_rpc_create_virtio_user_blk_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_virtio_blk_dev req = {0};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev;
+ int rc;
+
+ req.pci_address = NULL;
+ req.vq_count = SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT;
+ req.vq_size = SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE;
+
+ SPDK_WARNLOG("construct_virtio_user_blk_bdev command has been deprecated and will be removed "
+		     "in a subsequent release. Please use construct_virtio_dev instead.\n");
+
+ if (spdk_json_decode_object(params, rpc_construct_virtio_user_blk_dev,
+ SPDK_COUNTOF(rpc_construct_virtio_user_blk_dev),
+ &req)) {
+ free_rpc_construct_virtio_blk_dev(&req);
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = bdev_virtio_user_blk_dev_create(req.name, req.path, req.vq_count, req.vq_size);
+ free_rpc_construct_virtio_blk_dev(&req);
+ if (bdev == NULL) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("construct_virtio_user_blk_bdev", spdk_rpc_create_virtio_user_blk_bdev,
+ SPDK_RPC_RUNTIME);
+
+static const struct spdk_json_object_decoder rpc_construct_virtio_pci_blk_dev[] = {
+ {"pci_address", offsetof(struct rpc_construct_virtio_blk_dev, pci_address), spdk_json_decode_string },
+ {"name", offsetof(struct rpc_construct_virtio_blk_dev, name), spdk_json_decode_string },
+};
+
+static void
+spdk_rpc_create_virtio_pci_blk_bdev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_virtio_blk_dev req = {0};
+ struct spdk_json_write_ctx *w;
+ struct spdk_bdev *bdev;
+ struct spdk_pci_addr pci_addr;
+ int rc;
+
+ req.pci_address = NULL;
+
+ SPDK_WARNLOG("construct_virtio_pci_blk_bdev command has been deprecated and will be removed "
+		     "in a subsequent release. Please use construct_virtio_dev instead.\n");
+
+ if (spdk_json_decode_object(params, rpc_construct_virtio_pci_blk_dev,
+ SPDK_COUNTOF(rpc_construct_virtio_pci_blk_dev),
+ &req)) {
+ free_rpc_construct_virtio_blk_dev(&req);
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ if (spdk_pci_addr_parse(&pci_addr, req.pci_address) != 0) {
+ SPDK_ERRLOG("Invalid PCI address '%s'\n", req.pci_address);
+ free_rpc_construct_virtio_blk_dev(&req);
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ bdev = bdev_virtio_pci_blk_dev_create(req.name, &pci_addr);
+ free_rpc_construct_virtio_blk_dev(&req);
+ if (bdev == NULL) {
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ if (w == NULL) {
+ return;
+ }
+
+ spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_end_result(request, w);
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("construct_virtio_pci_blk_bdev", spdk_rpc_create_virtio_pci_blk_bdev,
+ SPDK_RPC_RUNTIME);
+
+struct rpc_construct_virtio_dev {
+ char *name;
+ char *trtype;
+ char *traddr;
+ char *dev_type;
+ uint32_t vq_count;
+ uint32_t vq_size;
+ struct spdk_jsonrpc_request *request;
+};
+
+static const struct spdk_json_object_decoder rpc_construct_virtio_dev[] = {
+ {"name", offsetof(struct rpc_construct_virtio_dev, name), spdk_json_decode_string },
+ {"trtype", offsetof(struct rpc_construct_virtio_dev, trtype), spdk_json_decode_string },
+ {"traddr", offsetof(struct rpc_construct_virtio_dev, traddr), spdk_json_decode_string },
+ {"dev_type", offsetof(struct rpc_construct_virtio_dev, dev_type), spdk_json_decode_string },
+ {"vq_count", offsetof(struct rpc_construct_virtio_dev, vq_count), spdk_json_decode_uint32, true },
+ {"vq_size", offsetof(struct rpc_construct_virtio_dev, vq_size), spdk_json_decode_uint32, true },
+};
+
+static void
+free_rpc_construct_virtio_dev(struct rpc_construct_virtio_dev *req)
+{
+ free(req->name);
+ free(req->trtype);
+ free(req->traddr);
+ free(req->dev_type);
+ free(req);
+}
+
+static void
+spdk_rpc_create_virtio_dev_cb(void *ctx, int result, struct spdk_bdev **bdevs, size_t cnt)
+{
+ struct rpc_construct_virtio_dev *req = ctx;
+ struct spdk_json_write_ctx *w;
+ size_t i;
+
+ if (result) {
+ spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ spdk_strerror(-result));
+ free_rpc_construct_virtio_dev(req);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(req->request);
+ if (w) {
+ spdk_json_write_array_begin(w);
+
+ for (i = 0; i < cnt; i++) {
+ spdk_json_write_string(w, spdk_bdev_get_name(bdevs[i]));
+ }
+
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(req->request, w);
+ }
+
+ free_rpc_construct_virtio_dev(ctx);
+}
+
+static void
+spdk_rpc_create_virtio_dev(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_construct_virtio_dev *req;
+ struct spdk_bdev *bdev;
+ struct spdk_pci_addr pci_addr;
+ bool pci;
+ int rc;
+
+ req = calloc(1, sizeof(*req));
+ if (!req) {
+ SPDK_ERRLOG("calloc() failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(ENOMEM));
+ return;
+ }
+
+ if (spdk_json_decode_object(params, rpc_construct_virtio_dev,
+ SPDK_COUNTOF(rpc_construct_virtio_dev),
+ req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(EINVAL));
+ goto invalid;
+ }
+
+ if (strcmp(req->trtype, "pci") == 0) {
+ if (req->vq_count != 0 || req->vq_size != 0) {
+ SPDK_ERRLOG("VQ count or size is not allowed for PCI transport type\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "vq_count or vq_size is not allowed for PCI transport type.");
+ goto invalid;
+ }
+
+ if (spdk_pci_addr_parse(&pci_addr, req->traddr) != 0) {
+ SPDK_ERRLOG("Invalid PCI address '%s'\n", req->traddr);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid PCI address '%s'", req->traddr);
+ goto invalid;
+ }
+
+ pci = true;
+ } else if (strcmp(req->trtype, "user") == 0) {
+ req->vq_count = req->vq_count == 0 ? SPDK_VIRTIO_USER_DEFAULT_VQ_COUNT : req->vq_count;
+ req->vq_size = req->vq_size == 0 ? SPDK_VIRTIO_USER_DEFAULT_QUEUE_SIZE : req->vq_size;
+ pci = false;
+ } else {
+ SPDK_ERRLOG("Invalid trtype '%s'\n", req->trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid trtype '%s'", req->trtype);
+ goto invalid;
+ }
+
+ req->request = request;
+ if (strcmp(req->dev_type, "blk") == 0) {
+ if (pci) {
+ bdev = bdev_virtio_pci_blk_dev_create(req->name, &pci_addr);
+ } else {
+ bdev = bdev_virtio_user_blk_dev_create(req->name, req->traddr, req->vq_count, req->vq_size);
+ }
+
+		/* Virtio-blk creation does not use a callback, so invoke it manually to send the result. */
+ rc = bdev ? 0 : -EINVAL;
+ spdk_rpc_create_virtio_dev_cb(req, rc, &bdev, bdev ? 1 : 0);
+ } else if (strcmp(req->dev_type, "scsi") == 0) {
+ if (pci) {
+ rc = bdev_virtio_pci_scsi_dev_create(req->name, &pci_addr, spdk_rpc_create_virtio_dev_cb, req);
+ } else {
+ rc = bdev_virtio_user_scsi_dev_create(req->name, req->traddr, req->vq_count, req->vq_size,
+ spdk_rpc_create_virtio_dev_cb, req);
+ }
+
+ if (rc < 0) {
+			/* On error the callback is not invoked, so call it manually to send the result. */
+ spdk_rpc_create_virtio_dev_cb(req, rc, NULL, 0);
+ }
+ } else {
+ SPDK_ERRLOG("Invalid dev_type '%s'\n", req->dev_type);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "Invalid dev_type '%s'", req->dev_type);
+ goto invalid;
+ }
+
+ return;
+invalid:
+ free_rpc_construct_virtio_dev(req);
+}
+SPDK_RPC_REGISTER("construct_virtio_dev", spdk_rpc_create_virtio_dev, SPDK_RPC_RUNTIME);
diff --git a/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c b/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c
new file mode 100644
index 00000000..4ff3db4a
--- /dev/null
+++ b/src/spdk/lib/bdev/virtio/bdev_virtio_scsi.c
@@ -0,0 +1,2017 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/thread.h"
+#include "spdk/scsi_spec.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/json.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+#include "spdk_internal/virtio.h"
+
+#include <linux/virtio_scsi.h>
+
+#include "bdev_virtio.h"
+
+#define BDEV_VIRTIO_MAX_TARGET 64
+#define BDEV_VIRTIO_SCAN_PAYLOAD_SIZE 256
+#define MGMT_POLL_PERIOD_US (1000 * 5)
+#define CTRLQ_RING_SIZE 16
+#define SCAN_REQUEST_RETRIES 5
+
+/* Number of non-request queues - eventq and controlq */
+#define SPDK_VIRTIO_SCSI_QUEUE_NUM_FIXED 2
+
+#define VIRTIO_SCSI_EVENTQ_BUFFER_COUNT 16
+
+#define VIRTIO_SCSI_CONTROLQ 0
+#define VIRTIO_SCSI_EVENTQ 1
+#define VIRTIO_SCSI_REQUESTQ 2
+
+static int bdev_virtio_initialize(void);
+static void bdev_virtio_finish(void);
+
+struct virtio_scsi_dev {
+ /* Generic virtio device data. */
+ struct virtio_dev vdev;
+
+ /** Detected SCSI LUNs */
+ TAILQ_HEAD(, virtio_scsi_disk) luns;
+
+ /** Context for the SCSI target scan. */
+ struct virtio_scsi_scan_base *scan_ctx;
+
+ /** Controlq poller. */
+ struct spdk_poller *mgmt_poller;
+
+ /** Controlq messages to be sent. */
+ struct spdk_ring *ctrlq_ring;
+
+ /** Buffers for the eventq. */
+ struct virtio_scsi_eventq_io *eventq_ios;
+
+ /** Device marked for removal. */
+ bool removed;
+
+ /** Callback to be called after vdev removal. */
+ bdev_virtio_remove_cb remove_cb;
+
+ /** Context for the `remove_cb`. */
+ void *remove_ctx;
+
+ TAILQ_ENTRY(virtio_scsi_dev) tailq;
+};
+
+struct virtio_scsi_io_ctx {
+ struct iovec iov_req;
+ struct iovec iov_resp;
+ union {
+ struct virtio_scsi_cmd_req req;
+ struct virtio_scsi_ctrl_tmf_req tmf_req;
+ };
+ union {
+ struct virtio_scsi_cmd_resp resp;
+ struct virtio_scsi_ctrl_tmf_resp tmf_resp;
+ };
+};
+
+struct virtio_scsi_eventq_io {
+ struct iovec iov;
+ struct virtio_scsi_event ev;
+};
+
+struct virtio_scsi_scan_info {
+ uint64_t num_blocks;
+ uint32_t block_size;
+ uint8_t target;
+ bool unmap_supported;
+ TAILQ_ENTRY(virtio_scsi_scan_info) tailq;
+};
+
+struct virtio_scsi_scan_base {
+ struct virtio_scsi_dev *svdev;
+
+ /** I/O channel used for the scan I/O. */
+ struct bdev_virtio_io_channel *channel;
+
+ bdev_virtio_create_cb cb_fn;
+ void *cb_arg;
+
+ /** Scan all targets on the device. */
+ bool full_scan;
+
+ /** Start a full rescan after receiving next scan I/O response. */
+ bool restart;
+
+ /** Additional targets to be (re)scanned. */
+ TAILQ_HEAD(, virtio_scsi_scan_info) scan_queue;
+
+ /** Remaining attempts for sending the current request. */
+ unsigned retries;
+
+ /** If set, the last scan I/O needs to be resent */
+ bool needs_resend;
+
+ struct virtio_scsi_io_ctx io_ctx;
+ struct iovec iov;
+ uint8_t payload[BDEV_VIRTIO_SCAN_PAYLOAD_SIZE];
+
+ /** Scan results for the current target. */
+ struct virtio_scsi_scan_info info;
+};
+
+struct virtio_scsi_disk {
+ struct spdk_bdev bdev;
+ struct virtio_scsi_dev *svdev;
+ struct virtio_scsi_scan_info info;
+
+ /** Descriptor opened just to be notified of external bdev hotremove. */
+ struct spdk_bdev_desc *notify_desc;
+
+ /** Disk marked for removal. */
+ bool removed;
+ TAILQ_ENTRY(virtio_scsi_disk) link;
+};
+
+struct bdev_virtio_io_channel {
+ struct virtio_scsi_dev *svdev;
+
+ /** Virtqueue exclusively assigned to this channel. */
+ struct virtqueue *vq;
+
+ /** Virtio response poller. */
+ struct spdk_poller *poller;
+};
+
+static TAILQ_HEAD(, virtio_scsi_dev) g_virtio_scsi_devs =
+ TAILQ_HEAD_INITIALIZER(g_virtio_scsi_devs);
+
+static pthread_mutex_t g_virtio_scsi_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/** Module finish in progress */
+static bool g_bdev_virtio_finish = false;
+
+/* Features desired/implemented by this driver. */
+#define VIRTIO_SCSI_DEV_SUPPORTED_FEATURES \
+ (1ULL << VIRTIO_SCSI_F_INOUT | \
+ 1ULL << VIRTIO_SCSI_F_HOTPLUG | \
+ 1ULL << VIRTIO_RING_F_EVENT_IDX | \
+ 1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
+
+static void virtio_scsi_dev_unregister_cb(void *io_device);
+static void virtio_scsi_dev_remove(struct virtio_scsi_dev *svdev,
+ bdev_virtio_remove_cb cb_fn, void *cb_arg);
+static int bdev_virtio_scsi_ch_create_cb(void *io_device, void *ctx_buf);
+static void bdev_virtio_scsi_ch_destroy_cb(void *io_device, void *ctx_buf);
+static void process_scan_resp(struct virtio_scsi_scan_base *base);
+static int bdev_virtio_mgmt_poll(void *arg);
+
+static int
+virtio_scsi_dev_send_eventq_io(struct virtqueue *vq, struct virtio_scsi_eventq_io *io)
+{
+ int rc;
+
+ rc = virtqueue_req_start(vq, io, 1);
+ if (rc != 0) {
+ return -1;
+ }
+
+ virtqueue_req_add_iovs(vq, &io->iov, 1, SPDK_VIRTIO_DESC_WR);
+ virtqueue_req_flush(vq);
+
+ return 0;
+}
+
+static int
+virtio_scsi_dev_init(struct virtio_scsi_dev *svdev, uint16_t max_queues)
+{
+ struct virtio_dev *vdev = &svdev->vdev;
+ struct spdk_ring *ctrlq_ring;
+ struct virtio_scsi_eventq_io *eventq_io;
+ struct virtqueue *eventq;
+ uint16_t i, num_events;
+ int rc;
+
+ rc = virtio_dev_reset(vdev, VIRTIO_SCSI_DEV_SUPPORTED_FEATURES);
+ if (rc != 0) {
+ return rc;
+ }
+
+ rc = virtio_dev_start(vdev, max_queues, SPDK_VIRTIO_SCSI_QUEUE_NUM_FIXED);
+ if (rc != 0) {
+ return rc;
+ }
+
+ ctrlq_ring = spdk_ring_create(SPDK_RING_TYPE_MP_SC, CTRLQ_RING_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (ctrlq_ring == NULL) {
+ SPDK_ERRLOG("Failed to allocate send ring for the controlq.\n");
+ return -1;
+ }
+
+ rc = virtio_dev_acquire_queue(vdev, VIRTIO_SCSI_CONTROLQ);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to acquire the controlq.\n");
+ spdk_ring_free(ctrlq_ring);
+ return -1;
+ }
+
+ rc = virtio_dev_acquire_queue(vdev, VIRTIO_SCSI_EVENTQ);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to acquire the eventq.\n");
+ virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ);
+ spdk_ring_free(ctrlq_ring);
+ return -1;
+ }
+
+ eventq = vdev->vqs[VIRTIO_SCSI_EVENTQ];
+ num_events = spdk_min(eventq->vq_nentries, VIRTIO_SCSI_EVENTQ_BUFFER_COUNT);
+ svdev->eventq_ios = spdk_dma_zmalloc(sizeof(*svdev->eventq_ios) * num_events,
+ 0, NULL);
+ if (svdev->eventq_ios == NULL) {
+ SPDK_ERRLOG("cannot allocate memory for %"PRIu16" eventq buffers\n",
+ num_events);
+ virtio_dev_release_queue(vdev, VIRTIO_SCSI_EVENTQ);
+ virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ);
+ spdk_ring_free(ctrlq_ring);
+ return -1;
+ }
+
+ for (i = 0; i < num_events; i++) {
+ eventq_io = &svdev->eventq_ios[i];
+ eventq_io->iov.iov_base = &eventq_io->ev;
+ eventq_io->iov.iov_len = sizeof(eventq_io->ev);
+ virtio_scsi_dev_send_eventq_io(eventq, eventq_io);
+ }
+
+ svdev->ctrlq_ring = ctrlq_ring;
+
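+	/* The controlq and eventq are shared by all channels, so they are
+	 * serviced by a single management poller (every 5 ms) rather than by
+	 * the per-channel request-queue pollers.
+	 */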
+ svdev->mgmt_poller = spdk_poller_register(bdev_virtio_mgmt_poll, svdev,
+ MGMT_POLL_PERIOD_US);
+
+ TAILQ_INIT(&svdev->luns);
+ svdev->scan_ctx = NULL;
+ svdev->removed = false;
+ svdev->remove_cb = NULL;
+ svdev->remove_ctx = NULL;
+
+ spdk_io_device_register(svdev, bdev_virtio_scsi_ch_create_cb,
+ bdev_virtio_scsi_ch_destroy_cb,
+ sizeof(struct bdev_virtio_io_channel),
+ svdev->vdev.name);
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_INSERT_TAIL(&g_virtio_scsi_devs, svdev, tailq);
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ return 0;
+}
+
+static struct virtio_scsi_dev *
+virtio_pci_scsi_dev_create(const char *name, struct virtio_pci_ctx *pci_ctx)
+{
+ static int pci_dev_counter = 0;
+ struct virtio_scsi_dev *svdev;
+ struct virtio_dev *vdev;
+ char *default_name = NULL;
+ uint32_t num_queues;
+ int rc;
+
+ svdev = calloc(1, sizeof(*svdev));
+ if (svdev == NULL) {
+ SPDK_ERRLOG("virtio device calloc failed\n");
+ return NULL;
+ }
+
+ vdev = &svdev->vdev;
+ if (name == NULL) {
+ default_name = spdk_sprintf_alloc("VirtioScsi%"PRIu32, pci_dev_counter++);
+ if (default_name == NULL) {
+			free(svdev);
+ return NULL;
+ }
+ name = default_name;
+ }
+
+ rc = virtio_pci_dev_init(vdev, name, pci_ctx);
+ free(default_name);
+
+ if (rc != 0) {
+ free(svdev);
+ return NULL;
+ }
+
+ rc = virtio_dev_read_dev_config(vdev, offsetof(struct virtio_scsi_config, num_queues),
+ &num_queues, sizeof(num_queues));
+ if (rc) {
+ SPDK_ERRLOG("%s: config read failed: %s\n", vdev->name, spdk_strerror(-rc));
+ virtio_dev_destruct(vdev);
+ free(svdev);
+ return NULL;
+ }
+
+ rc = virtio_scsi_dev_init(svdev, num_queues);
+ if (rc != 0) {
+ virtio_dev_destruct(vdev);
+ free(svdev);
+ return NULL;
+ }
+
+ return svdev;
+}
+
+static struct virtio_scsi_dev *
+virtio_user_scsi_dev_create(const char *name, const char *path,
+ uint16_t num_queues, uint32_t queue_size)
+{
+ struct virtio_scsi_dev *svdev;
+ struct virtio_dev *vdev;
+ int rc;
+
+ svdev = calloc(1, sizeof(*svdev));
+ if (svdev == NULL) {
+ SPDK_ERRLOG("calloc failed for virtio device %s: %s\n", name, path);
+ return NULL;
+ }
+
+ vdev = &svdev->vdev;
+ rc = virtio_user_dev_init(vdev, name, path, queue_size);
+ if (rc != 0) {
+		SPDK_ERRLOG("Failed to create virtio device %s: %s\n", name, path);
+ free(svdev);
+ return NULL;
+ }
+
+ rc = virtio_scsi_dev_init(svdev, num_queues);
+ if (rc != 0) {
+ virtio_dev_destruct(vdev);
+ free(svdev);
+ return NULL;
+ }
+
+ return svdev;
+}
+
+static struct virtio_scsi_disk *
+virtio_scsi_dev_get_disk_by_id(struct virtio_scsi_dev *svdev, uint8_t target_id)
+{
+ struct virtio_scsi_disk *disk;
+
+ TAILQ_FOREACH(disk, &svdev->luns, link) {
+ if (disk->info.target == target_id) {
+ return disk;
+ }
+ }
+
+ return NULL;
+}
+
+static int virtio_scsi_dev_scan(struct virtio_scsi_dev *svdev,
+ bdev_virtio_create_cb cb_fn, void *cb_arg);
+static int send_scan_io(struct virtio_scsi_scan_base *base);
+static void _virtio_scsi_dev_scan_tgt(struct virtio_scsi_scan_base *base, uint8_t target);
+static int _virtio_scsi_dev_scan_next(struct virtio_scsi_scan_base *base, int rc);
+static void _virtio_scsi_dev_scan_finish(struct virtio_scsi_scan_base *base, int errnum);
+static int virtio_scsi_dev_scan_tgt(struct virtio_scsi_dev *svdev, uint8_t target);
+
+static int
+bdev_virtio_get_ctx_size(void)
+{
+ return sizeof(struct virtio_scsi_io_ctx);
+}
+
+static int
+bdev_virtio_scsi_config_json(struct spdk_json_write_ctx *w)
+{
+ struct virtio_scsi_dev *svdev;
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) {
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "construct_virtio_dev");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", svdev->vdev.name);
+ spdk_json_write_named_string(w, "dev_type", "scsi");
+
+ /* Write transport specific parameters. */
+ svdev->vdev.backend_ops->write_json_config(&svdev->vdev, w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+	}
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+
+ return 0;
+}
+
+static struct spdk_bdev_module virtio_scsi_if = {
+ .name = "virtio_scsi",
+ .module_init = bdev_virtio_initialize,
+ .module_fini = bdev_virtio_finish,
+ .get_ctx_size = bdev_virtio_get_ctx_size,
+ .config_json = bdev_virtio_scsi_config_json,
+ .async_init = true,
+ .async_fini = true,
+};
+
+SPDK_BDEV_MODULE_REGISTER(&virtio_scsi_if)
+
+static struct virtio_scsi_io_ctx *
+bdev_virtio_init_io_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_cmd_req *req;
+ struct virtio_scsi_cmd_resp *resp;
+ struct virtio_scsi_disk *disk = (struct virtio_scsi_disk *)bdev_io->bdev;
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+
+ req = &io_ctx->req;
+ resp = &io_ctx->resp;
+
+ io_ctx->iov_req.iov_base = req;
+ io_ctx->iov_req.iov_len = sizeof(*req);
+
+ io_ctx->iov_resp.iov_base = resp;
+ io_ctx->iov_resp.iov_len = sizeof(*resp);
+
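+	/* Virtio-SCSI single-level LUN addressing: byte 0 is always 1, byte 1
+	 * is the target ID; bytes 2-3 are left zeroed, i.e. LUN 0.
+	 */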
+ memset(req, 0, sizeof(*req));
+ req->lun[0] = 1;
+ req->lun[1] = disk->info.target;
+
+ return io_ctx;
+}
+
+static struct virtio_scsi_io_ctx *
+bdev_virtio_init_tmf_vreq(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_ctrl_tmf_req *tmf_req;
+ struct virtio_scsi_ctrl_tmf_resp *tmf_resp;
+ struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev);
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+
+ tmf_req = &io_ctx->tmf_req;
+ tmf_resp = &io_ctx->tmf_resp;
+
+ io_ctx->iov_req.iov_base = tmf_req;
+ io_ctx->iov_req.iov_len = sizeof(*tmf_req);
+ io_ctx->iov_resp.iov_base = tmf_resp;
+ io_ctx->iov_resp.iov_len = sizeof(*tmf_resp);
+
+ memset(tmf_req, 0, sizeof(*tmf_req));
+ tmf_req->lun[0] = 1;
+ tmf_req->lun[1] = disk->info.target;
+
+ return io_ctx;
+}
+
+static void
+bdev_virtio_send_io(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct bdev_virtio_io_channel *virtio_channel = spdk_io_channel_get_ctx(ch);
+ struct virtqueue *vq = virtio_channel->vq;
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+ int rc;
+
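+	/* Reserve iovcnt + 2 descriptors: one for the request header, one for
+	 * the response, plus one per data iovec.
+	 */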
+ rc = virtqueue_req_start(vq, bdev_io, bdev_io->u.bdev.iovcnt + 2);
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ return;
+ } else if (rc != 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
+ virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ SPDK_VIRTIO_DESC_WR);
+ } else {
+ virtqueue_req_add_iovs(vq, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ SPDK_VIRTIO_DESC_RO);
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
+ }
+
+ virtqueue_req_flush(vq);
+}
+
+static void
+bdev_virtio_rw(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev);
+ struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_io_vreq(ch, bdev_io);
+ struct virtio_scsi_cmd_req *req = &io_ctx->req;
+ bool is_write = bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE;
+
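+	/* LBAs beyond 32 bits require 16-byte CDBs; READ(10)/WRITE(10) carry
+	 * only a 32-bit LBA and a 16-bit transfer length.
+	 */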
+ if (disk->info.num_blocks > (1ULL << 32)) {
+ req->cdb[0] = is_write ? SPDK_SBC_WRITE_16 : SPDK_SBC_READ_16;
+ to_be64(&req->cdb[2], bdev_io->u.bdev.offset_blocks);
+ to_be32(&req->cdb[10], bdev_io->u.bdev.num_blocks);
+ } else {
+ req->cdb[0] = is_write ? SPDK_SBC_WRITE_10 : SPDK_SBC_READ_10;
+ to_be32(&req->cdb[2], bdev_io->u.bdev.offset_blocks);
+ to_be16(&req->cdb[7], bdev_io->u.bdev.num_blocks);
+ }
+
+ bdev_virtio_send_io(ch, bdev_io);
+}
+
+static void
+bdev_virtio_reset(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct bdev_virtio_io_channel *virtio_ch = spdk_io_channel_get_ctx(ch);
+ struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_tmf_vreq(ch, bdev_io);
+ struct virtio_scsi_ctrl_tmf_req *tmf_req = &io_ctx->tmf_req;
+ struct virtio_scsi_dev *svdev = virtio_ch->svdev;
+ size_t enqueued_count;
+
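+	/* The controlq is owned by the management poller, so the TMF request is
+	 * handed off through the MP/SC ring and submitted from that poller.
+	 */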
+ tmf_req->type = VIRTIO_SCSI_T_TMF;
+ tmf_req->subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET;
+
+ enqueued_count = spdk_ring_enqueue(svdev->ctrlq_ring, (void **)&bdev_io, 1);
+ if (spdk_likely(enqueued_count == 1)) {
+ return;
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ }
+}
+
+static void
+bdev_virtio_unmap(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_io_ctx *io_ctx = bdev_virtio_init_io_vreq(ch, bdev_io);
+ struct virtio_scsi_cmd_req *req = &io_ctx->req;
+ struct spdk_scsi_unmap_bdesc *desc, *first_desc;
+ uint8_t *buf;
+ uint64_t offset_blocks, num_blocks;
+ uint16_t cmd_len;
+
+ buf = bdev_io->u.bdev.iovs[0].iov_base;
+
+ offset_blocks = bdev_io->u.bdev.offset_blocks;
+ num_blocks = bdev_io->u.bdev.num_blocks;
+
+ /* (n-1) * 16-byte descriptors */
+ first_desc = desc = (struct spdk_scsi_unmap_bdesc *)&buf[8];
+ while (num_blocks > UINT32_MAX) {
+ to_be64(&desc->lba, offset_blocks);
+ to_be32(&desc->block_count, UINT32_MAX);
+ memset(&desc->reserved, 0, sizeof(desc->reserved));
+ offset_blocks += UINT32_MAX;
+ num_blocks -= UINT32_MAX;
+ desc++;
+ }
+
+ /* The last descriptor with block_count <= UINT32_MAX */
+ to_be64(&desc->lba, offset_blocks);
+ to_be32(&desc->block_count, num_blocks);
+ memset(&desc->reserved, 0, sizeof(desc->reserved));
+
+ /* 8-byte header + n * 16-byte block descriptor */
+ cmd_len = 8 + (desc - first_desc + 1) * sizeof(struct spdk_scsi_unmap_bdesc);
+
+ req->cdb[0] = SPDK_SBC_UNMAP;
+ to_be16(&req->cdb[7], cmd_len);
+
+ /* 8-byte header */
+ to_be16(&buf[0], cmd_len - 2); /* total length (excluding the length field) */
+ to_be16(&buf[2], cmd_len - 8); /* length of block descriptors */
+ memset(&buf[4], 0, 4); /* reserved */
+
+ bdev_virtio_send_io(ch, bdev_io);
+}
+
+static int _bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_disk *disk = SPDK_CONTAINEROF(bdev_io->bdev, struct virtio_scsi_disk, bdev);
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_virtio_rw,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ bdev_virtio_rw(ch, bdev_io);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ bdev_virtio_reset(ch, bdev_io);
+ return 0;
+ case SPDK_BDEV_IO_TYPE_UNMAP: {
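+		/* Ceiling of num_blocks / UINT32_MAX descriptors, since each
+		 * UNMAP block descriptor covers at most UINT32_MAX blocks.
+		 */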
+ uint64_t buf_len = 8 /* header size */ +
+ (bdev_io->u.bdev.num_blocks + UINT32_MAX - 1) /
+ UINT32_MAX * sizeof(struct spdk_scsi_unmap_bdesc);
+
+ if (!disk->info.unmap_supported) {
+ return -1;
+ }
+
+ if (buf_len > SPDK_BDEV_LARGE_BUF_MAX_SIZE) {
+ SPDK_ERRLOG("Trying to UNMAP too many blocks: %"PRIu64"\n",
+ bdev_io->u.bdev.num_blocks);
+ return -1;
+ }
+ spdk_bdev_io_get_buf(bdev_io, bdev_virtio_unmap, buf_len);
+ return 0;
+ }
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+static void bdev_virtio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ if (_bdev_virtio_submit_request(ch, bdev_io) < 0) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static bool
+bdev_virtio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct virtio_scsi_disk *disk = ctx;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return true;
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ return disk->info.unmap_supported;
+
+ default:
+ return false;
+ }
+}
+
+static struct spdk_io_channel *
+bdev_virtio_get_io_channel(void *ctx)
+{
+ struct virtio_scsi_disk *disk = ctx;
+
+ return spdk_get_io_channel(disk->svdev);
+}
+
+static int
+bdev_virtio_disk_destruct(void *ctx)
+{
+ struct virtio_scsi_disk *disk = ctx;
+ struct virtio_scsi_dev *svdev = disk->svdev;
+
+ TAILQ_REMOVE(&svdev->luns, disk, link);
+ free(disk->bdev.name);
+ free(disk);
+
+ if (svdev->removed && TAILQ_EMPTY(&svdev->luns)) {
+ spdk_io_device_unregister(svdev, virtio_scsi_dev_unregister_cb);
+ }
+
+ return 0;
+}
+
+static int
+bdev_virtio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct virtio_scsi_disk *disk = ctx;
+
+ virtio_dev_dump_json_info(&disk->svdev->vdev, w);
+ return 0;
+}
+
+static void
+bdev_virtio_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+	/* SCSI targets and LUNs are discovered during the scan process, so
+	 * there is nothing to save here.
+	 */
+}
+
+static const struct spdk_bdev_fn_table virtio_fn_table = {
+ .destruct = bdev_virtio_disk_destruct,
+ .submit_request = bdev_virtio_submit_request,
+ .io_type_supported = bdev_virtio_io_type_supported,
+ .get_io_channel = bdev_virtio_get_io_channel,
+ .dump_info_json = bdev_virtio_dump_info_json,
+ .write_config_json = bdev_virtio_write_config_json,
+};
+
+static void
+get_scsi_status(struct virtio_scsi_cmd_resp *resp, int *sk, int *asc, int *ascq)
+{
+ /* see spdk_scsi_task_build_sense_data() for sense data details */
+ *sk = 0;
+ *asc = 0;
+ *ascq = 0;
+
+ if (resp->sense_len < 3) {
+ return;
+ }
+
+ *sk = resp->sense[2] & 0xf;
+
+ if (resp->sense_len < 13) {
+ return;
+ }
+
+ *asc = resp->sense[12];
+
+ if (resp->sense_len < 14) {
+ return;
+ }
+
+ *ascq = resp->sense[13];
+}
+
+static void
+bdev_virtio_io_cpl(struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+ int sk, asc, ascq;
+
+ get_scsi_status(&io_ctx->resp, &sk, &asc, &ascq);
+ spdk_bdev_io_complete_scsi_status(bdev_io, io_ctx->resp.status, sk, asc, ascq);
+}
+
+static int
+bdev_virtio_poll(void *arg)
+{
+ struct bdev_virtio_io_channel *ch = arg;
+ struct virtio_scsi_dev *svdev = ch->svdev;
+ struct virtio_scsi_scan_base *scan_ctx = svdev->scan_ctx;
+ void *io[32];
+ uint32_t io_len[32];
+ uint16_t i, cnt;
+ int rc;
+
+ cnt = virtio_recv_pkts(ch->vq, (void **)io, io_len, SPDK_COUNTOF(io));
+ for (i = 0; i < cnt; ++i) {
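+		/* Scan I/Os travel on the regular request queue; they are told
+		 * apart from bdev I/O by their cookie pointing at the scan
+		 * context's embedded io_ctx.
+		 */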
+ if (spdk_unlikely(scan_ctx && io[i] == &scan_ctx->io_ctx)) {
+ if (svdev->removed) {
+ _virtio_scsi_dev_scan_finish(scan_ctx, -EINTR);
+ return -1;
+ }
+
+ if (scan_ctx->restart) {
+ scan_ctx->restart = false;
+ scan_ctx->full_scan = true;
+ _virtio_scsi_dev_scan_tgt(scan_ctx, 0);
+ continue;
+ }
+
+ process_scan_resp(scan_ctx);
+ continue;
+ }
+
+ bdev_virtio_io_cpl(io[i]);
+ }
+
+ if (spdk_unlikely(scan_ctx && scan_ctx->needs_resend)) {
+ if (svdev->removed) {
+ _virtio_scsi_dev_scan_finish(scan_ctx, -EINTR);
+ return -1;
+ } else if (cnt == 0) {
+ return 0;
+ }
+
+ rc = send_scan_io(scan_ctx);
+ if (rc != 0) {
+ assert(scan_ctx->retries > 0);
+ scan_ctx->retries--;
+ if (scan_ctx->retries == 0) {
+ SPDK_ERRLOG("Target scan failed unrecoverably with rc = %d.\n", rc);
+ _virtio_scsi_dev_scan_finish(scan_ctx, rc);
+ }
+ }
+ }
+
+ return cnt;
+}
+
+static void
+bdev_virtio_tmf_cpl_cb(void *ctx)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+
+ if (io_ctx->tmf_resp.response == VIRTIO_SCSI_S_OK) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
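+/* TMF completions arrive on the management poller's thread; bounce the
+ * completion back to the thread that owns the bdev_io.
+ */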
+static void
+bdev_virtio_tmf_cpl(struct spdk_bdev_io *bdev_io)
+{
+ spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_virtio_tmf_cpl_cb, bdev_io);
+}
+
+static void
+bdev_virtio_eventq_io_cpl(struct virtio_scsi_dev *svdev, struct virtio_scsi_eventq_io *io)
+{
+ struct virtio_scsi_event *ev = &io->ev;
+ struct virtio_scsi_disk *disk;
+
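+	/* Events use the same LUN address format as requests: byte 0 must be 1
+	 * and byte 1 holds the target ID.
+	 */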
+ if (ev->lun[0] != 1) {
+ SPDK_WARNLOG("Received an event with invalid data layout.\n");
+ goto out;
+ }
+
+ if (ev->event & VIRTIO_SCSI_T_EVENTS_MISSED) {
+ ev->event &= ~VIRTIO_SCSI_T_EVENTS_MISSED;
+ virtio_scsi_dev_scan(svdev, NULL, NULL);
+ }
+
+ switch (ev->event) {
+ case VIRTIO_SCSI_T_NO_EVENT:
+ break;
+ case VIRTIO_SCSI_T_TRANSPORT_RESET:
+ switch (ev->reason) {
+ case VIRTIO_SCSI_EVT_RESET_RESCAN:
+ virtio_scsi_dev_scan_tgt(svdev, ev->lun[1]);
+ break;
+ case VIRTIO_SCSI_EVT_RESET_REMOVED:
+ disk = virtio_scsi_dev_get_disk_by_id(svdev, ev->lun[1]);
+ if (disk != NULL) {
+ spdk_bdev_unregister(&disk->bdev, NULL, NULL);
+ }
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ virtio_scsi_dev_send_eventq_io(svdev->vdev.vqs[VIRTIO_SCSI_EVENTQ], io);
+}
+
+static void
+bdev_virtio_tmf_abort_nomem_cb(void *ctx)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+}
+
+static void
+bdev_virtio_tmf_abort_ioerr_cb(void *ctx)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+static void
+bdev_virtio_tmf_abort(struct spdk_bdev_io *bdev_io, int status)
+{
+ spdk_thread_fn fn;
+
+ if (status == -ENOMEM) {
+ fn = bdev_virtio_tmf_abort_nomem_cb;
+ } else {
+ fn = bdev_virtio_tmf_abort_ioerr_cb;
+ }
+
+ spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), fn, bdev_io);
+}
+
+static int
+bdev_virtio_send_tmf_io(struct virtqueue *ctrlq, struct spdk_bdev_io *bdev_io)
+{
+ struct virtio_scsi_io_ctx *io_ctx = (struct virtio_scsi_io_ctx *)bdev_io->driver_ctx;
+ int rc;
+
+ rc = virtqueue_req_start(ctrlq, bdev_io, 2);
+ if (rc != 0) {
+ return rc;
+ }
+
+ virtqueue_req_add_iovs(ctrlq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
+ virtqueue_req_add_iovs(ctrlq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
+
+ virtqueue_req_flush(ctrlq);
+ return 0;
+}
+
+static int
+bdev_virtio_mgmt_poll(void *arg)
+{
+ struct virtio_scsi_dev *svdev = arg;
+ struct virtio_dev *vdev = &svdev->vdev;
+ struct virtqueue *eventq = vdev->vqs[VIRTIO_SCSI_EVENTQ];
+ struct virtqueue *ctrlq = vdev->vqs[VIRTIO_SCSI_CONTROLQ];
+ struct spdk_ring *send_ring = svdev->ctrlq_ring;
+ void *io[16];
+ uint32_t io_len[16];
+ uint16_t i, cnt;
+ int rc;
+ int total = 0;
+
+ cnt = spdk_ring_dequeue(send_ring, io, SPDK_COUNTOF(io));
+ total += cnt;
+ for (i = 0; i < cnt; ++i) {
+ rc = bdev_virtio_send_tmf_io(ctrlq, io[i]);
+ if (rc != 0) {
+ bdev_virtio_tmf_abort(io[i], rc);
+ }
+ }
+
+ cnt = virtio_recv_pkts(ctrlq, io, io_len, SPDK_COUNTOF(io));
+ total += cnt;
+ for (i = 0; i < cnt; ++i) {
+ bdev_virtio_tmf_cpl(io[i]);
+ }
+
+ cnt = virtio_recv_pkts(eventq, io, io_len, SPDK_COUNTOF(io));
+ total += cnt;
+ for (i = 0; i < cnt; ++i) {
+ bdev_virtio_eventq_io_cpl(svdev, io[i]);
+ }
+
+ return total;
+}
+
+static int
+bdev_virtio_scsi_ch_create_cb(void *io_device, void *ctx_buf)
+{
+ struct virtio_scsi_dev *svdev = io_device;
+ struct virtio_dev *vdev = &svdev->vdev;
+ struct bdev_virtio_io_channel *ch = ctx_buf;
+ struct virtqueue *vq;
+ int32_t queue_idx;
+
+ queue_idx = virtio_dev_find_and_acquire_queue(vdev, VIRTIO_SCSI_REQUESTQ);
+ if (queue_idx < 0) {
+ SPDK_ERRLOG("Couldn't get an unused queue for the io_channel.\n");
+ return -1;
+ }
+
+ vq = vdev->vqs[queue_idx];
+
+ ch->svdev = svdev;
+ ch->vq = vq;
+
+ ch->poller = spdk_poller_register(bdev_virtio_poll, ch, 0);
+
+ return 0;
+}
+
+static void
+bdev_virtio_scsi_ch_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct bdev_virtio_io_channel *ch = ctx_buf;
+ struct virtio_scsi_dev *svdev = ch->svdev;
+ struct virtio_dev *vdev = &svdev->vdev;
+ struct virtqueue *vq = ch->vq;
+
+ spdk_poller_unregister(&ch->poller);
+ virtio_dev_release_queue(vdev, vq->vq_queue_index);
+}
+
+static void
+_virtio_scsi_dev_scan_finish(struct virtio_scsi_scan_base *base, int errnum)
+{
+ struct virtio_scsi_dev *svdev = base->svdev;
+ size_t bdevs_cnt;
+ struct spdk_bdev *bdevs[BDEV_VIRTIO_MAX_TARGET];
+ struct virtio_scsi_disk *disk;
+ struct virtio_scsi_scan_info *tgt, *next_tgt;
+
+ spdk_put_io_channel(spdk_io_channel_from_ctx(base->channel));
+ base->svdev->scan_ctx = NULL;
+
+ TAILQ_FOREACH_SAFE(tgt, &base->scan_queue, tailq, next_tgt) {
+ TAILQ_REMOVE(&base->scan_queue, tgt, tailq);
+ free(tgt);
+ }
+
+ if (base->cb_fn == NULL) {
+ spdk_dma_free(base);
+ return;
+ }
+
+ bdevs_cnt = 0;
+ if (errnum == 0) {
+ TAILQ_FOREACH(disk, &svdev->luns, link) {
+ bdevs[bdevs_cnt] = &disk->bdev;
+ bdevs_cnt++;
+ }
+ }
+
+ base->cb_fn(base->cb_arg, errnum, bdevs, bdevs_cnt);
+ spdk_dma_free(base);
+}
+
+static int
+send_scan_io(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_io_ctx *io_ctx = &base->io_ctx;
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct virtqueue *vq = base->channel->vq;
+ int payload_iov_cnt = base->iov.iov_len > 0 ? 1 : 0;
+ int rc;
+
+ req->lun[0] = 1;
+ req->lun[1] = base->info.target;
+
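+	/* Two descriptors for the request/response pair, plus an optional data
+	 * buffer for commands that return payload.
+	 */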
+ rc = virtqueue_req_start(vq, io_ctx, 2 + payload_iov_cnt);
+ if (rc != 0) {
+ base->needs_resend = true;
+ return -1;
+ }
+
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_req, 1, SPDK_VIRTIO_DESC_RO);
+ virtqueue_req_add_iovs(vq, &io_ctx->iov_resp, 1, SPDK_VIRTIO_DESC_WR);
+ virtqueue_req_add_iovs(vq, &base->iov, payload_iov_cnt, SPDK_VIRTIO_DESC_WR);
+
+ virtqueue_req_flush(vq);
+ return 0;
+}
+
+static int
+send_inquiry(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct spdk_scsi_cdb_inquiry *cdb;
+
+ memset(req, 0, sizeof(*req));
+
+ base->iov.iov_len = BDEV_VIRTIO_SCAN_PAYLOAD_SIZE;
+ cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb;
+ cdb->opcode = SPDK_SPC_INQUIRY;
+ to_be16(cdb->alloc_len, BDEV_VIRTIO_SCAN_PAYLOAD_SIZE);
+
+ return send_scan_io(base);
+}
+
+static int
+send_inquiry_vpd(struct virtio_scsi_scan_base *base, uint8_t page_code)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct spdk_scsi_cdb_inquiry *inquiry_cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb;
+
+ memset(req, 0, sizeof(*req));
+
+ base->iov.iov_len = BDEV_VIRTIO_SCAN_PAYLOAD_SIZE;
+ inquiry_cdb->opcode = SPDK_SPC_INQUIRY;
+ inquiry_cdb->evpd = 1;
+ inquiry_cdb->page_code = page_code;
+ to_be16(inquiry_cdb->alloc_len, base->iov.iov_len);
+
+ return send_scan_io(base);
+}
+
+static int
+send_read_cap_10(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+
+ memset(req, 0, sizeof(*req));
+
+ base->iov.iov_len = 8;
+ req->cdb[0] = SPDK_SBC_READ_CAPACITY_10;
+
+ return send_scan_io(base);
+}
+
+static int
+send_read_cap_16(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+
+ memset(req, 0, sizeof(*req));
+
+ base->iov.iov_len = 32;
+ req->cdb[0] = SPDK_SPC_SERVICE_ACTION_IN_16;
+ req->cdb[1] = SPDK_SBC_SAI_READ_CAPACITY_16;
+ to_be32(&req->cdb[10], base->iov.iov_len);
+
+ return send_scan_io(base);
+}
+
+static int
+send_test_unit_ready(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+
+ memset(req, 0, sizeof(*req));
+ req->cdb[0] = SPDK_SPC_TEST_UNIT_READY;
+ base->iov.iov_len = 0;
+
+ return send_scan_io(base);
+}
+
+static int
+send_start_stop_unit(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+
+ memset(req, 0, sizeof(*req));
+ req->cdb[0] = SPDK_SBC_START_STOP_UNIT;
+ req->cdb[4] = SPDK_SBC_START_STOP_UNIT_START_BIT;
+ base->iov.iov_len = 0;
+
+ return send_scan_io(base);
+}
+
+static int
+process_scan_start_stop_unit(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+
+ if (resp->status == SPDK_SCSI_STATUS_GOOD) {
+ return send_inquiry_vpd(base, SPDK_SPC_VPD_SUPPORTED_VPD_PAGES);
+ }
+
+ return -1;
+}
+
+static int
+process_scan_test_unit_ready(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ int sk, asc, ascq;
+
+ get_scsi_status(resp, &sk, &asc, &ascq);
+
+	/* Check the response: fetch VPD data if the unit is spun up, otherwise
+	 * send START STOP UNIT to spin it up first. */
+ if (resp->status == SPDK_SCSI_STATUS_GOOD) {
+ return send_inquiry_vpd(base, SPDK_SPC_VPD_SUPPORTED_VPD_PAGES);
+ } else if (resp->response == VIRTIO_SCSI_S_OK &&
+ resp->status == SPDK_SCSI_STATUS_CHECK_CONDITION &&
+ sk == SPDK_SCSI_SENSE_UNIT_ATTENTION &&
+ asc == SPDK_SCSI_ASC_LOGICAL_UNIT_NOT_READY) {
+ return send_start_stop_unit(base);
+ } else {
+ return -1;
+ }
+}
+
+static int
+process_scan_inquiry_standard(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ struct spdk_scsi_cdb_inquiry_data *inquiry_data =
+ (struct spdk_scsi_cdb_inquiry_data *)base->payload;
+
+ if (resp->status != SPDK_SCSI_STATUS_GOOD) {
+ return -1;
+ }
+
+	/* Check to make sure it's a supported device. */
+ if (inquiry_data->peripheral_device_type != SPDK_SPC_PERIPHERAL_DEVICE_TYPE_DISK ||
+ inquiry_data->peripheral_qualifier != SPDK_SPC_PERIPHERAL_QUALIFIER_CONNECTED) {
+ SPDK_WARNLOG("Unsupported peripheral device type 0x%02x (qualifier 0x%02x)\n",
+ inquiry_data->peripheral_device_type,
+ inquiry_data->peripheral_qualifier);
+ return -1;
+ }
+
+ return send_test_unit_ready(base);
+}
+
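+/* Parse the SUPPORTED VPD PAGES response; if the BLOCK THIN PROVISION page
+ * is advertised, query it to detect unmap support, otherwise proceed
+ * directly to READ CAPACITY. */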
+static int
+process_scan_inquiry_vpd_supported_vpd_pages(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ bool block_provisioning_page_supported = false;
+
+ if (resp->status == SPDK_SCSI_STATUS_GOOD) {
+ const uint8_t *vpd_data = base->payload;
+ const uint8_t *supported_vpd_pages = vpd_data + 4;
+ uint16_t page_length;
+ uint16_t num_supported_pages;
+ uint16_t i;
+
+ page_length = from_be16(vpd_data + 2);
+ num_supported_pages = spdk_min(page_length, base->iov.iov_len - 4);
+
+ for (i = 0; i < num_supported_pages; i++) {
+ if (supported_vpd_pages[i] == SPDK_SPC_VPD_BLOCK_THIN_PROVISION) {
+ block_provisioning_page_supported = true;
+ break;
+ }
+ }
+ }
+
+ if (block_provisioning_page_supported) {
+ return send_inquiry_vpd(base, SPDK_SPC_VPD_BLOCK_THIN_PROVISION);
+ } else {
+ return send_read_cap_10(base);
+ }
+}
+
+static int
+process_scan_inquiry_vpd_block_thin_provision(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+
+ base->info.unmap_supported = false;
+
+ if (resp->status == SPDK_SCSI_STATUS_GOOD) {
+ uint8_t *vpd_data = base->payload;
+
+ base->info.unmap_supported = !!(vpd_data[5] & SPDK_SCSI_UNMAP_LBPU);
+ }
+
+ SPDK_INFOLOG(SPDK_LOG_VIRTIO, "Target %u: unmap supported = %d\n",
+ base->info.target, (int)base->info.unmap_supported);
+
+ return send_read_cap_10(base);
+}
+
+static int
+process_scan_inquiry(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct spdk_scsi_cdb_inquiry *inquiry_cdb = (struct spdk_scsi_cdb_inquiry *)req->cdb;
+
+ if ((inquiry_cdb->evpd & 1) == 0) {
+ return process_scan_inquiry_standard(base);
+ }
+
+ switch (inquiry_cdb->page_code) {
+ case SPDK_SPC_VPD_SUPPORTED_VPD_PAGES:
+ return process_scan_inquiry_vpd_supported_vpd_pages(base);
+ case SPDK_SPC_VPD_BLOCK_THIN_PROVISION:
+ return process_scan_inquiry_vpd_block_thin_provision(base);
+ default:
+ SPDK_DEBUGLOG(SPDK_LOG_VIRTIO, "Unexpected VPD page 0x%02x\n", inquiry_cdb->page_code);
+ return -1;
+ }
+}
+
+static void
+bdev_virtio_disc_notify_remove(void *remove_ctx)
+{
+ struct virtio_scsi_disk *disk = remove_ctx;
+
+ disk->removed = true;
+ spdk_bdev_close(disk->notify_desc);
+}
+
+/* To be called only from the thread performing target scan */
+static int
+virtio_scsi_dev_add_tgt(struct virtio_scsi_dev *svdev, struct virtio_scsi_scan_info *info)
+{
+ struct virtio_scsi_disk *disk;
+ struct spdk_bdev *bdev;
+ int rc;
+
+ TAILQ_FOREACH(disk, &svdev->luns, link) {
+ if (disk->info.target == info->target) {
+ /* Target is already attached and param change is not supported */
+ return 0;
+ }
+ }
+
+ if (info->block_size == 0 || info->num_blocks == 0) {
+ SPDK_ERRLOG("%s: invalid target %u: bs=%"PRIu32" blocks=%"PRIu64"\n",
+ svdev->vdev.name, info->target, info->block_size, info->num_blocks);
+ return -EINVAL;
+ }
+
+ disk = calloc(1, sizeof(*disk));
+ if (disk == NULL) {
+ SPDK_ERRLOG("could not allocate disk\n");
+ return -ENOMEM;
+ }
+
+ disk->svdev = svdev;
+ memcpy(&disk->info, info, sizeof(*info));
+
+ bdev = &disk->bdev;
+ bdev->name = spdk_sprintf_alloc("%st%"PRIu8, svdev->vdev.name, info->target);
+ if (bdev->name == NULL) {
+		SPDK_ERRLOG("Couldn't allocate memory for the bdev name.\n");
+ free(disk);
+ return -ENOMEM;
+ }
+
+ bdev->product_name = "Virtio SCSI Disk";
+ bdev->write_cache = 0;
+ bdev->blocklen = disk->info.block_size;
+ bdev->blockcnt = disk->info.num_blocks;
+
+ bdev->ctxt = disk;
+ bdev->fn_table = &virtio_fn_table;
+ bdev->module = &virtio_scsi_if;
+
+ rc = spdk_bdev_register(&disk->bdev);
+ if (rc) {
+ SPDK_ERRLOG("Failed to register bdev name=%s\n", disk->bdev.name);
+ free(bdev->name);
+ free(disk);
+ return rc;
+ }
+
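+	/* This open only exists to receive hot-remove notifications for the
+	 * bdev we just registered; failure is not expected here, hence only
+	 * an assert. */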
+ rc = spdk_bdev_open(bdev, false, bdev_virtio_disc_notify_remove, disk, &disk->notify_desc);
+ if (rc) {
+ assert(false);
+ }
+
+ TAILQ_INSERT_TAIL(&svdev->luns, disk, link);
+ return 0;
+}
+
+static int
+process_read_cap_10(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ uint64_t max_block;
+ uint32_t block_size;
+ uint8_t target_id = req->lun[1];
+ int rc;
+
+ if (resp->response != VIRTIO_SCSI_S_OK || resp->status != SPDK_SCSI_STATUS_GOOD) {
+ SPDK_ERRLOG("READ CAPACITY (10) failed for target %"PRIu8".\n", target_id);
+ return -1;
+ }
+
+ block_size = from_be32(base->payload + 4);
+ max_block = from_be32(base->payload);
+
+ if (max_block == 0xffffffff) {
+ return send_read_cap_16(base);
+ }
+
+ base->info.num_blocks = (uint64_t)max_block + 1;
+ base->info.block_size = block_size;
+
+ rc = virtio_scsi_dev_add_tgt(base->svdev, &base->info);
+ if (rc != 0) {
+ return rc;
+ }
+
+ return _virtio_scsi_dev_scan_next(base, 0);
+}
+
+static int
+process_read_cap_16(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ uint8_t target_id = req->lun[1];
+ int rc;
+
+ if (resp->response != VIRTIO_SCSI_S_OK || resp->status != SPDK_SCSI_STATUS_GOOD) {
+ SPDK_ERRLOG("READ CAPACITY (16) failed for target %"PRIu8".\n", target_id);
+ return -1;
+ }
+
+ base->info.num_blocks = from_be64(base->payload) + 1;
+ base->info.block_size = from_be32(base->payload + 8);
+ rc = virtio_scsi_dev_add_tgt(base->svdev, &base->info);
+ if (rc != 0) {
+ return rc;
+ }
+
+ return _virtio_scsi_dev_scan_next(base, 0);
+}
+
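+/* Dispatch a completed scan request based on the CDB opcode it carried.
+ * Unreachable targets end the current target's scan; transient errors are
+ * retried up to SCAN_REQUEST_RETRIES times before giving up. */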
+static void
+process_scan_resp(struct virtio_scsi_scan_base *base)
+{
+ struct virtio_scsi_cmd_req *req = &base->io_ctx.req;
+ struct virtio_scsi_cmd_resp *resp = &base->io_ctx.resp;
+ int rc, sk, asc, ascq;
+ uint8_t target_id;
+
+ if (base->io_ctx.iov_req.iov_len < sizeof(struct virtio_scsi_cmd_req) ||
+ base->io_ctx.iov_resp.iov_len < sizeof(struct virtio_scsi_cmd_resp)) {
+ SPDK_ERRLOG("Received target scan message with invalid length.\n");
+ _virtio_scsi_dev_scan_next(base, -EIO);
+ return;
+ }
+
+ get_scsi_status(resp, &sk, &asc, &ascq);
+ target_id = req->lun[1];
+
+ if (resp->response == VIRTIO_SCSI_S_BAD_TARGET ||
+ resp->response == VIRTIO_SCSI_S_INCORRECT_LUN) {
+ _virtio_scsi_dev_scan_next(base, -ENODEV);
+ return;
+ }
+
+ if (resp->response != VIRTIO_SCSI_S_OK ||
+ (resp->status == SPDK_SCSI_STATUS_CHECK_CONDITION &&
+ sk != SPDK_SCSI_SENSE_ILLEGAL_REQUEST)) {
+ assert(base->retries > 0);
+ base->retries--;
+ if (base->retries == 0) {
+ SPDK_NOTICELOG("Target %"PRIu8" is present, but unavailable.\n", target_id);
+ SPDK_TRACEDUMP(SPDK_LOG_VIRTIO, "CDB", req->cdb, sizeof(req->cdb));
+ SPDK_TRACEDUMP(SPDK_LOG_VIRTIO, "SENSE DATA", resp->sense, sizeof(resp->sense));
+ _virtio_scsi_dev_scan_next(base, -EBUSY);
+ return;
+ }
+
+		/* Resend the same request. If submission fails, send_scan_io()
+		 * sets needs_resend and the response poller retries it later. */
+		(void)send_scan_io(base);
+ return;
+ }
+
+ base->retries = SCAN_REQUEST_RETRIES;
+
+ switch (req->cdb[0]) {
+ case SPDK_SPC_INQUIRY:
+ rc = process_scan_inquiry(base);
+ break;
+ case SPDK_SPC_TEST_UNIT_READY:
+ rc = process_scan_test_unit_ready(base);
+ break;
+ case SPDK_SBC_START_STOP_UNIT:
+ rc = process_scan_start_stop_unit(base);
+ break;
+ case SPDK_SBC_READ_CAPACITY_10:
+ rc = process_read_cap_10(base);
+ break;
+ case SPDK_SPC_SERVICE_ACTION_IN_16:
+ rc = process_read_cap_16(base);
+ break;
+ default:
+ SPDK_ERRLOG("Received invalid target scan message: cdb[0] = %"PRIu8".\n", req->cdb[0]);
+ rc = -1;
+ break;
+ }
+
+ if (rc != 0) {
+ if (base->needs_resend) {
+ return; /* Let response poller do the resend */
+ }
+
+ _virtio_scsi_dev_scan_next(base, rc);
+ }
+}
+
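+/* Advance the scan to the next target: during a full scan, iterate target
+ * ids sequentially (unregistering the bdev of any target that just failed
+ * to scan); afterwards, service individually queued target scan requests. */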
+static int
+_virtio_scsi_dev_scan_next(struct virtio_scsi_scan_base *base, int rc)
+{
+ struct virtio_scsi_scan_info *next;
+ struct virtio_scsi_disk *disk;
+ uint8_t target_id;
+
+ if (base->full_scan) {
+ if (rc != 0) {
+ disk = virtio_scsi_dev_get_disk_by_id(base->svdev,
+ base->info.target);
+ if (disk != NULL) {
+ spdk_bdev_unregister(&disk->bdev, NULL, NULL);
+ }
+ }
+
+ target_id = base->info.target + 1;
+ if (target_id < BDEV_VIRTIO_MAX_TARGET) {
+ _virtio_scsi_dev_scan_tgt(base, target_id);
+ return 0;
+ }
+
+ base->full_scan = false;
+ }
+
+ next = TAILQ_FIRST(&base->scan_queue);
+ if (next == NULL) {
+ _virtio_scsi_dev_scan_finish(base, 0);
+ return 0;
+ }
+
+ TAILQ_REMOVE(&base->scan_queue, next, tailq);
+ target_id = next->target;
+ free(next);
+
+ _virtio_scsi_dev_scan_tgt(base, target_id);
+ return 0;
+}
+
+static int
+virtio_pci_scsi_dev_enumerate_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
+{
+ struct virtio_scsi_dev *svdev;
+
+ svdev = virtio_pci_scsi_dev_create(NULL, pci_ctx);
+ return svdev == NULL ? -1 : 0;
+}
+
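+/* Parse the legacy INI-style configuration file. An illustrative section
+ * (the socket path is a made-up example) that this function accepts:
+ *
+ *   [VirtioUser0]
+ *     Path /tmp/vhost.0
+ *     Type SCSI
+ *     Queues 4
+ *     Name VirtioScsi0
+ *
+ * A Type other than SCSI is skipped, Queues is clamped to
+ * [1, SPDK_VIRTIO_MAX_VIRTQUEUES], and Name defaults to VirtioScsi<N>.
+ * A [VirtioPci] section with "Enable" set additionally enumerates
+ * virtio-scsi devices on the PCI bus. */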
+static int
+bdev_virtio_process_config(void)
+{
+ struct spdk_conf_section *sp;
+ struct virtio_scsi_dev *svdev;
+ char *default_name = NULL;
+ char *path, *type, *name;
+ unsigned vdev_num;
+ int num_queues;
+ bool enable_pci;
+ int rc = 0;
+
+ for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) {
+ if (!spdk_conf_section_match_prefix(sp, "VirtioUser")) {
+ continue;
+ }
+
+ if (sscanf(spdk_conf_section_get_name(sp), "VirtioUser%u", &vdev_num) != 1) {
+ SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n",
+ spdk_conf_section_get_name(sp));
+ rc = -1;
+ goto out;
+ }
+
+ path = spdk_conf_section_get_val(sp, "Path");
+ if (path == NULL) {
+ SPDK_ERRLOG("VirtioUser%u: missing Path\n", vdev_num);
+ rc = -1;
+ goto out;
+ }
+
+ type = spdk_conf_section_get_val(sp, "Type");
+ if (type != NULL && strcmp(type, "SCSI") != 0) {
+ continue;
+ }
+
+ num_queues = spdk_conf_section_get_intval(sp, "Queues");
+ if (num_queues < 1) {
+ num_queues = 1;
+ } else if (num_queues > SPDK_VIRTIO_MAX_VIRTQUEUES) {
+ num_queues = SPDK_VIRTIO_MAX_VIRTQUEUES;
+ }
+
+ name = spdk_conf_section_get_val(sp, "Name");
+ if (name == NULL) {
+ default_name = spdk_sprintf_alloc("VirtioScsi%u", vdev_num);
+ name = default_name;
+ }
+
+ svdev = virtio_user_scsi_dev_create(name, path, num_queues, 512);
+ free(default_name);
+ default_name = NULL;
+
+ if (svdev == NULL) {
+ rc = -1;
+ goto out;
+ }
+ }
+
+ sp = spdk_conf_find_section(NULL, "VirtioPci");
+ if (sp == NULL) {
+ return 0;
+ }
+
+ enable_pci = spdk_conf_section_get_boolval(sp, "Enable", false);
+ if (enable_pci) {
+ rc = virtio_pci_dev_enumerate(virtio_pci_scsi_dev_enumerate_cb, NULL,
+ PCI_DEVICE_ID_VIRTIO_SCSI_MODERN);
+ }
+
+out:
+ return rc;
+}
+
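+/* Allocate and initialize the shared scan context: take an io_channel for
+ * the duration of the scan and pre-build the request/response iovecs reused
+ * by every scan IO. The context comes from spdk_dma_zmalloc() since its
+ * buffers are handed to the device. */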
+static int
+_virtio_scsi_dev_scan_init(struct virtio_scsi_dev *svdev)
+{
+ struct virtio_scsi_scan_base *base;
+ struct spdk_io_channel *io_ch;
+ struct virtio_scsi_io_ctx *io_ctx;
+ struct virtio_scsi_cmd_req *req;
+ struct virtio_scsi_cmd_resp *resp;
+
+ io_ch = spdk_get_io_channel(svdev);
+ if (io_ch == NULL) {
+ return -EBUSY;
+ }
+
+	base = spdk_dma_zmalloc(sizeof(*base), 64, NULL);
+	if (base == NULL) {
+		SPDK_ERRLOG("couldn't allocate memory for scsi target scan.\n");
+		/* Drop the io_channel reference taken above to avoid leaking it. */
+		spdk_put_io_channel(io_ch);
+		return -ENOMEM;
+	}
+
+ base->svdev = svdev;
+
+ base->channel = spdk_io_channel_get_ctx(io_ch);
+ TAILQ_INIT(&base->scan_queue);
+ svdev->scan_ctx = base;
+
+ base->iov.iov_base = base->payload;
+ io_ctx = &base->io_ctx;
+ req = &io_ctx->req;
+ resp = &io_ctx->resp;
+ io_ctx->iov_req.iov_base = req;
+ io_ctx->iov_req.iov_len = sizeof(*req);
+ io_ctx->iov_resp.iov_base = resp;
+ io_ctx->iov_resp.iov_len = sizeof(*resp);
+
+ base->retries = SCAN_REQUEST_RETRIES;
+ return 0;
+}
+
+static void
+_virtio_scsi_dev_scan_tgt(struct virtio_scsi_scan_base *base, uint8_t target)
+{
+	memset(&base->info, 0, sizeof(base->info));
+	base->info.target = target;
+
+	/* If submission fails, send_inquiry() sets needs_resend and the
+	 * response poller retries it later. */
+	(void)send_inquiry(base);
+}
+
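+/* Start a full rescan of all targets. If a scan is already running, fold
+ * this request into it: drop any queued single-target scans and mark the
+ * context for restart once the inflight request completes. */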
+static int
+virtio_scsi_dev_scan(struct virtio_scsi_dev *svdev, bdev_virtio_create_cb cb_fn,
+ void *cb_arg)
+{
+ struct virtio_scsi_scan_base *base;
+ struct virtio_scsi_scan_info *tgt, *next_tgt;
+ int rc;
+
+ if (svdev->scan_ctx) {
+ if (svdev->scan_ctx->full_scan) {
+ return -EEXIST;
+ }
+
+ /* We're about to start a full rescan, so there's no need
+ * to scan particular targets afterwards.
+ */
+ TAILQ_FOREACH_SAFE(tgt, &svdev->scan_ctx->scan_queue, tailq, next_tgt) {
+ TAILQ_REMOVE(&svdev->scan_ctx->scan_queue, tgt, tailq);
+ free(tgt);
+ }
+
+ svdev->scan_ctx->cb_fn = cb_fn;
+ svdev->scan_ctx->cb_arg = cb_arg;
+ svdev->scan_ctx->restart = true;
+ return 0;
+ }
+
+ rc = _virtio_scsi_dev_scan_init(svdev);
+ if (rc != 0) {
+ return rc;
+ }
+
+ base = svdev->scan_ctx;
+ base->cb_fn = cb_fn;
+ base->cb_arg = cb_arg;
+ base->full_scan = true;
+
+ _virtio_scsi_dev_scan_tgt(base, 0);
+ return 0;
+}
+
+static int
+virtio_scsi_dev_scan_tgt(struct virtio_scsi_dev *svdev, uint8_t target)
+{
+ struct virtio_scsi_scan_base *base;
+ struct virtio_scsi_scan_info *info;
+ int rc;
+
+ base = svdev->scan_ctx;
+ if (base) {
+ info = calloc(1, sizeof(*info));
+ if (info == NULL) {
+ SPDK_ERRLOG("calloc failed\n");
+ return -ENOMEM;
+ }
+
+ info->target = target;
+ TAILQ_INSERT_TAIL(&base->scan_queue, info, tailq);
+ return 0;
+ }
+
+ rc = _virtio_scsi_dev_scan_init(svdev);
+ if (rc != 0) {
+ return rc;
+ }
+
+ base = svdev->scan_ctx;
+ base->full_scan = true;
+ _virtio_scsi_dev_scan_tgt(base, target);
+ return 0;
+}
+
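+/* Initial-scan completion callback: module initialization is reported done
+ * only once no device is still being scanned. */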
+static void
+bdev_virtio_initial_scan_complete(void *ctx, int result,
+ struct spdk_bdev **bdevs, size_t bdevs_cnt)
+{
+ struct virtio_scsi_dev *svdev;
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) {
+ if (svdev->scan_ctx) {
+ /* another device is still being scanned */
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ return;
+ }
+ }
+
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ spdk_bdev_module_init_done(&virtio_scsi_if);
+}
+
+static int
+bdev_virtio_initialize(void)
+{
+ struct virtio_scsi_dev *svdev, *next_svdev;
+ int rc;
+
+ rc = bdev_virtio_process_config();
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+
+ if (rc != 0) {
+ goto err_unlock;
+ }
+
+ if (TAILQ_EMPTY(&g_virtio_scsi_devs)) {
+ goto out_unlock;
+ }
+
+ /* Initialize all created devices and scan available targets */
+ TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) {
+ rc = virtio_scsi_dev_scan(svdev, bdev_virtio_initial_scan_complete, NULL);
+ if (rc != 0) {
+ goto err_unlock;
+ }
+ }
+
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ return 0;
+
+err_unlock:
+ /* Remove any created devices */
+ TAILQ_FOREACH_SAFE(svdev, &g_virtio_scsi_devs, tailq, next_svdev) {
+ virtio_scsi_dev_remove(svdev, NULL, NULL);
+ }
+
+out_unlock:
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ spdk_bdev_module_init_done(&virtio_scsi_if);
+ return rc;
+}
+
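+/* Final device teardown, run on the thread owning the control queue:
+ * release the management queues, stop and destruct the virtio device,
+ * drop it from the global list, and invoke the remove callback. Finishes
+ * the module if this was the last device during shutdown. */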
+static void
+_virtio_scsi_dev_unregister_cb(void *io_device)
+{
+ struct virtio_scsi_dev *svdev = io_device;
+ struct virtio_dev *vdev = &svdev->vdev;
+ bool finish_module;
+ bdev_virtio_remove_cb remove_cb;
+ void *remove_ctx;
+
+ assert(spdk_ring_count(svdev->ctrlq_ring) == 0);
+ spdk_ring_free(svdev->ctrlq_ring);
+ spdk_poller_unregister(&svdev->mgmt_poller);
+
+ virtio_dev_release_queue(vdev, VIRTIO_SCSI_EVENTQ);
+ virtio_dev_release_queue(vdev, VIRTIO_SCSI_CONTROLQ);
+
+ virtio_dev_stop(vdev);
+ virtio_dev_destruct(vdev);
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_REMOVE(&g_virtio_scsi_devs, svdev, tailq);
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+
+ remove_cb = svdev->remove_cb;
+ remove_ctx = svdev->remove_ctx;
+ spdk_dma_free(svdev->eventq_ios);
+ free(svdev);
+
+ if (remove_cb) {
+ remove_cb(remove_ctx, 0);
+ }
+
+ finish_module = TAILQ_EMPTY(&g_virtio_scsi_devs);
+
+ if (g_bdev_virtio_finish && finish_module) {
+ spdk_bdev_module_finish_done();
+ }
+}
+
+static void
+virtio_scsi_dev_unregister_cb(void *io_device)
+{
+ struct virtio_scsi_dev *svdev = io_device;
+ struct spdk_thread *thread;
+
+ thread = virtio_dev_queue_get_thread(&svdev->vdev, VIRTIO_SCSI_CONTROLQ);
+ spdk_thread_send_msg(thread, _virtio_scsi_dev_unregister_cb, io_device);
+}
+
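+/* Begin tearing down a device: mark it removed, defer while a scan IO is
+ * inflight, and unregister its bdevs; the underlying io_device is
+ * unregistered once no bdevs remain. */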
+static void
+virtio_scsi_dev_remove(struct virtio_scsi_dev *svdev,
+ bdev_virtio_remove_cb cb_fn, void *cb_arg)
+{
+ struct virtio_scsi_disk *disk, *disk_tmp;
+ bool do_remove = true;
+
+ if (svdev->removed) {
+ if (cb_fn) {
+ cb_fn(cb_arg, -EBUSY);
+ }
+ return;
+ }
+
+ svdev->remove_cb = cb_fn;
+ svdev->remove_ctx = cb_arg;
+ svdev->removed = true;
+
+ if (svdev->scan_ctx) {
+ /* The removal will continue after we receive a pending scan I/O. */
+ return;
+ }
+
+ TAILQ_FOREACH_SAFE(disk, &svdev->luns, link, disk_tmp) {
+ if (!disk->removed) {
+ spdk_bdev_unregister(&disk->bdev, NULL, NULL);
+ }
+ do_remove = false;
+ }
+
+ if (do_remove) {
+ spdk_io_device_unregister(svdev, virtio_scsi_dev_unregister_cb);
+ }
+}
+
+static void
+bdev_virtio_finish(void)
+{
+ struct virtio_scsi_dev *svdev, *next;
+
+ g_bdev_virtio_finish = true;
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ if (TAILQ_EMPTY(&g_virtio_scsi_devs)) {
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ spdk_bdev_module_finish_done();
+ return;
+ }
+
+ /* Defer module finish until all controllers are removed. */
+ TAILQ_FOREACH_SAFE(svdev, &g_virtio_scsi_devs, tailq, next) {
+ virtio_scsi_dev_remove(svdev, NULL, NULL);
+ }
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+}
+
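+/* Public entry point (used e.g. by the RPC layer): create a vhost-user
+ * virtio-scsi device and scan it; cb_fn receives the resulting bdevs. */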
+int
+bdev_virtio_user_scsi_dev_create(const char *base_name, const char *path,
+ unsigned num_queues, unsigned queue_size,
+ bdev_virtio_create_cb cb_fn, void *cb_arg)
+{
+ struct virtio_scsi_dev *svdev;
+ int rc;
+
+ svdev = virtio_user_scsi_dev_create(base_name, path, num_queues, queue_size);
+ if (svdev == NULL) {
+ return -1;
+ }
+
+ rc = virtio_scsi_dev_scan(svdev, cb_fn, cb_arg);
+ if (rc) {
+ virtio_scsi_dev_remove(svdev, NULL, NULL);
+ }
+
+ return rc;
+}
+
+struct bdev_virtio_pci_dev_create_ctx {
+ const char *name;
+ bdev_virtio_create_cb cb_fn;
+ void *cb_arg;
+};
+
+static int
+bdev_virtio_pci_scsi_dev_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
+{
+ struct virtio_scsi_dev *svdev;
+ struct bdev_virtio_pci_dev_create_ctx *create_ctx = ctx;
+ int rc;
+
+ svdev = virtio_pci_scsi_dev_create(create_ctx->name, pci_ctx);
+ if (svdev == NULL) {
+ return -1;
+ }
+
+ rc = virtio_scsi_dev_scan(svdev, create_ctx->cb_fn, create_ctx->cb_arg);
+ if (rc) {
+ virtio_scsi_dev_remove(svdev, NULL, NULL);
+ }
+
+ return rc;
+}
+
+int
+bdev_virtio_pci_scsi_dev_create(const char *name, struct spdk_pci_addr *pci_addr,
+ bdev_virtio_create_cb cb_fn, void *cb_arg)
+{
+ struct bdev_virtio_pci_dev_create_ctx create_ctx;
+
+ create_ctx.name = name;
+ create_ctx.cb_fn = cb_fn;
+ create_ctx.cb_arg = cb_arg;
+
+ return virtio_pci_dev_attach(bdev_virtio_pci_scsi_dev_create_cb, &create_ctx,
+ PCI_DEVICE_ID_VIRTIO_SCSI_MODERN, pci_addr);
+}
+
+int
+bdev_virtio_scsi_dev_remove(const char *name, bdev_virtio_remove_cb cb_fn, void *cb_arg)
+{
+ struct virtio_scsi_dev *svdev;
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) {
+ if (strcmp(svdev->vdev.name, name) == 0) {
+ break;
+ }
+ }
+
+ if (svdev == NULL) {
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+ SPDK_ERRLOG("Cannot find Virtio-SCSI device named '%s'\n", name);
+ return -ENODEV;
+ }
+
+ virtio_scsi_dev_remove(svdev, cb_fn, cb_arg);
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+
+ return 0;
+}
+
+void
+bdev_virtio_scsi_dev_list(struct spdk_json_write_ctx *w)
+{
+ struct virtio_scsi_dev *svdev;
+
+ spdk_json_write_array_begin(w);
+
+ pthread_mutex_lock(&g_virtio_scsi_mutex);
+ TAILQ_FOREACH(svdev, &g_virtio_scsi_devs, tailq) {
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_name(w, "name");
+ spdk_json_write_string(w, svdev->vdev.name);
+
+ virtio_dev_dump_json_info(&svdev->vdev, w);
+
+ spdk_json_write_object_end(w);
+ }
+ pthread_mutex_unlock(&g_virtio_scsi_mutex);
+
+ spdk_json_write_array_end(w);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("virtio", SPDK_LOG_VIRTIO)
diff --git a/src/spdk/lib/bdev/vtune.c b/src/spdk/lib/bdev/vtune.c
new file mode 100644
index 00000000..2cb48826
--- /dev/null
+++ b/src/spdk/lib/bdev/vtune.c
@@ -0,0 +1,49 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/config.h"
+#if SPDK_CONFIG_VTUNE
+
+/* Disable warnings triggered by the VTune code */
+#if defined(__GNUC__) && \
+	(__GNUC__ > 4 || \
+	 (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+#pragma GCC diagnostic ignored "-Wsign-compare"
+#if __GNUC__ >= 7
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#endif
+#endif
+
+#include "ittnotify_static.c"
+
+#endif