path: root/src/spdk/module/bdev/nvme
Diffstat (limited to '')
-rw-r--r--  src/spdk/module/bdev/nvme/Makefile               50
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_nvme.c          2924
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_nvme.h            90
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_nvme_cuse_rpc.c  152
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_nvme_rpc.c       842
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_ocssd.c         1498
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_ocssd.h           67
-rw-r--r--  src/spdk/module/bdev/nvme/bdev_ocssd_rpc.c      197
-rw-r--r--  src/spdk/module/bdev/nvme/common.c              204
-rw-r--r--  src/spdk/module/bdev/nvme/common.h              163
-rw-r--r--  src/spdk/module/bdev/nvme/nvme_rpc.c            492
-rw-r--r--  src/spdk/module/bdev/nvme/vbdev_opal.c          630
-rw-r--r--  src/spdk/module/bdev/nvme/vbdev_opal.h           54
-rw-r--r--  src/spdk/module/bdev/nvme/vbdev_opal_rpc.c      453
14 files changed, 7816 insertions, 0 deletions
diff --git a/src/spdk/module/bdev/nvme/Makefile b/src/spdk/module/bdev/nvme/Makefile
new file mode 100644
index 000000000..f9ddb2389
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/Makefile
@@ -0,0 +1,50 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+SO_VER := 3
+SO_MINOR := 0
+
+C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c common.c bdev_ocssd.c bdev_ocssd_rpc.c
+C_SRCS-$(CONFIG_NVME_CUSE) += bdev_nvme_cuse_rpc.c
+
+ifeq ($(OS),Linux)
+C_SRCS += vbdev_opal.c vbdev_opal_rpc.c
+endif
+LIBNAME = bdev_nvme
+
+SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/module/bdev/nvme/bdev_nvme.c b/src/spdk/module/bdev/nvme/bdev_nvme.c
new file mode 100644
index 000000000..4a89b8eb2
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_nvme.c
@@ -0,0 +1,2924 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_nvme.h"
+#include "bdev_ocssd.h"
+
+#include "spdk/config.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/bdev.h"
+#include "spdk/json.h"
+#include "spdk/nvme.h"
+#include "spdk/nvme_ocssd.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+#include "spdk/likely.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
+
+static void bdev_nvme_get_spdk_running_config(FILE *fp);
+static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
+
+struct nvme_bdev_io {
+ /** array of iovecs to transfer. */
+ struct iovec *iovs;
+
+ /** Number of iovecs in iovs array. */
+ int iovcnt;
+
+ /** Current iovec position. */
+ int iovpos;
+
+ /** Offset in current iovec. */
+ uint32_t iov_offset;
+
+ /** Array of iovecs for the fused command to transfer. */
+ struct iovec *fused_iovs;
+
+ /** Number of iovecs in fused_iovs array. */
+ int fused_iovcnt;
+
+ /** Current iovec position. */
+ int fused_iovpos;
+
+ /** Offset in current iovec. */
+ uint32_t fused_iov_offset;
+
+ /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
+ struct spdk_nvme_cpl cpl;
+
+ /** Originating thread */
+ struct spdk_thread *orig_thread;
+
+ /** Keeps track if first of fused commands was submitted */
+ bool first_fused_submitted;
+};
+
+struct nvme_probe_ctx {
+ size_t count;
+ struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
+ struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS];
+ const char *names[NVME_MAX_CONTROLLERS];
+ uint32_t prchk_flags[NVME_MAX_CONTROLLERS];
+ const char *hostnqn;
+};
+
+struct nvme_probe_skip_entry {
+ struct spdk_nvme_transport_id trid;
+ TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
+};
+/* All the controllers deleted by users via RPC are skipped by hotplug monitor */
+static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
+ g_skipped_nvme_ctrlrs);
+
+static struct spdk_bdev_nvme_opts g_opts = {
+ .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
+ .timeout_us = 0,
+ .retry_count = 4,
+ .arbitration_burst = 0,
+ .low_priority_weight = 0,
+ .medium_priority_weight = 0,
+ .high_priority_weight = 0,
+ .nvme_adminq_poll_period_us = 10000ULL,
+ .nvme_ioq_poll_period_us = 0,
+ .io_queue_requests = 0,
+ .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
+};
+
+#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
+#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
+
+static int g_hot_insert_nvme_controller_index = 0;
+static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
+static bool g_nvme_hotplug_enabled = false;
+static struct spdk_thread *g_bdev_nvme_init_thread;
+static struct spdk_poller *g_hotplug_poller;
+static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
+static char *g_nvme_hostnqn = NULL;
+
+static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_async_probe_ctx *ctx);
+static void nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx);
+static int bdev_nvme_library_init(void);
+static void bdev_nvme_library_fini(void);
+static int bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
+static int bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
+static int bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
+static int bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
+static int bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
+ int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba);
+static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
+static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
+static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
+static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio);
+static int bdev_nvme_abort(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
+
+typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx);
+static void nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx);
+
+static populate_namespace_fn g_populate_namespace_fn[] = {
+ NULL,
+ nvme_ctrlr_populate_standard_namespace,
+ bdev_ocssd_populate_namespace,
+};
+
+typedef void (*depopulate_namespace_fn)(struct nvme_bdev_ns *ns);
+static void nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns);
+
+static depopulate_namespace_fn g_depopulate_namespace_fn[] = {
+ NULL,
+ nvme_ctrlr_depopulate_standard_namespace,
+ bdev_ocssd_depopulate_namespace,
+};
+
+typedef void (*config_json_namespace_fn)(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns);
+static void nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w,
+ struct nvme_bdev_ns *ns);
+
+static config_json_namespace_fn g_config_json_namespace_fn[] = {
+ NULL,
+ nvme_ctrlr_config_json_standard_namespace,
+ bdev_ocssd_namespace_config_json,
+};
+
+struct spdk_nvme_qpair *
+bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
+{
+ struct nvme_io_channel *nvme_ch;
+
+ nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
+
+ return nvme_ch->qpair;
+}
+
+static int
+bdev_nvme_get_ctx_size(void)
+{
+ return sizeof(struct nvme_bdev_io);
+}
+
+static struct spdk_bdev_module nvme_if = {
+ .name = "nvme",
+ .async_fini = true,
+ .module_init = bdev_nvme_library_init,
+ .module_fini = bdev_nvme_library_fini,
+ .config_text = bdev_nvme_get_spdk_running_config,
+ .config_json = bdev_nvme_config_json,
+ .get_ctx_size = bdev_nvme_get_ctx_size,
+
+};
+SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
+
+static void
+bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
+{
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "qpair %p is disconnected, attempting reconnect.\n", qpair);
+ /*
+ * Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will
+ * reconnect a qpair and we will stop getting a callback for this one.
+ */
+ spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
+}
+
+static int
+bdev_nvme_poll(void *arg)
+{
+ struct nvme_bdev_poll_group *group = arg;
+ int64_t num_completions;
+
+ if (group->collect_spin_stat && group->start_ticks == 0) {
+ group->start_ticks = spdk_get_ticks();
+ }
+
+ num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
+ bdev_nvme_disconnected_qpair_cb);
+ if (group->collect_spin_stat) {
+ if (num_completions > 0) {
+ if (group->end_ticks != 0) {
+ group->spin_ticks += (group->end_ticks - group->start_ticks);
+ group->end_ticks = 0;
+ }
+ group->start_ticks = 0;
+ } else {
+ group->end_ticks = spdk_get_ticks();
+ }
+ }
+
+ return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
+}
+
+static int
+bdev_nvme_poll_adminq(void *arg)
+{
+ int32_t rc;
+ struct spdk_nvme_ctrlr *ctrlr = arg;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
+
+ if (rc < 0) {
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
+ assert(nvme_bdev_ctrlr != NULL);
+ bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
+ }
+
+ return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
+}
+
+static int
+bdev_nvme_destruct(void *ctx)
+{
+ struct nvme_bdev *nvme_disk = ctx;
+
+ nvme_bdev_detach_bdev_from_ns(nvme_disk);
+
+ free(nvme_disk->disk.name);
+ free(nvme_disk);
+
+ return 0;
+}
+
+static int
+bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio,
+ uint64_t offset, uint64_t nbytes)
+{
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS);
+
+ return 0;
+}
+
+static void
+_bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
+ struct spdk_bdev_io *bdev_io;
+ enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ /* A NULL ctx means success. */
+ if (spdk_io_channel_iter_get_ctx(i) != NULL) {
+ status = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ while (!TAILQ_EMPTY(&nvme_ch->pending_resets)) {
+ bdev_io = TAILQ_FIRST(&nvme_ch->pending_resets);
+ TAILQ_REMOVE(&nvme_ch->pending_resets, bdev_io, module_link);
+ spdk_bdev_io_complete(bdev_io, status);
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc)
+{
+ /* we are using the for_each_channel cb_arg like a return code here. */
+ /* If it's zero, we succeeded, otherwise, the reset failed. */
+ void *cb_arg = NULL;
+
+ if (rc) {
+ cb_arg = (void *)0x1;
+ SPDK_ERRLOG("Resetting controller failed.\n");
+ } else {
+ SPDK_NOTICELOG("Resetting controller successful.\n");
+ }
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr->resetting = false;
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ /* Make sure we clear any pending resets before returning. */
+ spdk_for_each_channel(nvme_bdev_ctrlr,
+ _bdev_nvme_complete_pending_resets,
+ cb_arg, NULL);
+}
+
+static void
+_bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
+ void *ctx = spdk_io_channel_iter_get_ctx(i);
+ int rc = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+ if (status) {
+ rc = SPDK_BDEV_IO_STATUS_FAILED;
+ }
+ if (ctx) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc);
+ }
+ _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status);
+}
+
+static void
+_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
+ struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
+ struct spdk_nvme_io_qpair_opts opts;
+
+ spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
+ opts.delay_cmd_submit = g_opts.delay_cmd_submit;
+ opts.create_only = true;
+
+ nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
+ if (!nvme_ch->qpair) {
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+
+ assert(nvme_ch->group != NULL);
+ if (spdk_nvme_poll_group_add(nvme_ch->group->group, nvme_ch->qpair) != 0) {
+ SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
+ spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+
+ if (spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair)) {
+ SPDK_ERRLOG("Unable to connect I/O qpair.\n");
+ spdk_nvme_poll_group_remove(nvme_ch->group->group, nvme_ch->qpair);
+ spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
+ spdk_for_each_channel_continue(i, -1);
+ return;
+ }
+
+ spdk_for_each_channel_continue(i, 0);
+}
+
+static void
+_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = spdk_io_channel_iter_get_io_device(i);
+ struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
+ int rc;
+
+ if (status) {
+ if (bio) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ _bdev_nvme_reset_complete(nvme_bdev_ctrlr, status);
+ return;
+ }
+
+ rc = spdk_nvme_ctrlr_reset(nvme_bdev_ctrlr->ctrlr);
+ if (rc != 0) {
+ if (bio) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ _bdev_nvme_reset_complete(nvme_bdev_ctrlr, rc);
+ return;
+ }
+
+ /* Recreate all of the I/O queue pairs */
+ spdk_for_each_channel(nvme_bdev_ctrlr,
+ _bdev_nvme_reset_create_qpair,
+ bio,
+ _bdev_nvme_reset_create_qpairs_done);
+
+
+}
+
+static void
+_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
+{
+ struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
+ if (!rc) {
+ nvme_ch->qpair = NULL;
+ }
+
+ spdk_for_each_channel_continue(i, rc);
+}
+
+static int
+bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio)
+{
+ struct spdk_io_channel *ch;
+ struct nvme_io_channel *nvme_ch;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ if (nvme_bdev_ctrlr->destruct) {
+ /* Don't bother resetting if the controller is in the process of being destructed. */
+ if (bio) {
+ spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return 0;
+ }
+
+ if (!nvme_bdev_ctrlr->resetting) {
+ nvme_bdev_ctrlr->resetting = true;
+ } else {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
+ /*
+ * The internal reset calls won't be queued. This is on purpose so that we don't
+ * interfere with the app framework reset strategy. i.e. we are deferring to the
+ * upper level. If they are in the middle of a reset, we won't try to schedule another one.
+ */
+ if (bio) {
+ ch = spdk_get_io_channel(nvme_bdev_ctrlr);
+ assert(ch != NULL);
+ nvme_ch = spdk_io_channel_get_ctx(ch);
+ TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link);
+ spdk_put_io_channel(ch);
+ }
+ return 0;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ /* First, delete all NVMe I/O queue pairs. */
+ spdk_for_each_channel(nvme_bdev_ctrlr,
+ _bdev_nvme_reset_destroy_qpair,
+ bio,
+ _bdev_nvme_reset);
+
+ return 0;
+}
+
+static int
+bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ uint64_t offset_blocks,
+ uint64_t num_blocks);
+
+static void
+bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
+ bool success)
+{
+ int ret;
+
+ if (!success) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ ch,
+ (struct nvme_bdev_io *)bdev_io->driver_ctx,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ if (spdk_likely(ret == 0)) {
+ return;
+ } else if (ret == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static int
+_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
+ struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
+ struct nvme_bdev_io *nbdev_io_to_abort;
+
+ if (nvme_ch->qpair == NULL) {
+ /* The device is currently resetting */
+ return -1;
+ }
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ return 0;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ return bdev_nvme_writev(nbdev,
+ ch,
+ nbdev_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ case SPDK_BDEV_IO_TYPE_COMPARE:
+ return bdev_nvme_comparev(nbdev,
+ ch,
+ nbdev_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
+ return bdev_nvme_comparev_and_writev(nbdev,
+ ch,
+ nbdev_io,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.fused_iovs,
+ bdev_io->u.bdev.fused_iovcnt,
+ bdev_io->u.bdev.md_buf,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ return bdev_nvme_unmap(nbdev,
+ ch,
+ nbdev_io,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ return bdev_nvme_unmap(nbdev,
+ ch,
+ nbdev_io,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_RESET:
+ return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io);
+
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ return bdev_nvme_flush(nbdev,
+ nbdev_io,
+ bdev_io->u.bdev.offset_blocks,
+ bdev_io->u.bdev.num_blocks);
+
+ case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
+ return bdev_nvme_admin_passthru(nbdev,
+ ch,
+ nbdev_io,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes);
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ return bdev_nvme_io_passthru(nbdev,
+ ch,
+ nbdev_io,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes);
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return bdev_nvme_io_passthru_md(nbdev,
+ ch,
+ nbdev_io,
+ &bdev_io->u.nvme_passthru.cmd,
+ bdev_io->u.nvme_passthru.buf,
+ bdev_io->u.nvme_passthru.nbytes,
+ bdev_io->u.nvme_passthru.md_buf,
+ bdev_io->u.nvme_passthru.md_len);
+
+ case SPDK_BDEV_IO_TYPE_ABORT:
+ nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
+ return bdev_nvme_abort(nbdev,
+ ch,
+ nbdev_io,
+ nbdev_io_to_abort);
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void
+bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ int rc = _bdev_nvme_submit_request(ch, bdev_io);
+
+ if (spdk_unlikely(rc != 0)) {
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static bool
+bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct nvme_bdev *nbdev = ctx;
+ const struct spdk_nvme_ctrlr_data *cdata;
+
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_RESET:
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ case SPDK_BDEV_IO_TYPE_ABORT:
+ return true;
+
+ case SPDK_BDEV_IO_TYPE_COMPARE:
+ return spdk_nvme_ns_supports_compare(nbdev->nvme_ns->ns);
+
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns) ? true : false;
+
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
+ return cdata->oncs.dsm;
+
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
+ /*
+ * If an NVMe controller guarantees reading unallocated blocks returns zero,
+ * we can implement WRITE_ZEROES as an NVMe deallocate command.
+ */
+ if (cdata->oncs.dsm &&
+ spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->nvme_ns->ns) ==
+ SPDK_NVME_DEALLOC_READ_00) {
+ return true;
+ }
+ /*
+ * The NVMe controller write_zeroes function is currently not used by our driver.
+ * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail.
+ * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read.
+ */
+ return false;
+
+ case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
+ if (spdk_nvme_ctrlr_get_flags(nbdev->nvme_bdev_ctrlr->ctrlr) &
+ SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
+ return true;
+ }
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+static int
+bdev_nvme_create_cb(void *io_device, void *ctx_buf)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device;
+ struct nvme_io_channel *ch = ctx_buf;
+ struct spdk_nvme_io_qpair_opts opts;
+ struct spdk_io_channel *pg_ch = NULL;
+ int rc;
+
+ spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
+ opts.delay_cmd_submit = g_opts.delay_cmd_submit;
+ opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
+ opts.create_only = true;
+ g_opts.io_queue_requests = opts.io_queue_requests;
+
+ ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_bdev_ctrlr->ctrlr, &opts, sizeof(opts));
+
+ if (ch->qpair == NULL) {
+ return -1;
+ }
+
+ if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
+ if (bdev_ocssd_create_io_channel(ch)) {
+ goto err;
+ }
+ }
+
+ pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
+ if (!pg_ch) {
+ goto err;
+ }
+
+ ch->group = spdk_io_channel_get_ctx(pg_ch);
+ if (spdk_nvme_poll_group_add(ch->group->group, ch->qpair) != 0) {
+ goto err;
+ }
+
+ rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_bdev_ctrlr->ctrlr, ch->qpair);
+ if (rc) {
+ spdk_nvme_poll_group_remove(ch->group->group, ch->qpair);
+ goto err;
+ }
+
+#ifdef SPDK_CONFIG_VTUNE
+ ch->group->collect_spin_stat = true;
+#else
+ ch->group->collect_spin_stat = false;
+#endif
+
+ TAILQ_INIT(&ch->pending_resets);
+ return 0;
+
+err:
+ if (pg_ch) {
+ spdk_put_io_channel(pg_ch);
+ }
+ spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
+ return -1;
+}
+
+static void
+bdev_nvme_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device;
+ struct nvme_io_channel *ch = ctx_buf;
+ struct nvme_bdev_poll_group *group;
+
+ group = ch->group;
+ assert(group != NULL);
+
+ if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
+ bdev_ocssd_destroy_io_channel(ch);
+ }
+
+ if (ch->qpair != NULL) {
+ spdk_nvme_poll_group_remove(group->group, ch->qpair);
+ }
+ spdk_put_io_channel(spdk_io_channel_from_ctx(group));
+
+ spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
+}
+
+static int
+bdev_nvme_poll_group_create_cb(void *io_device, void *ctx_buf)
+{
+ struct nvme_bdev_poll_group *group = ctx_buf;
+
+ group->group = spdk_nvme_poll_group_create(group);
+ if (group->group == NULL) {
+ return -1;
+ }
+
+ group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
+
+ if (group->poller == NULL) {
+ spdk_nvme_poll_group_destroy(group->group);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+bdev_nvme_poll_group_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct nvme_bdev_poll_group *group = ctx_buf;
+
+ spdk_poller_unregister(&group->poller);
+ if (spdk_nvme_poll_group_destroy(group->group)) {
+ SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.");
+ assert(false);
+ }
+}
+
+static struct spdk_io_channel *
+bdev_nvme_get_io_channel(void *ctx)
+{
+ struct nvme_bdev *nvme_bdev = ctx;
+
+ return spdk_get_io_channel(nvme_bdev->nvme_bdev_ctrlr);
+}
+
+static int
+bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct nvme_bdev *nvme_bdev = ctx;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr;
+ const struct spdk_nvme_ctrlr_data *cdata;
+ struct spdk_nvme_ns *ns;
+ union spdk_nvme_vs_register vs;
+ union spdk_nvme_csts_register csts;
+ char buf[128];
+
+ cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
+ vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
+ csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
+ ns = nvme_bdev->nvme_ns->ns;
+
+ spdk_json_write_named_object_begin(w, "nvme");
+
+ if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->trid->traddr);
+ }
+
+ spdk_json_write_named_object_begin(w, "trid");
+
+ nvme_bdev_dump_trid_json(nvme_bdev_ctrlr->trid, w);
+
+ spdk_json_write_object_end(w);
+
+#ifdef SPDK_CONFIG_NVME_CUSE
+ size_t cuse_name_size = 128;
+ char cuse_name[cuse_name_size];
+
+ int rc = spdk_nvme_cuse_get_ns_name(nvme_bdev->nvme_bdev_ctrlr->ctrlr, spdk_nvme_ns_get_id(ns),
+ cuse_name, &cuse_name_size);
+ if (rc == 0) {
+ spdk_json_write_named_string(w, "cuse_device", cuse_name);
+ }
+#endif
+
+ spdk_json_write_named_object_begin(w, "ctrlr_data");
+
+ spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
+
+ snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "model_number", buf);
+
+ snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "serial_number", buf);
+
+ snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
+ spdk_str_trim(buf);
+ spdk_json_write_named_string(w, "firmware_revision", buf);
+
+ spdk_json_write_named_object_begin(w, "oacs");
+
+ spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
+ spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
+ spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
+ spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "vs");
+
+ spdk_json_write_name(w, "nvme_version");
+ if (vs.bits.ter) {
+ spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
+ } else {
+ spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
+ }
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "csts");
+
+ spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy);
+ spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_named_object_begin(w, "ns_data");
+
+ spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
+
+ spdk_json_write_object_end(w);
+
+ if (cdata->oacs.security) {
+ spdk_json_write_named_object_begin(w, "security");
+
+ spdk_json_write_named_bool(w, "opal", nvme_bdev_ctrlr->opal_dev ? true : false);
+
+ spdk_json_write_object_end(w);
+ }
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+ /* No config per bdev needed */
+}
+
+static uint64_t
+bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ struct nvme_bdev_poll_group *group = nvme_ch->group;
+ uint64_t spin_time;
+
+ if (!group || !group->collect_spin_stat) {
+ return 0;
+ }
+
+ if (group->end_ticks != 0) {
+ group->spin_ticks += (group->end_ticks - group->start_ticks);
+ group->end_ticks = 0;
+ }
+
+ spin_time = (group->spin_ticks * 1000000ULL) / spdk_get_ticks_hz();
+ group->start_ticks = 0;
+ group->spin_ticks = 0;
+
+ return spin_time;
+}
+
+static const struct spdk_bdev_fn_table nvmelib_fn_table = {
+ .destruct = bdev_nvme_destruct,
+ .submit_request = bdev_nvme_submit_request,
+ .io_type_supported = bdev_nvme_io_type_supported,
+ .get_io_channel = bdev_nvme_get_io_channel,
+ .dump_info_json = bdev_nvme_dump_info_json,
+ .write_config_json = bdev_nvme_write_config_json,
+ .get_spin_time = bdev_nvme_get_spin_time,
+};
+
+static void
+nvme_ctrlr_populate_standard_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx)
+{
+ struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
+ struct nvme_bdev *bdev;
+ struct spdk_nvme_ns *ns;
+ const struct spdk_uuid *uuid;
+ const struct spdk_nvme_ctrlr_data *cdata;
+ const struct spdk_nvme_ns_data *nsdata;
+ int rc;
+
+ cdata = spdk_nvme_ctrlr_get_data(ctrlr);
+
+ ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
+ if (!ns) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nvme_ns->id);
+ nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -EINVAL);
+ return;
+ }
+
+ bdev = calloc(1, sizeof(*bdev));
+ if (!bdev) {
+ SPDK_ERRLOG("bdev calloc() failed\n");
+ nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM);
+ return;
+ }
+
+ bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr;
+ nvme_ns->ns = ns;
+ bdev->nvme_ns = nvme_ns;
+
+ bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns));
+ if (!bdev->disk.name) {
+ free(bdev);
+ nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, -ENOMEM);
+ return;
+ }
+ bdev->disk.product_name = "NVMe disk";
+
+ bdev->disk.write_cache = 0;
+ if (cdata->vwc.present) {
+ /* Enable if the Volatile Write Cache exists */
+ bdev->disk.write_cache = 1;
+ }
+ bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
+ bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
+ bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
+
+ uuid = spdk_nvme_ns_get_uuid(ns);
+ if (uuid != NULL) {
+ bdev->disk.uuid = *uuid;
+ }
+
+ nsdata = spdk_nvme_ns_get_data(ns);
+
+ bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns);
+ if (bdev->disk.md_len != 0) {
+ bdev->disk.md_interleave = nsdata->flbas.extended;
+ bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
+ if (bdev->disk.dif_type != SPDK_DIF_DISABLE) {
+ bdev->disk.dif_is_head_of_md = nsdata->dps.md_start;
+ bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags;
+ }
+ }
+
+ if (!bdev_nvme_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) {
+ bdev->disk.acwu = 0;
+ } else if (nsdata->nsfeat.ns_atomic_write_unit) {
+ bdev->disk.acwu = nsdata->nacwu;
+ } else {
+ bdev->disk.acwu = cdata->acwu;
+ }
+
+ bdev->disk.ctxt = bdev;
+ bdev->disk.fn_table = &nvmelib_fn_table;
+ bdev->disk.module = &nvme_if;
+ rc = spdk_bdev_register(&bdev->disk);
+ if (rc) {
+ free(bdev->disk.name);
+ free(bdev);
+ nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, rc);
+ return;
+ }
+
+ nvme_bdev_attach_bdev_to_ns(nvme_ns, bdev);
+ nvme_ctrlr_populate_namespace_done(ctx, nvme_ns, 0);
+}
+
+static bool
+hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct nvme_probe_skip_entry *entry;
+
+ TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
+ if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
+ return false;
+ }
+ }
+
+ opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
+ opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
+ opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
+ opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr);
+
+ return true;
+}
+
+static bool
+probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct nvme_probe_ctx *ctx = cb_ctx;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr);
+
+ if (nvme_bdev_ctrlr_get(trid)) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
+ trid->traddr);
+ return false;
+ }
+
+ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ bool claim_device = false;
+ size_t i;
+
+ for (i = 0; i < ctx->count; i++) {
+ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
+ claim_device = true;
+ break;
+ }
+ }
+
+ if (!claim_device) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr);
+ return false;
+ }
+ }
+
+ if (ctx->hostnqn) {
+ snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn);
+ }
+
+ opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
+ opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
+ opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
+ opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
+
+ return true;
+}
+
+static void
+nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_nvme_ctrlr *ctrlr = ctx;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_WARNLOG("Abort failed. Resetting controller.\n");
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
+ assert(nvme_bdev_ctrlr != NULL);
+ bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
+ }
+}
+
+static void
+timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
+ struct spdk_nvme_qpair *qpair, uint16_t cid)
+{
+ int rc;
+ union spdk_nvme_csts_register csts;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
+
+ csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
+ if (csts.bits.cfs) {
+ SPDK_ERRLOG("Controller Fatal Status, reset required\n");
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
+ assert(nvme_bdev_ctrlr != NULL);
+ bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
+ return;
+ }
+
+ switch (g_opts.action_on_timeout) {
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
+ if (qpair) {
+ rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
+ nvme_abort_cpl, ctrlr);
+ if (rc == 0) {
+ return;
+ }
+
+ SPDK_ERRLOG("Unable to send abort. Resetting.\n");
+ }
+
+ /* FALLTHROUGH */
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr));
+ assert(nvme_bdev_ctrlr != NULL);
+ bdev_nvme_reset(nvme_bdev_ctrlr, NULL);
+ break;
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n");
+ break;
+ default:
+ SPDK_ERRLOG("An invalid timeout action value is found.\n");
+ break;
+ }
+}
+
+void
+nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr->ref--;
+
+ if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
+ return;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
+
+static void
+nvme_ctrlr_depopulate_standard_namespace(struct nvme_bdev_ns *ns)
+{
+ struct nvme_bdev *bdev, *tmp;
+
+ TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) {
+ spdk_bdev_unregister(&bdev->disk, NULL, NULL);
+ }
+
+ ns->populated = false;
+
+ nvme_ctrlr_depopulate_namespace_done(ns->ctrlr);
+}
+
+static void nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns,
+ struct nvme_async_probe_ctx *ctx)
+{
+ g_populate_namespace_fn[ns->type](ctrlr, ns, ctx);
+}
+
+static void nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns)
+{
+ g_depopulate_namespace_fn[ns->type](ns);
+}
+
+void
+nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx,
+ struct nvme_bdev_ns *ns, int rc)
+{
+ if (rc == 0) {
+ ns->populated = true;
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ ns->ctrlr->ref++;
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ } else {
+ memset(ns, 0, sizeof(*ns));
+ }
+
+ if (ctx) {
+ ctx->populates_in_progress--;
+ if (ctx->populates_in_progress == 0) {
+ nvme_ctrlr_populate_namespaces_done(ctx);
+ }
+ }
+}
+
+static void
+nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_async_probe_ctx *ctx)
+{
+ struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
+ struct nvme_bdev_ns *ns;
+ struct spdk_nvme_ns *nvme_ns;
+ struct nvme_bdev *bdev;
+ uint32_t i;
+ int rc;
+ uint64_t num_sectors;
+ bool ns_is_active;
+
+ if (ctx) {
+ /* Initialize this count to 1 to handle the populate functions
+ * calling nvme_ctrlr_populate_namespace_done() immediately.
+ */
+ ctx->populates_in_progress = 1;
+ }
+
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ uint32_t nsid = i + 1;
+
+ ns = nvme_bdev_ctrlr->namespaces[i];
+ ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid);
+
+ if (ns->populated && ns_is_active && ns->type == NVME_BDEV_NS_STANDARD) {
+ /* NS is still there but attributes may have changed */
+ nvme_ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
+ num_sectors = spdk_nvme_ns_get_num_sectors(nvme_ns);
+ bdev = TAILQ_FIRST(&ns->bdevs);
+ if (bdev->disk.blockcnt != num_sectors) {
+ SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %lu, new size %lu\n",
+ nsid,
+ bdev->disk.name,
+ bdev->disk.blockcnt,
+ num_sectors);
+ rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
+ if (rc != 0) {
+ SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
+ bdev->disk.name, rc);
+ }
+ }
+ }
+
+ if (!ns->populated && ns_is_active) {
+ ns->id = nsid;
+ ns->ctrlr = nvme_bdev_ctrlr;
+ if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
+ ns->type = NVME_BDEV_NS_OCSSD;
+ } else {
+ ns->type = NVME_BDEV_NS_STANDARD;
+ }
+
+ TAILQ_INIT(&ns->bdevs);
+
+ if (ctx) {
+ ctx->populates_in_progress++;
+ }
+ nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, ctx);
+ }
+
+ if (ns->populated && !ns_is_active) {
+ nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
+ }
+ }
+
+ if (ctx) {
+ /* Decrement this count now that the loop is over to account
+ * for the one we started with. If the count is then 0, we
+ * know any populate_namespace functions completed immediately,
+ * so we'll kick the callback here.
+ */
+ ctx->populates_in_progress--;
+ if (ctx->populates_in_progress == 0) {
+ nvme_ctrlr_populate_namespaces_done(ctx);
+ }
+ }
+
+}
+
+static void
+aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg;
+ union spdk_nvme_async_event_completion event;
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_WARNLOG("AER request execute failed");
+ return;
+ }
+
+ event.raw = cpl->cdw0;
+ if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
+ (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
+ nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
+ } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_VENDOR) &&
+ (event.bits.log_page_identifier == SPDK_OCSSD_LOG_CHUNK_NOTIFICATION) &&
+ spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
+ bdev_ocssd_handle_chunk_notification(nvme_bdev_ctrlr);
+ }
+}
+
+static int
+create_ctrlr(struct spdk_nvme_ctrlr *ctrlr,
+ const char *name,
+ const struct spdk_nvme_transport_id *trid,
+ uint32_t prchk_flags)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ uint32_t i;
+ int rc;
+
+ nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr));
+ if (nvme_bdev_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed to allocate device struct\n");
+ return -ENOMEM;
+ }
+
+ nvme_bdev_ctrlr->trid = calloc(1, sizeof(*nvme_bdev_ctrlr->trid));
+ if (nvme_bdev_ctrlr->trid == NULL) {
+ SPDK_ERRLOG("Failed to allocate device trid struct\n");
+ free(nvme_bdev_ctrlr);
+ return -ENOMEM;
+ }
+
+ nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
+ nvme_bdev_ctrlr->namespaces = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev_ns *));
+ if (!nvme_bdev_ctrlr->namespaces) {
+ SPDK_ERRLOG("Failed to allocate block namespaces pointer\n");
+ free(nvme_bdev_ctrlr->trid);
+ free(nvme_bdev_ctrlr);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ nvme_bdev_ctrlr->namespaces[i] = calloc(1, sizeof(struct nvme_bdev_ns));
+ if (nvme_bdev_ctrlr->namespaces[i] == NULL) {
+ SPDK_ERRLOG("Failed to allocate block namespace struct\n");
+ for (; i > 0; i--) {
+ free(nvme_bdev_ctrlr->namespaces[i - 1]);
+ }
+ free(nvme_bdev_ctrlr->namespaces);
+ free(nvme_bdev_ctrlr->trid);
+ free(nvme_bdev_ctrlr);
+ return -ENOMEM;
+ }
+ }
+
+ nvme_bdev_ctrlr->thread = spdk_get_thread();
+ nvme_bdev_ctrlr->adminq_timer_poller = NULL;
+ nvme_bdev_ctrlr->ctrlr = ctrlr;
+ nvme_bdev_ctrlr->ref = 0;
+ *nvme_bdev_ctrlr->trid = *trid;
+ nvme_bdev_ctrlr->name = strdup(name);
+ if (nvme_bdev_ctrlr->name == NULL) {
+ free(nvme_bdev_ctrlr->namespaces);
+ free(nvme_bdev_ctrlr->trid);
+ free(nvme_bdev_ctrlr);
+ return -ENOMEM;
+ }
+
+ if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
+ rc = bdev_ocssd_init_ctrlr(nvme_bdev_ctrlr);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to initialize OCSSD controller\n");
+ free(nvme_bdev_ctrlr->name);
+ free(nvme_bdev_ctrlr->namespaces);
+ free(nvme_bdev_ctrlr->trid);
+ free(nvme_bdev_ctrlr);
+ return rc;
+ }
+ }
+
+ nvme_bdev_ctrlr->prchk_flags = prchk_flags;
+
+ spdk_io_device_register(nvme_bdev_ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,
+ sizeof(struct nvme_io_channel),
+ name);
+
+ nvme_bdev_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, ctrlr,
+ g_opts.nvme_adminq_poll_period_us);
+
+ TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);
+
+ if (g_opts.timeout_us > 0) {
+ spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
+ timeout_cb, NULL);
+ }
+
+ spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr);
+
+ if (spdk_nvme_ctrlr_get_flags(nvme_bdev_ctrlr->ctrlr) &
+ SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
+ nvme_bdev_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_bdev_ctrlr->ctrlr);
+ if (nvme_bdev_ctrlr->opal_dev == NULL) {
+ SPDK_ERRLOG("Failed to initialize Opal\n");
+ }
+ }
+ return 0;
+}
+
+static void
+attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct nvme_probe_ctx *ctx = cb_ctx;
+ char *name = NULL;
+ uint32_t prchk_flags = 0;
+ size_t i;
+
+ if (ctx) {
+ for (i = 0; i < ctx->count; i++) {
+ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
+ prchk_flags = ctx->prchk_flags[i];
+ name = strdup(ctx->names[i]);
+ break;
+ }
+ }
+ } else {
+ name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
+ }
+ if (!name) {
+ SPDK_ERRLOG("Failed to assign name to NVMe device\n");
+ return;
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name);
+
+ create_ctrlr(ctrlr, name, trid, prchk_flags);
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid);
+ if (!nvme_bdev_ctrlr) {
+ SPDK_ERRLOG("Failed to find new NVMe controller\n");
+ free(name);
+ return;
+ }
+
+ nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
+
+ free(name);
+}
+
+static void
+remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
+{
+ uint32_t i;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct nvme_bdev_ns *ns;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
+ if (nvme_bdev_ctrlr->ctrlr == ctrlr) {
+ /* The controller's destruction was already started */
+ if (nvme_bdev_ctrlr->destruct) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return;
+ }
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ uint32_t nsid = i + 1;
+
+ ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+ if (ns->populated) {
+ assert(ns->id == nsid);
+ nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
+ }
+ }
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr->destruct = true;
+ if (nvme_bdev_ctrlr->ref == 0) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
+ } else {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ }
+ return;
+ }
+ }
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
+
+static int
+bdev_nvme_hotplug(void *arg)
+{
+ struct spdk_nvme_transport_id trid_pcie;
+ int done;
+
+ if (!g_hotplug_probe_ctx) {
+ memset(&trid_pcie, 0, sizeof(trid_pcie));
+ spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
+
+ g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
+ hotplug_probe_cb,
+ attach_cb, remove_cb);
+ if (!g_hotplug_probe_ctx) {
+ return SPDK_POLLER_BUSY;
+ }
+ }
+
+ done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx);
+ if (done != -EAGAIN) {
+ g_hotplug_probe_ctx = NULL;
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+void
+bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
+{
+ *opts = g_opts;
+}
+
+int
+bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
+{
+ if (g_bdev_nvme_init_thread != NULL) {
+ if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
+ return -EPERM;
+ }
+ }
+
+ g_opts = *opts;
+
+ return 0;
+}
+
+struct set_nvme_hotplug_ctx {
+ uint64_t period_us;
+ bool enabled;
+ spdk_msg_fn fn;
+ void *fn_ctx;
+};
+
+static void
+set_nvme_hotplug_period_cb(void *_ctx)
+{
+ struct set_nvme_hotplug_ctx *ctx = _ctx;
+
+ spdk_poller_unregister(&g_hotplug_poller);
+ if (ctx->enabled) {
+ g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
+ }
+
+ g_nvme_hotplug_poll_period_us = ctx->period_us;
+ g_nvme_hotplug_enabled = ctx->enabled;
+ if (ctx->fn) {
+ ctx->fn(ctx->fn_ctx);
+ }
+
+ free(ctx);
+}
+
+int
+bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
+{
+ struct set_nvme_hotplug_ctx *ctx;
+
+ if (enabled == true && !spdk_process_is_primary()) {
+ return -EPERM;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx == NULL) {
+ return -ENOMEM;
+ }
+
+ period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
+ ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
+ ctx->enabled = enabled;
+ ctx->fn = cb;
+ ctx->fn_ctx = cb_ctx;
+
+ spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
+ return 0;
+}
+
+static void
+populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, size_t count, int rc)
+{
+ if (ctx->cb_fn) {
+ ctx->cb_fn(ctx->cb_ctx, count, rc);
+ }
+
+ free(ctx);
+}
+
+static void
+nvme_ctrlr_populate_namespaces_done(struct nvme_async_probe_ctx *ctx)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct nvme_bdev_ns *ns;
+ struct nvme_bdev *nvme_bdev, *tmp;
+ uint32_t i, nsid;
+ size_t j;
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid);
+ assert(nvme_bdev_ctrlr != NULL);
+
+ /*
+ * Report the new bdevs that were created in this call.
+ * There can be more than one bdev per NVMe controller.
+ */
+ j = 0;
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ nsid = i + 1;
+ ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+ if (!ns->populated) {
+ continue;
+ }
+ assert(ns->id == nsid);
+ TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) {
+ if (j < ctx->count) {
+ ctx->names[j] = nvme_bdev->disk.name;
+ j++;
+ } else {
+ SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
+ ctx->count);
+ populate_namespaces_cb(ctx, 0, -ERANGE);
+ return;
+ }
+ }
+ }
+
+ populate_namespaces_cb(ctx, j, 0);
+}
+
+static void
+connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct nvme_async_probe_ctx *ctx;
+ int rc;
+
+ ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts);
+
+ spdk_poller_unregister(&ctx->poller);
+
+ rc = create_ctrlr(ctrlr, ctx->base_name, &ctx->trid, ctx->prchk_flags);
+ if (rc) {
+ SPDK_ERRLOG("Failed to create new device\n");
+ populate_namespaces_cb(ctx, 0, rc);
+ return;
+ }
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&ctx->trid);
+ assert(nvme_bdev_ctrlr != NULL);
+
+ nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, ctx);
+}
+
+static int
+bdev_nvme_async_poll(void *arg)
+{
+ struct nvme_async_probe_ctx *ctx = arg;
+ int rc;
+
+ rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
+ if (spdk_unlikely(rc != -EAGAIN && rc != 0)) {
+ spdk_poller_unregister(&ctx->poller);
+ free(ctx);
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+int
+bdev_nvme_create(struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_host_id *hostid,
+ const char *base_name,
+ const char **names,
+ uint32_t count,
+ const char *hostnqn,
+ uint32_t prchk_flags,
+ spdk_bdev_create_nvme_fn cb_fn,
+ void *cb_ctx)
+{
+ struct nvme_probe_skip_entry *entry, *tmp;
+ struct nvme_async_probe_ctx *ctx;
+
+ if (nvme_bdev_ctrlr_get(trid) != NULL) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
+ return -EEXIST;
+ }
+
+ if (nvme_bdev_ctrlr_get_by_name(base_name)) {
+ SPDK_ERRLOG("A controller with the provided name (%s) already exists.\n", base_name);
+ return -EEXIST;
+ }
+
+ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
+ if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
+ TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
+ free(entry);
+ break;
+ }
+ }
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ return -ENOMEM;
+ }
+ ctx->base_name = base_name;
+ ctx->names = names;
+ ctx->count = count;
+ ctx->cb_fn = cb_fn;
+ ctx->cb_ctx = cb_ctx;
+ ctx->prchk_flags = prchk_flags;
+ ctx->trid = *trid;
+
+ spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts));
+ ctx->opts.transport_retry_count = g_opts.retry_count;
+
+ if (hostnqn) {
+ snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn);
+ }
+
+ if (hostid->hostaddr[0] != '\0') {
+ snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr);
+ }
+
+ if (hostid->hostsvcid[0] != '\0') {
+ snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid);
+ }
+
+ ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb);
+ if (ctx->probe_ctx == NULL) {
+ SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
+ free(ctx);
+ return -ENODEV;
+ }
+ ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
+
+ return 0;
+}
+
+int
+bdev_nvme_delete(const char *name)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
+ struct nvme_probe_skip_entry *entry;
+
+ if (name == NULL) {
+ return -EINVAL;
+ }
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
+ if (nvme_bdev_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed to find NVMe controller\n");
+ return -ENODEV;
+ }
+
+ if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ entry = calloc(1, sizeof(*entry));
+ if (!entry) {
+ return -ENOMEM;
+ }
+ entry->trid = *nvme_bdev_ctrlr->trid;
+ TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
+ }
+
+ remove_cb(NULL, nvme_bdev_ctrlr->ctrlr);
+ return 0;
+}
+
+static int
+bdev_nvme_library_init(void)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct spdk_conf_section *sp;
+ const char *val;
+ int rc = 0;
+ int64_t intval = 0;
+ size_t i;
+ struct nvme_probe_ctx *probe_ctx = NULL;
+ int retry_count;
+ uint32_t local_nvme_num = 0;
+ int64_t hotplug_period;
+ bool hotplug_enabled = g_nvme_hotplug_enabled;
+
+ g_bdev_nvme_init_thread = spdk_get_thread();
+
+ spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_poll_group_create_cb,
+ bdev_nvme_poll_group_destroy_cb,
+ sizeof(struct nvme_bdev_poll_group), "bdev_nvme_poll_groups");
+
+ sp = spdk_conf_find_section(NULL, "Nvme");
+ if (sp == NULL) {
+ goto end;
+ }
+
+ probe_ctx = calloc(1, sizeof(*probe_ctx));
+ if (probe_ctx == NULL) {
+ SPDK_ERRLOG("Failed to allocate probe_ctx\n");
+ rc = -1;
+ goto end;
+ }
+
+ retry_count = spdk_conf_section_get_intval(sp, "RetryCount");
+ if (retry_count >= 0) {
+ g_opts.retry_count = retry_count;
+ }
+
+ val = spdk_conf_section_get_val(sp, "TimeoutUsec");
+ if (val != NULL) {
+ intval = spdk_strtoll(val, 10);
+ if (intval < 0) {
+ SPDK_ERRLOG("Invalid TimeoutUsec value\n");
+ rc = -1;
+ goto end;
+ }
+ }
+
+ g_opts.timeout_us = intval;
+
+ if (g_opts.timeout_us > 0) {
+ val = spdk_conf_section_get_val(sp, "ActionOnTimeout");
+ if (val != NULL) {
+ if (!strcasecmp(val, "Reset")) {
+ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
+ } else if (!strcasecmp(val, "Abort")) {
+ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT;
+ }
+ }
+ }
+
+ intval = spdk_conf_section_get_intval(sp, "AdminPollRate");
+ if (intval > 0) {
+ g_opts.nvme_adminq_poll_period_us = intval;
+ }
+
+ intval = spdk_conf_section_get_intval(sp, "IOPollRate");
+ if (intval > 0) {
+ g_opts.nvme_ioq_poll_period_us = intval;
+ }
+
+ if (spdk_process_is_primary()) {
+ hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false);
+ }
+
+ hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate");
+ if (hotplug_period < 0) {
+ hotplug_period = 0;
+ }
+
+ g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN");
+ probe_ctx->hostnqn = g_nvme_hostnqn;
+
+ g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit",
+ SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT);
+
+ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
+ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0);
+ if (val == NULL) {
+ break;
+ }
+
+ rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to parse TransportID: %s\n", val);
+ rc = -1;
+ goto end;
+ }
+
+ rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to parse HostID: %s\n", val);
+ rc = -1;
+ goto end;
+ }
+
+ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1);
+ if (val == NULL) {
+ SPDK_ERRLOG("No name provided for TransportID\n");
+ rc = -1;
+ goto end;
+ }
+
+ probe_ctx->names[i] = val;
+
+ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2);
+ if (val != NULL) {
+ rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val);
+ if (rc < 0) {
+ SPDK_ERRLOG("Unable to parse prchk: %s\n", val);
+ rc = -1;
+ goto end;
+ }
+ }
+
+ probe_ctx->count++;
+
+ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
+ struct spdk_nvme_ctrlr *ctrlr;
+ struct spdk_nvme_ctrlr_opts opts;
+
+ if (nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) {
+ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
+ probe_ctx->trids[i].traddr);
+ rc = -1;
+ goto end;
+ }
+
+ if (probe_ctx->trids[i].subnqn[0] == '\0') {
+ SPDK_ERRLOG("Need to provide subsystem nqn\n");
+ rc = -1;
+ goto end;
+ }
+
+ spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
+ opts.transport_retry_count = g_opts.retry_count;
+
+ if (probe_ctx->hostnqn != NULL) {
+ snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn);
+ }
+
+ if (probe_ctx->hostids[i].hostaddr[0] != '\0') {
+ snprintf(opts.src_addr, sizeof(opts.src_addr), "%s", probe_ctx->hostids[i].hostaddr);
+ }
+
+ if (probe_ctx->hostids[i].hostsvcid[0] != '\0') {
+ snprintf(opts.src_svcid, sizeof(opts.src_svcid), "%s", probe_ctx->hostids[i].hostsvcid);
+ }
+
+ ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts));
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n",
+ probe_ctx->trids[i].traddr);
+ rc = -1;
+ goto end;
+ }
+
+ rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i], 0);
+ if (rc) {
+ goto end;
+ }
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(&probe_ctx->trids[i]);
+ if (!nvme_bdev_ctrlr) {
+ SPDK_ERRLOG("Failed to find new NVMe controller\n");
+ rc = -ENODEV;
+ goto end;
+ }
+
+ nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
+ } else {
+ local_nvme_num++;
+ }
+ }
+
+ if (local_nvme_num > 0) {
+		/* Probe locally attached PCIe NVMe devices. */
+ if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) {
+ rc = -1;
+ goto end;
+ }
+
+ for (i = 0; i < probe_ctx->count; i++) {
+ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
+ continue;
+ }
+
+ if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) {
+ SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr);
+				SPDK_ERRLOG("Check the PCIe BDF and that it is attached to a UIO/VFIO driver.\n");
+ }
+ }
+ }
+
+ rc = bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL);
+ if (rc) {
+ SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc));
+ rc = -1;
+ }
+end:
+ free(probe_ctx);
+ return rc;
+}
+
+static void
+bdev_nvme_library_fini(void)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp;
+ struct nvme_probe_skip_entry *entry, *entry_tmp;
+ struct nvme_bdev_ns *ns;
+ uint32_t i;
+
+ spdk_poller_unregister(&g_hotplug_poller);
+ free(g_hotplug_probe_ctx);
+
+ TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
+ TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
+ free(entry);
+ }
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) {
+ if (nvme_bdev_ctrlr->destruct) {
+ /* This controller's destruction was already started
+ * before the application started shutting down
+ */
+ continue;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ uint32_t nsid = i + 1;
+
+ ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+ if (ns->populated) {
+ assert(ns->id == nsid);
+ nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
+ }
+ }
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr->destruct = true;
+
+ if (nvme_bdev_ctrlr->ref == 0) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ }
+ }
+
+ g_bdev_nvme_module_finish = true;
+ if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
+ spdk_bdev_module_finish_done();
+ return;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
+
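+/*
+ * When the controller reports a Protection Information (PI) error, re-verify
+ * the data buffer in software: spdk_dif_verify() for interleaved metadata or
+ * spdk_dix_verify() for a separate metadata buffer, and log which block (if
+ * any) actually fails the check.
+ */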
+static void
+bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_dif_ctx dif_ctx;
+ struct spdk_dif_error err_blk = {};
+ int rc;
+
+ rc = spdk_dif_ctx_init(&dif_ctx,
+ bdev->blocklen, bdev->md_len, bdev->md_interleave,
+ bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
+ bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0);
+ if (rc != 0) {
+ SPDK_ERRLOG("Initialization of DIF context failed\n");
+ return;
+ }
+
+ if (bdev->md_interleave) {
+ rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
+ } else {
+ struct iovec md_iov = {
+ .iov_base = bdev_io->u.bdev.md_buf,
+ .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
+ };
+
+ rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
+ }
+
+ if (rc != 0) {
+ SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
+ err_blk.err_type, err_blk.err_offset);
+ } else {
+ SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
+ }
+}
+
+static void
+bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+
+ if (spdk_nvme_cpl_is_success(cpl)) {
+ /* Run PI verification for read data buffer. */
+ bdev_nvme_verify_pi_error(bdev_io);
+ }
+
+ /* Return original completion status */
+ spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct,
+ bio->cpl.status.sc);
+}
+
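+/*
+ * Read completion: on a PI error the original completion is stashed in
+ * bio->cpl and the read is re-issued without PI checking so the buffer can
+ * be verified in software; the I/O then completes with the original status.
+ */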
+static void
+bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+ int ret;
+
+ if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
+ SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
+ cpl->status.sct, cpl->status.sc);
+
+ /* Save completion status to use after verifying PI error. */
+ bio->cpl = *cpl;
+
+ /* Read without PI checking to verify PI error. */
+ ret = bdev_nvme_no_pi_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
+ spdk_bdev_io_get_io_channel(bdev_io),
+ bio,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf,
+ bdev_io->u.bdev.num_blocks,
+ bdev_io->u.bdev.offset_blocks);
+ if (ret == 0) {
+ return;
+ }
+ }
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
+}
+
+static void
+bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);
+
+ if (spdk_nvme_cpl_is_pi_error(cpl)) {
+ SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
+ cpl->status.sct, cpl->status.sc);
+ /* Run PI verification for write data buffer if PI error is detected. */
+ bdev_nvme_verify_pi_error(bdev_io);
+ }
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
+}
+
+static void
+bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);
+
+ if (spdk_nvme_cpl_is_pi_error(cpl)) {
+ SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
+ cpl->status.sct, cpl->status.sc);
+ /* Run PI verification for compare data buffer if PI error is detected. */
+ bdev_nvme_verify_pi_error(bdev_io);
+ }
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
+}
+
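+/*
+ * Shared completion for both halves of a fused compare-and-write: the
+ * compare completion is cached in bio->cpl, and the write completion decides
+ * the final status, preferring the compare status when the compare failed.
+ */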
+static void
+bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+
+ /* Compare operation completion */
+ if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) {
+ /* Save compare result for write callback */
+ bio->cpl = *cpl;
+ return;
+ }
+
+ /* Write operation completion */
+ if (spdk_nvme_cpl_is_error(&bio->cpl)) {
+ /* If bio->cpl is already an error, it means the compare operation failed. In that case,
+ * complete the IO with the compare operation's status.
+ */
+ if (!spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_ERRLOG("Unexpected write success after compare failure.\n");
+ }
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc);
+ } else {
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
+ }
+}
+
+static void
+bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
+}
+
+static void
+bdev_nvme_admin_passthru_completion(void *ctx)
+{
+ struct nvme_bdev_io *bio = ctx;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+
+ spdk_bdev_io_complete_nvme_status(bdev_io,
+ bio->cpl.cdw0, bio->cpl.status.sct, bio->cpl.status.sc);
+}
+
+static void
+bdev_nvme_abort_completion(void *ctx)
+{
+ struct nvme_bdev_io *bio = ctx;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+
+ if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static void
+bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+
+ bio->cpl = *cpl;
+ spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio);
+}
+
+static void
+bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_io *bio = ref;
+
+ bio->cpl = *cpl;
+ spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio);
+}
+
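+/*
+ * SGL callbacks used by the queued I/O paths: reset_sgl positions the iovec
+ * cursor at the requested payload offset, and next_sge returns the current
+ * segment and advances the cursor.
+ */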
+static void
+bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ bio->iov_offset = sgl_offset;
+ for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
+ iov = &bio->iovs[bio->iovpos];
+ if (bio->iov_offset < iov->iov_len) {
+ break;
+ }
+
+ bio->iov_offset -= iov->iov_len;
+ }
+}
+
+static int
+bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ assert(bio->iovpos < bio->iovcnt);
+
+ iov = &bio->iovs[bio->iovpos];
+
+ *address = iov->iov_base;
+ *length = iov->iov_len;
+
+ if (bio->iov_offset) {
+ assert(bio->iov_offset <= iov->iov_len);
+ *address += bio->iov_offset;
+ *length -= bio->iov_offset;
+ }
+
+ bio->iov_offset += *length;
+ if (bio->iov_offset == iov->iov_len) {
+ bio->iovpos++;
+ bio->iov_offset = 0;
+ }
+
+ return 0;
+}
+
+static void
+bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ bio->fused_iov_offset = sgl_offset;
+ for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
+ iov = &bio->fused_iovs[bio->fused_iovpos];
+ if (bio->fused_iov_offset < iov->iov_len) {
+ break;
+ }
+
+ bio->fused_iov_offset -= iov->iov_len;
+ }
+}
+
+static int
+bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
+{
+ struct nvme_bdev_io *bio = ref;
+ struct iovec *iov;
+
+ assert(bio->fused_iovpos < bio->fused_iovcnt);
+
+ iov = &bio->fused_iovs[bio->fused_iovpos];
+
+ *address = iov->iov_base;
+ *length = iov->iov_len;
+
+ if (bio->fused_iov_offset) {
+ assert(bio->fused_iov_offset <= iov->iov_len);
+ *address += bio->fused_iov_offset;
+ *length -= bio->fused_iov_offset;
+ }
+
+ bio->fused_iov_offset += *length;
+ if (bio->fused_iov_offset == iov->iov_len) {
+ bio->fused_iovpos++;
+ bio->fused_iov_offset = 0;
+ }
+
+ return 0;
+}
+
+static int
+bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
+ void *md, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx without PI check\n",
+ lba_count, lba);
+
+ bio->iovs = iov;
+ bio->iovcnt = iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+
+ rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_no_pi_readv_done, bio, 0,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
+ md, 0, 0);
+
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
+ }
+ return rc;
+}
+
+static int
+bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
+ void *md, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ bio->iovs = iov;
+ bio->iovcnt = iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+
+ rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_readv_done, bio, nbdev->disk.dif_check_flags,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
+ md, 0, 0);
+
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("readv failed: rc = %d\n", rc);
+ }
+ return rc;
+}
+
+static int
+bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ bio->iovs = iov;
+ bio->iovcnt = iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+
+ rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_writev_done, bio, nbdev->disk.dif_check_flags,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
+ md, 0, 0);
+
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("writev failed: rc = %d\n", rc);
+ }
+ return rc;
+}
+
+static int
+bdev_nvme_comparev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ bio->iovs = iov;
+ bio->iovcnt = iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+
+ rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_comparev_done, bio, nbdev->disk.dif_check_flags,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
+ md, 0, 0);
+
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
+ }
+ return rc;
+}
+
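+/*
+ * Fused compare-and-write: the compare is submitted first with
+ * SPDK_NVME_IO_FLAGS_FUSE_FIRST, then the write with
+ * SPDK_NVME_IO_FLAGS_FUSE_SECOND. On a retried bdev_io the compare is not
+ * resubmitted once first_fused_submitted has been set.
+ */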
+static int
+bdev_nvme_comparev_and_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
+ int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+ uint32_t flags = nbdev->disk.dif_check_flags;
+ int rc;
+
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "compare and write %lu blocks with offset %#lx\n",
+ lba_count, lba);
+
+ bio->iovs = cmp_iov;
+ bio->iovcnt = cmp_iovcnt;
+ bio->iovpos = 0;
+ bio->iov_offset = 0;
+ bio->fused_iovs = write_iov;
+ bio->fused_iovcnt = write_iovcnt;
+ bio->fused_iovpos = 0;
+ bio->fused_iov_offset = 0;
+
+ if (bdev_io->num_retries == 0) {
+ bio->first_fused_submitted = false;
+ }
+
+ if (!bio->first_fused_submitted) {
+ flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
+ memset(&bio->cpl, 0, sizeof(bio->cpl));
+
+ rc = spdk_nvme_ns_cmd_comparev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_comparev_and_writev_done, bio, flags,
+ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
+ if (rc == 0) {
+ bio->first_fused_submitted = true;
+ flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
+ } else {
+ if (rc != -ENOMEM) {
+ SPDK_ERRLOG("compare failed: rc = %d\n", rc);
+ }
+ return rc;
+ }
+ }
+
+ flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
+
+ rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair, lba, lba_count,
+ bdev_nvme_comparev_and_writev_done, bio, flags,
+ bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
+ if (rc != 0 && rc != -ENOMEM) {
+ SPDK_ERRLOG("write failed: rc = %d\n", rc);
+ rc = 0;
+ }
+
+ return rc;
+}
+
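+/*
+ * Unmap is issued as an NVMe Dataset Management (deallocate) command. The
+ * block range is split into ranges of at most
+ * SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS blocks; requests that would
+ * need more than SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES ranges are rejected
+ * with -EINVAL.
+ */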
+static int
+bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ uint64_t offset_blocks,
+ uint64_t num_blocks)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
+ struct spdk_nvme_dsm_range *range;
+ uint64_t offset, remaining;
+ uint64_t num_ranges_u64;
+ uint16_t num_ranges;
+ int rc;
+
+ num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
+ SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
+ SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
+ return -EINVAL;
+ }
+ num_ranges = (uint16_t)num_ranges_u64;
+
+ offset = offset_blocks;
+ remaining = num_blocks;
+ range = &dsm_ranges[0];
+
+ /* Fill max-size ranges until the remaining blocks fit into one range */
+ while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
+ range->attributes.raw = 0;
+ range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ range->starting_lba = offset;
+
+ offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
+ range++;
+ }
+
+ /* Final range describes the remaining blocks */
+ range->attributes.raw = 0;
+ range->length = remaining;
+ range->starting_lba = offset;
+
+ rc = spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair,
+ SPDK_NVME_DSM_ATTR_DEALLOCATE,
+ dsm_ranges, num_ranges,
+ bdev_nvme_queued_done, bio);
+
+ return rc;
+}
+
+static int
+bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
+{
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ bio->orig_thread = spdk_io_channel_get_thread(ch);
+
+ return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_bdev_ctrlr->ctrlr, cmd, buf,
+ (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio);
+}
+
+static int
+bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ /*
+ * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
+ * so fill it out automatically.
+ */
+ cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns);
+
+ return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
+ (uint32_t)nbytes, bdev_nvme_queued_done, bio);
+}
+
+static int
+bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio,
+ struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->nvme_ns->ns);
+ uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);
+
+ if (nbytes > max_xfer_size) {
+ SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
+ return -EINVAL;
+ }
+
+ if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->nvme_ns->ns)) {
+		SPDK_ERRLOG("invalid metadata buffer size\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
+ * so fill it out automatically.
+ */
+ cmd->nsid = spdk_nvme_ns_get_id(nbdev->nvme_ns->ns);
+
+ return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
+ (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
+}
+
+static void
+bdev_nvme_abort_admin_cmd(void *ctx)
+{
+ struct nvme_bdev_io *bio = ctx;
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+ struct nvme_bdev *nbdev;
+ struct nvme_bdev_io *bio_to_abort;
+ int rc;
+
+ nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
+ bio_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
+
+ rc = spdk_nvme_ctrlr_cmd_abort_ext(nbdev->nvme_bdev_ctrlr->ctrlr,
+ NULL,
+ bio_to_abort,
+ bdev_nvme_abort_done, bio);
+ if (rc == -ENOENT) {
+ /* If no admin command was found in admin qpair, complete the abort
+ * request with failure.
+ */
+ bio->cpl.cdw0 |= 1U;
+ bio->cpl.status.sc = SPDK_NVME_SC_SUCCESS;
+ bio->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
+
+ spdk_thread_send_msg(bio->orig_thread, bdev_nvme_abort_completion, bio);
+ }
+}
+
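+/*
+ * Abort flow: first try to abort the target command in the I/O qpair. If it
+ * is not found there (-ENOENT), forward the request to the controller's
+ * thread to try the admin qpair; if it is not found there either, the abort
+ * completes as failed (cdw0 bit 0 set).
+ */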
+static int
+bdev_nvme_abort(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+ struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort)
+{
+ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+ int rc;
+
+ bio->orig_thread = spdk_io_channel_get_thread(ch);
+
+ rc = spdk_nvme_ctrlr_cmd_abort_ext(nbdev->nvme_bdev_ctrlr->ctrlr,
+ nvme_ch->qpair,
+ bio_to_abort,
+ bdev_nvme_abort_done, bio);
+ if (rc == -ENOENT) {
+		/* If no command was found in the I/O qpair, the target command may be
+		 * an admin command. Only a single thread, the controller's thread,
+		 * tries to abort admin commands, keeping that handling on one thread.
+ */
+ spdk_thread_send_msg(nbdev->nvme_bdev_ctrlr->thread,
+ bdev_nvme_abort_admin_cmd, bio);
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static void
+bdev_nvme_get_spdk_running_config(FILE *fp)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ fprintf(fp, "\n[Nvme]");
+ fprintf(fp, "\n"
+ "# NVMe Device Whitelist\n"
+ "# Users may specify which NVMe devices to claim by their transport id.\n"
+ "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n"
+ "# The second argument is the assigned name, which can be referenced from\n"
+ "# other sections in the configuration file. For NVMe devices, a namespace\n"
+ "# is automatically appended to each name in the format <YourName>nY, where\n"
+ "# Y is the NSID (starts at 1).\n");
+
+ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
+ const char *trtype;
+ const char *prchk_flags;
+
+ trtype = spdk_nvme_transport_id_trtype_str(nvme_bdev_ctrlr->trid->trtype);
+ if (!trtype) {
+ continue;
+ }
+
+ if (nvme_bdev_ctrlr->trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
+ fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n",
+ trtype,
+ nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->name);
+ } else {
+ const char *adrfam;
+
+ adrfam = spdk_nvme_transport_id_adrfam_str(nvme_bdev_ctrlr->trid->adrfam);
+ prchk_flags = spdk_nvme_prchk_flags_str(nvme_bdev_ctrlr->prchk_flags);
+
+ if (adrfam) {
+ fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s",
+ trtype, adrfam,
+ nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->trid->trsvcid,
+ nvme_bdev_ctrlr->trid->subnqn, nvme_bdev_ctrlr->name);
+ } else {
+ fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s",
+ trtype,
+ nvme_bdev_ctrlr->trid->traddr, nvme_bdev_ctrlr->trid->trsvcid,
+ nvme_bdev_ctrlr->trid->subnqn, nvme_bdev_ctrlr->name);
+ }
+
+ if (prchk_flags) {
+ fprintf(fp, " \"%s\"\n", prchk_flags);
+ } else {
+ fprintf(fp, "\n");
+ }
+ }
+ }
+
+ fprintf(fp, "\n"
+ "# The number of attempts per I/O when an I/O fails. Do not include\n"
+ "# this key to get the default behavior.\n");
+ fprintf(fp, "RetryCount %d\n", g_opts.retry_count);
+ fprintf(fp, "\n"
+ "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n");
+ fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us);
+
+ fprintf(fp, "\n"
+ "# Action to take on command time out. Only valid when Timeout is greater\n"
+ "# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort\n"
+ "# the command, or 'None' to just print a message but do nothing.\n"
+ "# Admin command timeouts will always result in a reset.\n");
+ switch (g_opts.action_on_timeout) {
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
+ fprintf(fp, "ActionOnTimeout None\n");
+ break;
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
+ fprintf(fp, "ActionOnTimeout Reset\n");
+						 "firmware download failed.");
+ case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
+ fprintf(fp, "ActionOnTimeout Abort\n");
+ break;
+ }
+
+ fprintf(fp, "\n"
+ "# Set how often the admin queue is polled for asynchronous events.\n"
+ "# Units in microseconds.\n");
+ fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us);
+ fprintf(fp, "IOPollRate %" PRIu64"\n", g_opts.nvme_ioq_poll_period_us);
+ fprintf(fp, "\n"
+ "# Disable handling of hotplug (runtime insert and remove) events,\n"
+		"# users can set this to Yes if they want to enable it.\n"
+ "# Default: No\n");
+ fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No");
+ fprintf(fp, "\n"
+		"# Set how often the hotplug is processed for insert and remove events.\n"
+ "# Units in microseconds.\n");
+ fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us);
+ if (g_nvme_hostnqn) {
+ fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn);
+ }
+ fprintf(fp, "DelayCmdSubmit %s\n", g_opts.delay_cmd_submit ? "True" : "False");
+
+ fprintf(fp, "\n");
+}
+
+static void
+nvme_ctrlr_config_json_standard_namespace(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns)
+{
+ /* nop */
+}
+
+static void
+nvme_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns)
+{
+ g_config_json_namespace_fn[ns->type](w, ns);
+}
+
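+/*
+ * JSON config dump order: bdev_nvme_set_options first, then one
+ * bdev_nvme_attach_controller entry per controller (plus per-namespace
+ * entries), and bdev_nvme_set_hotplug last so every bdev is constructed
+ * before the hotplug poller is enabled on config replay.
+ */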
+static int
+bdev_nvme_config_json(struct spdk_json_write_ctx *w)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct spdk_nvme_transport_id *trid;
+ const char *action;
+ uint32_t nsid;
+
+ if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
+ action = "reset";
+ } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
+ action = "abort";
+ } else {
+ action = "none";
+ }
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "action_on_timeout", action);
+ spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
+ spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count);
+ spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
+ spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
+ spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
+ spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
+ spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
+ spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
+ spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
+ spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
+ trid = nvme_bdev_ctrlr->trid;
+
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name);
+ nvme_bdev_dump_trid_json(trid, w);
+ spdk_json_write_named_bool(w, "prchk_reftag",
+ (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
+ spdk_json_write_named_bool(w, "prchk_guard",
+ (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
+
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) {
+ if (!nvme_bdev_ctrlr->namespaces[nsid]->populated) {
+ continue;
+ }
+
+ nvme_namespace_config_json(w, nvme_bdev_ctrlr->namespaces[nsid]);
+ }
+ }
+
+ /* Dump as last parameter to give all NVMe bdevs chance to be constructed
+ * before enabling hotplug poller.
+ */
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
+ spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return 0;
+}
+
+struct spdk_nvme_ctrlr *
+bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
+{
+ if (!bdev || bdev->module != &nvme_if) {
+ return NULL;
+ }
+
+ return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_bdev_ctrlr->ctrlr;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME)
diff --git a/src/spdk/module/bdev/nvme/bdev_nvme.h b/src/spdk/module/bdev/nvme/bdev_nvme.h
new file mode 100644
index 000000000..417c21cad
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_nvme.h
@@ -0,0 +1,90 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_NVME_H
+#define SPDK_BDEV_NVME_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/queue.h"
+#include "spdk/nvme.h"
+#include "spdk/bdev_module.h"
+
+#include "common.h"
+
+enum spdk_bdev_timeout_action {
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0,
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET,
+ SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT,
+};
+
+struct spdk_bdev_nvme_opts {
+ enum spdk_bdev_timeout_action action_on_timeout;
+ uint64_t timeout_us;
+ uint32_t retry_count;
+ uint32_t arbitration_burst;
+ uint32_t low_priority_weight;
+ uint32_t medium_priority_weight;
+ uint32_t high_priority_weight;
+ uint64_t nvme_adminq_poll_period_us;
+ uint64_t nvme_ioq_poll_period_us;
+ uint32_t io_queue_requests;
+ bool delay_cmd_submit;
+};
+
+struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
+void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
+int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
+int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);
+
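+/**
+ * Connect to an NVMe controller and create one bdev per active namespace.
+ * The connect/attach phase is asynchronous; cb_fn is invoked when it
+ * finishes.
+ *
+ * \param trid transport ID of the controller to attach
+ * \param hostid optional source address/service ID for fabrics connections
+ * \param base_name controller name; bdevs are named <base_name>nY per NSID
+ * \param names output array that receives the created bdev names
+ * \param count size of the names array
+ * \param hostnqn optional host NQN
+ * \param prchk_flags protection information check flags
+ * \param cb_fn callback invoked with the number of bdevs created
+ * \param cb_ctx argument passed to cb_fn
+ */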
+int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_host_id *hostid,
+ const char *base_name,
+ const char **names,
+ uint32_t count,
+ const char *hostnqn,
+ uint32_t prchk_flags,
+ spdk_bdev_create_nvme_fn cb_fn,
+ void *cb_ctx);
+struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);
+
+/**
+ * Delete an NVMe controller and all bdevs created on top of it.
+ * The name of the NVMe controller must be provided.
+ *
+ * \param name NVMe controller name
+ * \return zero on success, -EINVAL if parameters are invalid, or -ENODEV if the controller is not found
+ */
+int bdev_nvme_delete(const char *name);
+
+#endif /* SPDK_BDEV_NVME_H */
diff --git a/src/spdk/module/bdev/nvme/bdev_nvme_cuse_rpc.c b/src/spdk/module/bdev/nvme/bdev_nvme_cuse_rpc.c
new file mode 100644
index 000000000..c116c510d
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_nvme_cuse_rpc.c
@@ -0,0 +1,152 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_nvme.h"
+
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/nvme.h"
+
+#include "spdk_internal/log.h"
+
+struct rpc_nvme_cuse_register {
+ char *name;
+};
+
+static void
+free_rpc_nvme_cuse_register(struct rpc_nvme_cuse_register *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_nvme_cuse_register_decoders[] = {
+ {"name", offsetof(struct rpc_nvme_cuse_register, name), spdk_json_decode_string},
+};
+
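+/*
+ * JSON-RPC handler for "bdev_nvme_cuse_register". For illustration only, a
+ * request might carry params such as {"name": "Nvme0"}; the controller name
+ * is a hypothetical example and must match a name previously given to
+ * bdev_nvme_attach_controller.
+ */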
+static void
+rpc_nvme_cuse_register(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_nvme_cuse_register req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_bdev_ctrlr *bdev_ctrlr = NULL;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_nvme_cuse_register_decoders,
+ SPDK_COUNTOF(rpc_nvme_cuse_register_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ goto cleanup;
+ }
+
+ bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(req.name);
+ if (!bdev_ctrlr) {
+ SPDK_ERRLOG("No such controller\n");
+ spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
+ goto cleanup;
+ }
+
+ rc = spdk_nvme_cuse_register(bdev_ctrlr->ctrlr);
+ if (rc) {
+ SPDK_ERRLOG("Failed to register CUSE devices: %s\n", spdk_strerror(-rc));
+ spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+ goto cleanup;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+cleanup:
+ free_rpc_nvme_cuse_register(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_cuse_register", rpc_nvme_cuse_register, SPDK_RPC_RUNTIME)
+
+struct rpc_nvme_cuse_unregister {
+ char *name;
+};
+
+static void
+free_rpc_nvme_cuse_unregister(struct rpc_nvme_cuse_unregister *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_nvme_cuse_unregister_decoders[] = {
+ {"name", offsetof(struct rpc_nvme_cuse_unregister, name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_nvme_cuse_unregister(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_nvme_cuse_unregister req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_bdev_ctrlr *bdev_ctrlr = NULL;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_nvme_cuse_unregister_decoders,
+ SPDK_COUNTOF(rpc_nvme_cuse_unregister_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ goto cleanup;
+ }
+
+ bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(req.name);
+ if (!bdev_ctrlr) {
+ SPDK_ERRLOG("No such controller\n");
+ spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
+ goto cleanup;
+ }
+
+ rc = spdk_nvme_cuse_unregister(bdev_ctrlr->ctrlr);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+ goto cleanup;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+cleanup:
+ free_rpc_nvme_cuse_unregister(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_cuse_unregister", rpc_nvme_cuse_unregister, SPDK_RPC_RUNTIME)
diff --git a/src/spdk/module/bdev/nvme/bdev_nvme_rpc.c b/src/spdk/module/bdev/nvme/bdev_nvme_rpc.c
new file mode 100644
index 000000000..299da4023
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_nvme_rpc.c
@@ -0,0 +1,842 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_nvme.h"
+#include "common.h"
+
+#include "spdk/config.h"
+
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+
+#include "spdk_internal/log.h"
+#include "spdk/bdev_module.h"
+
+struct open_descriptors {
+ void *desc;
+ struct spdk_bdev *bdev;
+ TAILQ_ENTRY(open_descriptors) tqlst;
+ struct spdk_thread *thread;
+};
+typedef TAILQ_HEAD(, open_descriptors) open_descriptors_t;
+
+static int
+rpc_decode_action_on_timeout(const struct spdk_json_val *val, void *out)
+{
+ enum spdk_bdev_timeout_action *action = out;
+
+ if (spdk_json_strequal(val, "none") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE;
+ } else if (spdk_json_strequal(val, "abort") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT;
+ } else if (spdk_json_strequal(val, "reset") == true) {
+ *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: action_on_timeout\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = {
+ {"action_on_timeout", offsetof(struct spdk_bdev_nvme_opts, action_on_timeout), rpc_decode_action_on_timeout, true},
+ {"timeout_us", offsetof(struct spdk_bdev_nvme_opts, timeout_us), spdk_json_decode_uint64, true},
+ {"retry_count", offsetof(struct spdk_bdev_nvme_opts, retry_count), spdk_json_decode_uint32, true},
+ {"arbitration_burst", offsetof(struct spdk_bdev_nvme_opts, arbitration_burst), spdk_json_decode_uint32, true},
+ {"low_priority_weight", offsetof(struct spdk_bdev_nvme_opts, low_priority_weight), spdk_json_decode_uint32, true},
+ {"medium_priority_weight", offsetof(struct spdk_bdev_nvme_opts, medium_priority_weight), spdk_json_decode_uint32, true},
+ {"high_priority_weight", offsetof(struct spdk_bdev_nvme_opts, high_priority_weight), spdk_json_decode_uint32, true},
+ {"nvme_adminq_poll_period_us", offsetof(struct spdk_bdev_nvme_opts, nvme_adminq_poll_period_us), spdk_json_decode_uint64, true},
+ {"nvme_ioq_poll_period_us", offsetof(struct spdk_bdev_nvme_opts, nvme_ioq_poll_period_us), spdk_json_decode_uint64, true},
+ {"io_queue_requests", offsetof(struct spdk_bdev_nvme_opts, io_queue_requests), spdk_json_decode_uint32, true},
+ {"delay_cmd_submit", offsetof(struct spdk_bdev_nvme_opts, delay_cmd_submit), spdk_json_decode_bool, true},
+};
+
+static void
+rpc_bdev_nvme_set_options(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct spdk_bdev_nvme_opts opts;
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ bdev_nvme_get_opts(&opts);
+ if (params && spdk_json_decode_object(params, rpc_bdev_nvme_options_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_options_decoders),
+ &opts)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ return;
+ }
+
+ rc = bdev_nvme_set_opts(&opts);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+ return;
+}
+SPDK_RPC_REGISTER("bdev_nvme_set_options", rpc_bdev_nvme_set_options,
+ SPDK_RPC_STARTUP | SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_set_options, set_bdev_nvme_options)
+
+struct rpc_bdev_nvme_hotplug {
+ bool enabled;
+ uint64_t period_us;
+};
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_hotplug_decoders[] = {
+ {"enable", offsetof(struct rpc_bdev_nvme_hotplug, enabled), spdk_json_decode_bool, false},
+ {"period_us", offsetof(struct rpc_bdev_nvme_hotplug, period_us), spdk_json_decode_uint64, true},
+};
+
+static void
+rpc_bdev_nvme_set_hotplug_done(void *ctx)
+{
+ struct spdk_jsonrpc_request *request = ctx;
+ struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request);
+
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+}
+
+static void
+rpc_bdev_nvme_set_hotplug(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_hotplug req = {false, 0};
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_hotplug_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_hotplug_decoders), &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ rc = -EINVAL;
+ goto invalid;
+ }
+
+ rc = bdev_nvme_set_hotplug(req.enabled, req.period_us, rpc_bdev_nvme_set_hotplug_done,
+ request);
+ if (rc) {
+ goto invalid;
+ }
+
+ return;
+invalid:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc));
+}
+SPDK_RPC_REGISTER("bdev_nvme_set_hotplug", rpc_bdev_nvme_set_hotplug, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_set_hotplug, set_bdev_nvme_hotplug)
+
+struct rpc_bdev_nvme_attach_controller {
+ char *name;
+ char *trtype;
+ char *adrfam;
+ char *traddr;
+ char *trsvcid;
+ char *priority;
+ char *subnqn;
+ char *hostnqn;
+ char *hostaddr;
+ char *hostsvcid;
+ bool prchk_reftag;
+ bool prchk_guard;
+};
+
+static void
+free_rpc_bdev_nvme_attach_controller(struct rpc_bdev_nvme_attach_controller *req)
+{
+ free(req->name);
+ free(req->trtype);
+ free(req->adrfam);
+ free(req->traddr);
+ free(req->trsvcid);
+ free(req->priority);
+ free(req->subnqn);
+ free(req->hostnqn);
+ free(req->hostaddr);
+ free(req->hostsvcid);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_attach_controller_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_nvme_attach_controller, name), spdk_json_decode_string},
+ {"trtype", offsetof(struct rpc_bdev_nvme_attach_controller, trtype), spdk_json_decode_string},
+ {"traddr", offsetof(struct rpc_bdev_nvme_attach_controller, traddr), spdk_json_decode_string},
+
+ {"adrfam", offsetof(struct rpc_bdev_nvme_attach_controller, adrfam), spdk_json_decode_string, true},
+ {"trsvcid", offsetof(struct rpc_bdev_nvme_attach_controller, trsvcid), spdk_json_decode_string, true},
+ {"priority", offsetof(struct rpc_bdev_nvme_attach_controller, priority), spdk_json_decode_string, true},
+ {"subnqn", offsetof(struct rpc_bdev_nvme_attach_controller, subnqn), spdk_json_decode_string, true},
+ {"hostnqn", offsetof(struct rpc_bdev_nvme_attach_controller, hostnqn), spdk_json_decode_string, true},
+ {"hostaddr", offsetof(struct rpc_bdev_nvme_attach_controller, hostaddr), spdk_json_decode_string, true},
+ {"hostsvcid", offsetof(struct rpc_bdev_nvme_attach_controller, hostsvcid), spdk_json_decode_string, true},
+
+ {"prchk_reftag", offsetof(struct rpc_bdev_nvme_attach_controller, prchk_reftag), spdk_json_decode_bool, true},
+ {"prchk_guard", offsetof(struct rpc_bdev_nvme_attach_controller, prchk_guard), spdk_json_decode_bool, true}
+};
+
+#define NVME_MAX_BDEVS_PER_RPC 128
+
+struct rpc_bdev_nvme_attach_controller_ctx {
+ struct rpc_bdev_nvme_attach_controller req;
+ uint32_t count;
+ const char *names[NVME_MAX_BDEVS_PER_RPC];
+ struct spdk_jsonrpc_request *request;
+};
+
+static void
+rpc_bdev_nvme_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
+{
+ struct rpc_bdev_nvme_attach_controller_ctx *ctx = cb_ctx;
+ struct spdk_jsonrpc_request *request = ctx->request;
+ struct spdk_json_write_ctx *w;
+ size_t i;
+
+ if (rc < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto exit;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_array_begin(w);
+ for (i = 0; i < bdev_count; i++) {
+ spdk_json_write_string(w, ctx->names[i]);
+ }
+ spdk_json_write_array_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+exit:
+ free_rpc_bdev_nvme_attach_controller(&ctx->req);
+ free(ctx);
+}
+
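+/*
+ * JSON-RPC handler for "bdev_nvme_attach_controller". "name", "trtype" and
+ * "traddr" are required; the remaining decoder fields are optional. For
+ * illustration only (values are hypothetical), a PCIe request could use
+ * params such as:
+ *   {"name": "Nvme0", "trtype": "PCIe", "traddr": "0000:00:04.0"}
+ * while a fabrics request additionally supplies adrfam, trsvcid and subnqn.
+ */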
+static void
+rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_attach_controller_ctx *ctx;
+ struct spdk_nvme_transport_id trid = {};
+ struct spdk_nvme_host_id hostid = {};
+ uint32_t prchk_flags = 0;
+ int rc;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
+ return;
+ }
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_attach_controller_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_attach_controller_decoders),
+ &ctx->req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ goto cleanup;
+ }
+
+ /* Parse trstring */
+ rc = spdk_nvme_transport_id_populate_trstring(&trid, ctx->req.trtype);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to parse trtype: %s\n", ctx->req.trtype);
+ spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse trtype: %s",
+ ctx->req.trtype);
+ goto cleanup;
+ }
+
+ /* Parse trtype */
+ rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, ctx->req.trtype);
+ assert(rc == 0);
+
+ /* Parse traddr */
+ snprintf(trid.traddr, sizeof(trid.traddr), "%s", ctx->req.traddr);
+
+ /* Parse adrfam */
+ if (ctx->req.adrfam) {
+ rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, ctx->req.adrfam);
+ if (rc < 0) {
+ SPDK_ERRLOG("Failed to parse adrfam: %s\n", ctx->req.adrfam);
+ spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse adrfam: %s",
+ ctx->req.adrfam);
+ goto cleanup;
+ }
+ }
+
+ /* Parse trsvcid */
+ if (ctx->req.trsvcid) {
+ snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", ctx->req.trsvcid);
+ }
+
+ /* Parse priority for the NVMe-oF transport connection */
+ if (ctx->req.priority) {
+ trid.priority = spdk_strtol(ctx->req.priority, 10);
+ }
+
+ /* Parse subnqn */
+ if (ctx->req.subnqn) {
+ snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", ctx->req.subnqn);
+ }
+
+ if (ctx->req.hostaddr) {
+ snprintf(hostid.hostaddr, sizeof(hostid.hostaddr), "%s", ctx->req.hostaddr);
+ }
+
+ if (ctx->req.hostsvcid) {
+ snprintf(hostid.hostsvcid, sizeof(hostid.hostsvcid), "%s", ctx->req.hostsvcid);
+ }
+
+ if (ctx->req.prchk_reftag) {
+ prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
+ }
+
+ if (ctx->req.prchk_guard) {
+ prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
+ }
+
+ ctx->request = request;
+ ctx->count = NVME_MAX_BDEVS_PER_RPC;
+ rc = bdev_nvme_create(&trid, &hostid, ctx->req.name, ctx->names, ctx->count, ctx->req.hostnqn,
+ prchk_flags, rpc_bdev_nvme_attach_controller_done, ctx);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+ goto cleanup;
+ }
+
+ return;
+
+cleanup:
+ free_rpc_bdev_nvme_attach_controller(&ctx->req);
+ free(ctx);
+}
+SPDK_RPC_REGISTER("bdev_nvme_attach_controller", rpc_bdev_nvme_attach_controller,
+ SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_attach_controller, construct_nvme_bdev)
+
+static void
+rpc_dump_nvme_controller_info(struct spdk_json_write_ctx *w,
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ struct spdk_nvme_transport_id *trid;
+
+ trid = nvme_bdev_ctrlr->trid;
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name);
+
+#ifdef SPDK_CONFIG_NVME_CUSE
+ size_t cuse_name_size = 128;
+ char cuse_name[cuse_name_size];
+
+ int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_bdev_ctrlr->ctrlr, cuse_name, &cuse_name_size);
+ if (rc == 0) {
+ spdk_json_write_named_string(w, "cuse_device", cuse_name);
+ }
+#endif
+
+ spdk_json_write_named_object_begin(w, "trid");
+ nvme_bdev_dump_trid_json(trid, w);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+}
+
+struct rpc_bdev_nvme_get_controllers {
+ char *name;
+};
+
+static void
+free_rpc_bdev_nvme_get_controllers(struct rpc_bdev_nvme_get_controllers *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_get_controllers_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_nvme_get_controllers, name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_bdev_nvme_get_controllers(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_get_controllers req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_bdev_ctrlr *ctrlr = NULL;
+
+ if (params && spdk_json_decode_object(params, rpc_bdev_nvme_get_controllers_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_get_controllers_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ goto cleanup;
+ }
+
+ if (req.name) {
+ ctrlr = nvme_bdev_ctrlr_get_by_name(req.name);
+ if (ctrlr == NULL) {
+ SPDK_ERRLOG("ctrlr '%s' does not exist\n", req.name);
+ spdk_jsonrpc_send_error_response_fmt(request, EINVAL, "Controller %s does not exist", req.name);
+ goto cleanup;
+ }
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_array_begin(w);
+
+ if (ctrlr != NULL) {
+ rpc_dump_nvme_controller_info(w, ctrlr);
+ } else {
+ for (ctrlr = nvme_bdev_first_ctrlr(); ctrlr; ctrlr = nvme_bdev_next_ctrlr(ctrlr)) {
+ rpc_dump_nvme_controller_info(w, ctrlr);
+ }
+ }
+
+ spdk_json_write_array_end(w);
+
+ spdk_jsonrpc_end_result(request, w);
+
+cleanup:
+ free_rpc_bdev_nvme_get_controllers(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_get_controllers", rpc_bdev_nvme_get_controllers, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_get_controllers, get_nvme_controllers)
+
+struct rpc_bdev_nvme_detach_controller {
+ char *name;
+};
+
+static void
+free_rpc_bdev_nvme_detach_controller(struct rpc_bdev_nvme_detach_controller *req)
+{
+ free(req->name);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_detach_controller_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_nvme_detach_controller, name), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_nvme_detach_controller(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_detach_controller req = {NULL};
+ struct spdk_json_write_ctx *w;
+ int rc = 0;
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_detach_controller_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_detach_controller_decoders),
+ &req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed");
+ goto cleanup;
+ }
+
+ rc = bdev_nvme_delete(req.name);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+ goto cleanup;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+cleanup:
+ free_rpc_bdev_nvme_detach_controller(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_detach_controller", rpc_bdev_nvme_detach_controller,
+ SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_detach_controller, delete_nvme_controller)
+
+struct rpc_apply_firmware {
+ char *filename;
+ char *bdev_name;
+};
+
+static void
+free_rpc_apply_firmware(struct rpc_apply_firmware *req)
+{
+ free(req->filename);
+ free(req->bdev_name);
+}
+
+static const struct spdk_json_object_decoder rpc_apply_firmware_decoders[] = {
+ {"filename", offsetof(struct rpc_apply_firmware, filename), spdk_json_decode_string},
+ {"bdev_name", offsetof(struct rpc_apply_firmware, bdev_name), spdk_json_decode_string},
+};
+
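+/* State of a single firmware update: the image buffer and download progress,
+ * the descriptor/channel used to issue the admin commands, and the list of
+ * descriptors held open on every bdev of the target controller.
+ */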
+struct firmware_update_info {
+ void *fw_image;
+ void *p;
+ unsigned int size;
+ unsigned int size_remaining;
+ unsigned int offset;
+ unsigned int transfer;
+
+ void *desc;
+ struct spdk_io_channel *ch;
+ struct spdk_jsonrpc_request *request;
+ struct spdk_nvme_ctrlr *ctrlr;
+ open_descriptors_t desc_head;
+ struct rpc_apply_firmware *req;
+};
+
+static void
+_apply_firmware_cleanup(void *ctx)
+{
+ struct spdk_bdev_desc *desc = ctx;
+
+ spdk_bdev_close(desc);
+}
+
+static void
+apply_firmware_cleanup(void *cb_arg)
+{
+ struct open_descriptors *opt, *tmp;
+ struct firmware_update_info *firm_ctx = cb_arg;
+
+ if (!firm_ctx) {
+ return;
+ }
+
+ if (firm_ctx->fw_image) {
+ spdk_free(firm_ctx->fw_image);
+ }
+
+ if (firm_ctx->req) {
+ free_rpc_apply_firmware(firm_ctx->req);
+ free(firm_ctx->req);
+ }
+
+ if (firm_ctx->ch) {
+ spdk_put_io_channel(firm_ctx->ch);
+ }
+
+ TAILQ_FOREACH_SAFE(opt, &firm_ctx->desc_head, tqlst, tmp) {
+ TAILQ_REMOVE(&firm_ctx->desc_head, opt, tqlst);
+ /* Close the underlying bdev on the same thread on which it was opened. */
+ if (opt->thread && opt->thread != spdk_get_thread()) {
+ spdk_thread_send_msg(opt->thread, _apply_firmware_cleanup, opt->desc);
+ } else {
+ spdk_bdev_close(opt->desc);
+ }
+ free(opt);
+ }
+ free(firm_ctx);
+}
+
+static void
+apply_firmware_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ int rc;
+ struct spdk_json_write_ctx *w;
+ struct firmware_update_info *firm_ctx = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware commit failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((rc = spdk_nvme_ctrlr_reset(firm_ctx->ctrlr)) != 0) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Controller reset failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ w = spdk_jsonrpc_begin_result(firm_ctx->request);
+ spdk_json_write_string(w, "firmware commit succeeded. Controller reset in progress.");
+ spdk_jsonrpc_end_result(firm_ctx->request, w);
+ apply_firmware_cleanup(firm_ctx);
+}
+
+static void
+apply_firmware_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_nvme_cmd cmd = {};
+ struct spdk_nvme_fw_commit fw_commit;
+ int slot = 0;
+ int rc;
+ struct firmware_update_info *firm_ctx = cb_arg;
+ enum spdk_nvme_fw_commit_action commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG;
+
+ if (!success) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware download failed .");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->p += firm_ctx->transfer;
+ firm_ctx->offset += firm_ctx->transfer;
+ firm_ctx->size_remaining -= firm_ctx->transfer;
+
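+ /* Either the whole image has been transferred, in which case the firmware
+ * can be committed, or another chunk still needs to be downloaded.
+ */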
+ switch (firm_ctx->size_remaining) {
+ case 0:
+ /* firmware download completed. Commit firmware */
+ memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
+ fw_commit.fs = slot;
+ fw_commit.ca = commit_action;
+
+ cmd.opc = SPDK_NVME_OPC_FIRMWARE_COMMIT;
+ memcpy(&cmd.cdw10, &fw_commit, sizeof(uint32_t));
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, NULL, 0,
+ apply_firmware_complete_reset, firm_ctx);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware commit failed.");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ break;
+ default:
+ firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096);
+ cmd.opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD;
+
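+ /* Per the NVMe Firmware Image Download command format, cdw10 carries the
+ * number of dwords minus one (NUMD) and cdw11 the dword offset (OFST) of
+ * this portion of the image.
+ */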
+ cmd.cdw10 = (firm_ctx->transfer >> 2) - 1;
+ cmd.cdw11 = firm_ctx->offset >> 2;
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, firm_ctx->p,
+ firm_ctx->transfer, apply_firmware_complete, firm_ctx);
+ if (rc) {
+ spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "firmware download failed.");
+ spdk_bdev_free_io(bdev_io);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ break;
+ }
+}
+
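+/* bdev_nvme_apply_firmware: open every bdev backed by the target controller,
+ * read the firmware image into a DMA-able buffer, push it to the device in
+ * 4 KiB Firmware Image Download chunks (apply_firmware_complete), then commit
+ * the image and reset the controller (apply_firmware_complete_reset).
+ */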
+static void
+rpc_bdev_nvme_apply_firmware(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ int rc;
+ int fd = -1;
+ struct stat fw_stat;
+ struct spdk_nvme_ctrlr *ctrlr;
+ char msg[1024];
+ struct spdk_bdev *bdev;
+ struct spdk_bdev *bdev2;
+ struct open_descriptors *opt;
+ struct spdk_bdev_desc *desc;
+ struct spdk_nvme_cmd *cmd;
+ struct firmware_update_info *firm_ctx;
+
+ firm_ctx = calloc(1, sizeof(struct firmware_update_info));
+ if (!firm_ctx) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ return;
+ }
+ firm_ctx->fw_image = NULL;
+ TAILQ_INIT(&firm_ctx->desc_head);
+ firm_ctx->request = request;
+
+ firm_ctx->req = calloc(1, sizeof(struct rpc_apply_firmware));
+ if (!firm_ctx->req) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ free(firm_ctx);
+ return;
+ }
+
+ if (spdk_json_decode_object(params, rpc_apply_firmware_decoders,
+ SPDK_COUNTOF(rpc_apply_firmware_decoders), firm_ctx->req)) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "spdk_json_decode_object failed.");
+ free(firm_ctx->req);
+ free(firm_ctx);
+ return;
+ }
+
+ if ((bdev = spdk_bdev_get_by_name(firm_ctx->req->bdev_name)) == NULL) {
+ snprintf(msg, sizeof(msg), "bdev %s were not found", firm_ctx->req->bdev_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((ctrlr = bdev_nvme_get_ctrlr(bdev)) == NULL) {
+ snprintf(msg, sizeof(msg), "Controller information for %s were not found.",
+ firm_ctx->req->bdev_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ firm_ctx->ctrlr = ctrlr;
+
+ for (bdev2 = spdk_bdev_first(); bdev2; bdev2 = spdk_bdev_next(bdev2)) {
+
+ if (bdev_nvme_get_ctrlr(bdev2) != ctrlr) {
+ continue;
+ }
+
+ if (!(opt = malloc(sizeof(struct open_descriptors)))) {
+ snprintf(msg, sizeof(msg), "Memory allocation error.");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ if ((rc = spdk_bdev_open(bdev2, true, NULL, NULL, &desc)) != 0) {
+ snprintf(msg, sizeof(msg), "Device %s is in use.", firm_ctx->req->bdev_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg);
+ free(opt);
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ /* Save the thread where the base device is opened */
+ opt->thread = spdk_get_thread();
+
+ opt->desc = desc;
+ opt->bdev = bdev;
+ TAILQ_INSERT_TAIL(&firm_ctx->desc_head, opt, tqlst);
+ }
+
+ /*
+ * find a descriptor associated with our bdev
+ */
+ firm_ctx->desc = NULL;
+ TAILQ_FOREACH(opt, &firm_ctx->desc_head, tqlst) {
+ if (opt->bdev == bdev) {
+ firm_ctx->desc = opt->desc;
+ break;
+ }
+ }
+
+ if (!firm_ctx->desc) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "No descriptor were found.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->ch = spdk_bdev_get_io_channel(firm_ctx->desc);
+ if (!firm_ctx->ch) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "No channels were found.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ fd = open(firm_ctx->req->filename, O_RDONLY);
+ if (fd < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "open file failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ rc = fstat(fd, &fw_stat);
+ if (rc < 0) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "fstat failed.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->size = fw_stat.st_size;
+ if (fw_stat.st_size % 4) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Firmware image size is not multiple of 4.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+
+ firm_ctx->fw_image = spdk_zmalloc(firm_ctx->size, 4096, NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!firm_ctx->fw_image) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ firm_ctx->p = firm_ctx->fw_image;
+
+ if (read(fd, firm_ctx->p, firm_ctx->size) != ((ssize_t)(firm_ctx->size))) {
+ close(fd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Read firmware image failed!");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ close(fd);
+
+ firm_ctx->offset = 0;
+ firm_ctx->size_remaining = firm_ctx->size;
+ firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096);
+
+ cmd = malloc(sizeof(struct spdk_nvme_cmd));
+ if (!cmd) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Memory allocation error.");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+ memset(cmd, 0, sizeof(struct spdk_nvme_cmd));
+ cmd->opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD;
+
+ cmd->cdw10 = (firm_ctx->transfer >> 2) - 1;
+ cmd->cdw11 = firm_ctx->offset >> 2;
+
+ rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, cmd, firm_ctx->p,
+ firm_ctx->transfer, apply_firmware_complete, firm_ctx);
+ if (rc) {
+ free(cmd);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Read firmware image failed!");
+ apply_firmware_cleanup(firm_ctx);
+ return;
+ }
+}
+SPDK_RPC_REGISTER("bdev_nvme_apply_firmware", rpc_bdev_nvme_apply_firmware, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_apply_firmware, apply_nvme_firmware)
diff --git a/src/spdk/module/bdev/nvme/bdev_ocssd.c b/src/spdk/module/bdev/nvme/bdev_ocssd.c
new file mode 100644
index 000000000..35f665f40
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_ocssd.c
@@ -0,0 +1,1498 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev_module.h"
+#include "spdk/bdev_zone.h"
+#include "spdk/likely.h"
+#include "spdk/log.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/nvme_ocssd.h"
+#include "spdk/nvme_ocssd_spec.h"
+#include "spdk_internal/log.h"
+#include "spdk/nvme.h"
+#include "common.h"
+#include "bdev_ocssd.h"
+
+struct bdev_ocssd_lba_offsets {
+ uint32_t grp;
+ uint32_t pu;
+ uint32_t chk;
+ uint32_t lbk;
+};
+
+struct bdev_ocssd_zone {
+ uint64_t slba;
+ uint64_t write_pointer;
+ uint64_t capacity;
+ bool busy;
+};
+
+struct bdev_ocssd_io {
+ union {
+ struct {
+ struct bdev_ocssd_zone *zone;
+ size_t iov_pos;
+ size_t iov_off;
+ uint64_t lba[SPDK_NVME_OCSSD_MAX_LBAL_ENTRIES];
+ } io;
+ struct {
+ size_t chunk_offset;
+ struct spdk_ocssd_chunk_information_entry chunk_info;
+ } zone_info;
+ };
+};
+
+struct ocssd_io_channel {
+ struct spdk_poller *pending_poller;
+ TAILQ_HEAD(, spdk_bdev_io) pending_requests;
+};
+
+struct ocssd_bdev {
+ struct nvme_bdev nvme_bdev;
+ struct bdev_ocssd_zone *zones;
+ struct bdev_ocssd_range range;
+};
+
+struct bdev_ocssd_ns {
+ struct spdk_ocssd_geometry_data geometry;
+ struct bdev_ocssd_lba_offsets lba_offsets;
+ bool chunk_notify_pending;
+ uint64_t chunk_notify_count;
+ uint64_t num_outstanding;
+#define CHUNK_NOTIFICATION_ENTRY_COUNT 64
+ struct spdk_ocssd_chunk_notification_entry chunk[CHUNK_NOTIFICATION_ENTRY_COUNT];
+};
+
+struct ocssd_bdev_ctrlr {
+ struct spdk_poller *mm_poller;
+};
+
+static struct bdev_ocssd_ns *
+bdev_ocssd_get_ns_from_nvme(struct nvme_bdev_ns *nvme_ns)
+{
+ return nvme_ns->type_ctx;
+}
+
+static struct bdev_ocssd_ns *
+bdev_ocssd_get_ns_from_bdev(struct ocssd_bdev *ocssd_bdev)
+{
+ return bdev_ocssd_get_ns_from_nvme(ocssd_bdev->nvme_bdev.nvme_ns);
+}
+
+static uint64_t
+bdev_ocssd_num_parallel_units(const struct ocssd_bdev *ocssd_bdev)
+{
+ return ocssd_bdev->range.end - ocssd_bdev->range.begin + 1;
+}
+
+static uint64_t
+bdev_ocssd_num_zones(const struct ocssd_bdev *ocssd_bdev)
+{
+ return ocssd_bdev->nvme_bdev.disk.blockcnt / ocssd_bdev->nvme_bdev.disk.zone_size;
+}
+
+static int
+bdev_ocssd_library_init(void)
+{
+ return 0;
+}
+
+static void
+bdev_ocssd_library_fini(void)
+{
+}
+
+static int
+bdev_ocssd_config_json(struct spdk_json_write_ctx *w)
+{
+ return 0;
+}
+
+void
+bdev_ocssd_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct nvme_bdev *nvme_bdev;
+ struct ocssd_bdev *ocssd_bdev;
+ char range_buf[128];
+ int rc;
+
+ TAILQ_FOREACH(nvme_bdev, &ns->bdevs, tailq) {
+ nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr;
+ ocssd_bdev = SPDK_CONTAINEROF(nvme_bdev, struct ocssd_bdev, nvme_bdev);
+
+ rc = snprintf(range_buf, sizeof(range_buf), "%"PRIu64"-%"PRIu64,
+ ocssd_bdev->range.begin, ocssd_bdev->range.end);
+ if (rc < 0 || rc >= (int)sizeof(range_buf)) {
+ SPDK_ERRLOG("Failed to convert parallel unit range\n");
+ continue;
+ }
+
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "method", "bdev_ocssd_create");
+
+ spdk_json_write_named_object_begin(w, "params");
+ spdk_json_write_named_string(w, "ctrlr_name", nvme_bdev_ctrlr->name);
+ spdk_json_write_named_string(w, "bdev_name", nvme_bdev->disk.name);
+ spdk_json_write_named_uint32(w, "nsid", nvme_bdev->nvme_ns->id);
+ spdk_json_write_named_string(w, "range", range_buf);
+ spdk_json_write_object_end(w);
+
+ spdk_json_write_object_end(w);
+ }
+}
+
+static int
+bdev_ocssd_get_ctx_size(void)
+{
+ return sizeof(struct bdev_ocssd_io);
+}
+
+static struct spdk_bdev_module ocssd_if = {
+ .name = "ocssd",
+ .module_init = bdev_ocssd_library_init,
+ .module_fini = bdev_ocssd_library_fini,
+ .config_json = bdev_ocssd_config_json,
+ .get_ctx_size = bdev_ocssd_get_ctx_size,
+};
+
+SPDK_BDEV_MODULE_REGISTER(ocssd, &ocssd_if);
+
+static struct bdev_ocssd_zone *
+bdev_ocssd_get_zone_by_lba(struct ocssd_bdev *ocssd_bdev, uint64_t lba)
+{
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ size_t zone_size = nvme_bdev->disk.zone_size;
+
+ if (lba >= nvme_bdev->disk.blockcnt) {
+ return NULL;
+ }
+
+ return &ocssd_bdev->zones[lba / zone_size];
+}
+
+static struct bdev_ocssd_zone *
+bdev_ocssd_get_zone_by_slba(struct ocssd_bdev *ocssd_bdev, uint64_t slba)
+{
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+
+ if (slba % nvme_bdev->disk.zone_size != 0) {
+ return NULL;
+ }
+
+ return bdev_ocssd_get_zone_by_lba(ocssd_bdev, slba);
+}
+
+static void
+bdev_ocssd_free_bdev(struct ocssd_bdev *ocssd_bdev)
+{
+ if (!ocssd_bdev) {
+ return;
+ }
+
+ free(ocssd_bdev->zones);
+ free(ocssd_bdev->nvme_bdev.disk.name);
+ free(ocssd_bdev);
+}
+
+static int
+bdev_ocssd_destruct(void *ctx)
+{
+ struct ocssd_bdev *ocssd_bdev = ctx;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+
+ nvme_bdev_detach_bdev_from_ns(nvme_bdev);
+ bdev_ocssd_free_bdev(ocssd_bdev);
+
+ return 0;
+}
+
+static void
+bdev_ocssd_translate_lba(struct ocssd_bdev *ocssd_bdev, uint64_t lba, uint64_t *grp,
+ uint64_t *pu, uint64_t *chk, uint64_t *lbk)
+{
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ const struct spdk_ocssd_geometry_data *geo = &ocssd_ns->geometry;
+ const struct bdev_ocssd_range *range = &ocssd_bdev->range;
+ uint64_t addr_shift, punit;
+
+ /* To achieve best performance, we need to make sure that adjacent zones can be accessed
+ * in parallel. We accomplish this by having the following addressing scheme:
+ *
+ * [ zone id ][ zone offset ] User's LBA
+ * [ chunk ][ group ][ parallel unit ][ logical block ] Open Channel's LBA
+ *
+ * which means that neighbouring zones are placed in a different group and parallel unit.
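+ *
+ * For example (hypothetical geometry, for illustration only): with
+ * clba = 4096 and 16 parallel units in the range, user LBA 8192 maps to
+ * lbk = 8192 % 4096 = 0 and punit = range->begin + (8192 / 4096) % 16,
+ * i.e. the third parallel unit of the range, while chk only advances once
+ * every 4096 * 16 user blocks.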
+ */
+ *lbk = lba % geo->clba;
+ addr_shift = geo->clba;
+
+ punit = range->begin + (lba / addr_shift) % bdev_ocssd_num_parallel_units(ocssd_bdev);
+
+ *pu = punit % geo->num_pu;
+ *grp = punit / geo->num_pu;
+
+ addr_shift *= bdev_ocssd_num_parallel_units(ocssd_bdev);
+
+ *chk = (lba / addr_shift) % geo->num_chk;
+}
+
+static uint64_t
+bdev_ocssd_from_disk_lba(struct ocssd_bdev *ocssd_bdev, uint64_t lba)
+{
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ const struct spdk_ocssd_geometry_data *geometry = &ocssd_ns->geometry;
+ const struct bdev_ocssd_lba_offsets *offsets = &ocssd_ns->lba_offsets;
+ const struct bdev_ocssd_range *range = &ocssd_bdev->range;
+ uint64_t lbk, chk, pu, grp, punit;
+
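+ /* Unpack the device LBA into its (group, PU, chunk, block) fields using
+ * the bit offsets/lengths reported in the geometry, then invert the
+ * mapping applied by bdev_ocssd_to_disk_lba() to recover the user LBA.
+ */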
+ lbk = (lba >> offsets->lbk) & ((1 << geometry->lbaf.lbk_len) - 1);
+ chk = (lba >> offsets->chk) & ((1 << geometry->lbaf.chk_len) - 1);
+ pu = (lba >> offsets->pu) & ((1 << geometry->lbaf.pu_len) - 1);
+ grp = (lba >> offsets->grp) & ((1 << geometry->lbaf.grp_len) - 1);
+
+ punit = grp * geometry->num_pu + pu - range->begin;
+
+ return lbk + punit * geometry->clba + chk * geometry->clba *
+ bdev_ocssd_num_parallel_units(ocssd_bdev);
+}
+
+static uint64_t
+bdev_ocssd_to_disk_lba(struct ocssd_bdev *ocssd_bdev, uint64_t lba)
+{
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ const struct bdev_ocssd_lba_offsets *offsets = &ocssd_ns->lba_offsets;
+ uint64_t lbk, chk, pu, grp;
+
+ bdev_ocssd_translate_lba(ocssd_bdev, lba, &grp, &pu, &chk, &lbk);
+
+ return (lbk << offsets->lbk) |
+ (chk << offsets->chk) |
+ (pu << offsets->pu) |
+ (grp << offsets->grp);
+}
+
+static bool
+bdev_ocssd_lba_in_range(struct ocssd_bdev *ocssd_bdev, uint64_t lba)
+{
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ const struct spdk_ocssd_geometry_data *geometry = &ocssd_ns->geometry;
+ const struct bdev_ocssd_lba_offsets *offsets = &ocssd_ns->lba_offsets;
+ const struct bdev_ocssd_range *range = &ocssd_bdev->range;
+ uint64_t pu, grp, punit;
+
+ pu = (lba >> offsets->pu) & ((1 << geometry->lbaf.pu_len) - 1);
+ grp = (lba >> offsets->grp) & ((1 << geometry->lbaf.grp_len) - 1);
+ punit = grp * geometry->num_pu + pu;
+
+ return punit >= range->begin && punit <= range->end;
+}
+
+static void
+bdev_ocssd_reset_sgl(void *cb_arg, uint32_t offset)
+{
+ struct spdk_bdev_io *bdev_io = cb_arg;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ struct iovec *iov;
+
+ ocdev_io->io.iov_pos = 0;
+ ocdev_io->io.iov_off = 0;
+
+ for (; ocdev_io->io.iov_pos < (size_t)bdev_io->u.bdev.iovcnt; ++ocdev_io->io.iov_pos) {
+ iov = &bdev_io->u.bdev.iovs[ocdev_io->io.iov_pos];
+ if (offset < iov->iov_len) {
+ ocdev_io->io.iov_off = offset;
+ return;
+ }
+
+ offset -= iov->iov_len;
+ }
+
+ assert(false && "Invalid offset length");
+}
+
+static int
+bdev_ocssd_next_sge(void *cb_arg, void **address, uint32_t *length)
+{
+ struct spdk_bdev_io *bdev_io = cb_arg;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ struct iovec *iov;
+
+ assert(ocdev_io->io.iov_pos < (size_t)bdev_io->u.bdev.iovcnt);
+ iov = &bdev_io->u.bdev.iovs[ocdev_io->io.iov_pos];
+
+ *address = iov->iov_base;
+ *length = iov->iov_len;
+
+ if (ocdev_io->io.iov_off != 0) {
+ assert(ocdev_io->io.iov_off < iov->iov_len);
+ *address = (char *)*address + ocdev_io->io.iov_off;
+ *length -= ocdev_io->io.iov_off;
+ }
+
+ assert(ocdev_io->io.iov_off + *length == iov->iov_len);
+ ocdev_io->io.iov_off = 0;
+ ocdev_io->io.iov_pos++;
+
+ return 0;
+}
+
+static void
+bdev_ocssd_read_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+
+ spdk_bdev_io_complete_nvme_status(bdev_io, 0, cpl->status.sct, cpl->status.sc);
+}
+
+static int
+bdev_ocssd_read(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ struct nvme_io_channel *nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ const size_t zone_size = nvme_bdev->disk.zone_size;
+ uint64_t lba;
+
+ if ((bdev_io->u.bdev.offset_blocks % zone_size) + bdev_io->u.bdev.num_blocks > zone_size) {
+ SPDK_ERRLOG("Tried to cross zone boundary during read command\n");
+ return -EINVAL;
+ }
+
+ ocdev_io->io.iov_pos = 0;
+ ocdev_io->io.iov_off = 0;
+
+ lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev_io->u.bdev.offset_blocks);
+
+ return spdk_nvme_ns_cmd_readv_with_md(nvme_bdev->nvme_ns->ns, nvme_ioch->qpair, lba,
+ bdev_io->u.bdev.num_blocks, bdev_ocssd_read_cb,
+ bdev_io, 0, bdev_ocssd_reset_sgl,
+ bdev_ocssd_next_sge, bdev_io->u.bdev.md_buf, 0, 0);
+}
+
+static void
+bdev_ocssd_write_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+
+ if (bdev_io->type == SPDK_BDEV_IO_TYPE_ZONE_APPEND) {
+ bdev_io->u.bdev.offset_blocks = ocdev_io->io.zone->write_pointer;
+ }
+
+ ocdev_io->io.zone->write_pointer = bdev_io->u.bdev.offset_blocks +
+ bdev_io->u.bdev.num_blocks;
+ assert(ocdev_io->io.zone->write_pointer <= ocdev_io->io.zone->slba +
+ ocdev_io->io.zone->capacity);
+
+ __atomic_store_n(&ocdev_io->io.zone->busy, false, __ATOMIC_SEQ_CST);
+ spdk_bdev_io_complete_nvme_status(bdev_io, 0, cpl->status.sct, cpl->status.sc);
+}
+
+static int
+bdev_ocssd_write(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ struct nvme_io_channel *nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ const size_t zone_size = nvme_bdev->disk.zone_size;
+ uint64_t lba;
+ int rc;
+
+ if ((bdev_io->u.bdev.offset_blocks % zone_size) + bdev_io->u.bdev.num_blocks > zone_size) {
+ SPDK_ERRLOG("Tried to cross zone boundary during write command\n");
+ return -EINVAL;
+ }
+
+ ocdev_io->io.zone = bdev_ocssd_get_zone_by_lba(ocssd_bdev, bdev_io->u.bdev.offset_blocks);
+ if (__atomic_exchange_n(&ocdev_io->io.zone->busy, true, __ATOMIC_SEQ_CST)) {
+ return -EINVAL;
+ }
+
+ ocdev_io->io.iov_pos = 0;
+ ocdev_io->io.iov_off = 0;
+
+ lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev_io->u.bdev.offset_blocks);
+ rc = spdk_nvme_ns_cmd_writev_with_md(nvme_bdev->nvme_ns->ns, nvme_ioch->qpair, lba,
+ bdev_io->u.bdev.num_blocks, bdev_ocssd_write_cb,
+ bdev_io, 0, bdev_ocssd_reset_sgl,
+ bdev_ocssd_next_sge, bdev_io->u.bdev.md_buf, 0, 0);
+ if (spdk_unlikely(rc != 0)) {
+ __atomic_store_n(&ocdev_io->io.zone->busy, false, __ATOMIC_SEQ_CST);
+ }
+
+ return rc;
+}
+
+static int
+bdev_ocssd_zone_append(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ struct nvme_io_channel *nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ struct bdev_ocssd_zone *zone;
+ uint64_t lba;
+ int rc = 0;
+
+ zone = bdev_ocssd_get_zone_by_slba(ocssd_bdev, bdev_io->u.bdev.offset_blocks);
+ if (!zone) {
+ SPDK_ERRLOG("Invalid zone SLBA: %"PRIu64"\n", bdev_io->u.bdev.offset_blocks);
+ return -EINVAL;
+ }
+
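+ /* Appends bump the zone's write pointer, so only one write may be in
+ * flight per zone at a time. A busy zone returns -EAGAIN, which causes
+ * the request to be retried from the pending-request poller.
+ */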
+ if (__atomic_exchange_n(&zone->busy, true, __ATOMIC_SEQ_CST)) {
+ return -EAGAIN;
+ }
+
+ if (zone->slba + zone->capacity - zone->write_pointer < bdev_io->u.bdev.num_blocks) {
+ SPDK_ERRLOG("Insufficient number of blocks remaining\n");
+ rc = -ENOSPC;
+ goto out;
+ }
+
+ ocdev_io->io.zone = zone;
+ ocdev_io->io.iov_pos = 0;
+ ocdev_io->io.iov_off = 0;
+
+ lba = bdev_ocssd_to_disk_lba(ocssd_bdev, zone->write_pointer);
+ rc = spdk_nvme_ns_cmd_writev_with_md(nvme_bdev->nvme_ns->ns, nvme_ioch->qpair, lba,
+ bdev_io->u.bdev.num_blocks, bdev_ocssd_write_cb,
+ bdev_io, 0, bdev_ocssd_reset_sgl,
+ bdev_ocssd_next_sge, bdev_io->u.bdev.md_buf, 0, 0);
+out:
+ if (spdk_unlikely(rc != 0)) {
+ __atomic_store_n(&zone->busy, false, __ATOMIC_SEQ_CST);
+ }
+
+ return rc;
+}
+
+static void
+bdev_ocssd_io_get_buf_cb(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io, bool success)
+{
+ int rc;
+
+ if (!success) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ return;
+ }
+
+ rc = bdev_ocssd_read(ioch, bdev_io);
+ if (spdk_unlikely(rc != 0)) {
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static void
+bdev_ocssd_reset_zone_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+
+ ocdev_io->io.zone->write_pointer = ocdev_io->io.zone->slba;
+ __atomic_store_n(&ocdev_io->io.zone->busy, false, __ATOMIC_SEQ_CST);
+ spdk_bdev_io_complete_nvme_status(bdev_io, 0, cpl->status.sct, cpl->status.sc);
+}
+
+static int
+bdev_ocssd_reset_zone(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io,
+ uint64_t slba, size_t num_zones)
+{
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ struct nvme_io_channel *nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ uint64_t offset, zone_size = nvme_bdev->disk.zone_size;
+ int rc;
+
+ if (num_zones > 1) {
+ SPDK_ERRLOG("Exceeded maximum number of zones per single reset: 1\n");
+ return -EINVAL;
+ }
+
+ ocdev_io->io.zone = bdev_ocssd_get_zone_by_slba(ocssd_bdev, slba);
+ if (__atomic_exchange_n(&ocdev_io->io.zone->busy, true, __ATOMIC_SEQ_CST)) {
+ return -EINVAL;
+ }
+
+ for (offset = 0; offset < num_zones; ++offset) {
+ ocdev_io->io.lba[offset] = bdev_ocssd_to_disk_lba(ocssd_bdev,
+ slba + offset * zone_size);
+ }
+
+ rc = spdk_nvme_ocssd_ns_cmd_vector_reset(nvme_bdev->nvme_ns->ns, nvme_ioch->qpair,
+ ocdev_io->io.lba, num_zones, NULL,
+ bdev_ocssd_reset_zone_cb, bdev_io);
+ if (spdk_unlikely(rc != 0)) {
+ __atomic_store_n(&ocdev_io->io.zone->busy, false, __ATOMIC_SEQ_CST);
+ }
+
+ return rc;
+}
+
+static int _bdev_ocssd_get_zone_info(struct spdk_bdev_io *bdev_io);
+
+static void
+bdev_ocssd_fill_zone_info(struct ocssd_bdev *ocssd_bdev, struct spdk_bdev_zone_info *zone_info,
+ const struct spdk_ocssd_chunk_information_entry *chunk_info)
+{
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+
+ zone_info->zone_id = bdev_ocssd_from_disk_lba(ocssd_bdev, chunk_info->slba);
+ zone_info->write_pointer = zone_info->zone_id;
+
+ if (chunk_info->cs.free) {
+ zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
+ } else if (chunk_info->cs.closed) {
+ zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
+ } else if (chunk_info->cs.open) {
+ zone_info->state = SPDK_BDEV_ZONE_STATE_OPEN;
+ zone_info->write_pointer += chunk_info->wp % nvme_bdev->disk.zone_size;
+ } else if (chunk_info->cs.offline) {
+ zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
+ } else {
+ SPDK_ERRLOG("Unknown chunk state, assuming offline\n");
+ zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
+ }
+
+ if (chunk_info->ct.size_deviate) {
+ zone_info->capacity = chunk_info->cnlb;
+ } else {
+ zone_info->capacity = nvme_bdev->disk.zone_size;
+ }
+}
+
+static void
+bdev_ocssd_zone_info_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_bdev_io *bdev_io = ctx;
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ struct spdk_ocssd_chunk_information_entry *chunk_info = &ocdev_io->zone_info.chunk_info;
+ struct spdk_bdev_zone_info *zone_info;
+ int rc;
+
+ if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
+ spdk_bdev_io_complete_nvme_status(bdev_io, 0, cpl->status.sct, cpl->status.sc);
+ return;
+ }
+
+ zone_info = ((struct spdk_bdev_zone_info *)bdev_io->u.zone_mgmt.buf) +
+ ocdev_io->zone_info.chunk_offset;
+ bdev_ocssd_fill_zone_info(ocssd_bdev, zone_info, chunk_info);
+
+ if (++ocdev_io->zone_info.chunk_offset == bdev_io->u.zone_mgmt.num_zones) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ } else {
+ rc = _bdev_ocssd_get_zone_info(bdev_io);
+ if (spdk_unlikely(rc != 0)) {
+ if (rc == -ENOMEM) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ } else {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+ }
+}
+
+static int
+_bdev_ocssd_get_zone_info(struct spdk_bdev_io *bdev_io)
+{
+ struct ocssd_bdev *ocssd_bdev = bdev_io->bdev->ctxt;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ const struct spdk_ocssd_geometry_data *geo = &ocssd_ns->geometry;
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+ uint64_t lba, grp, pu, chk, lbk, offset;
+
+ lba = bdev_io->u.zone_mgmt.zone_id + ocdev_io->zone_info.chunk_offset *
+ nvme_bdev->disk.zone_size;
+ bdev_ocssd_translate_lba(ocssd_bdev, lba, &grp, &pu, &chk, &lbk);
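+ /* Chunk information log page entries are laid out per (group, parallel
+ * unit, chunk), so compute the flat index of the chunk backing this zone.
+ */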
+ offset = grp * geo->num_pu * geo->num_chk + pu * geo->num_chk + chk;
+
+ return spdk_nvme_ctrlr_cmd_get_log_page(nvme_bdev->nvme_bdev_ctrlr->ctrlr,
+ SPDK_OCSSD_LOG_CHUNK_INFO,
+ spdk_nvme_ns_get_id(nvme_bdev->nvme_ns->ns),
+ &ocdev_io->zone_info.chunk_info,
+ sizeof(ocdev_io->zone_info.chunk_info),
+ offset * sizeof(ocdev_io->zone_info.chunk_info),
+ bdev_ocssd_zone_info_cb, (void *)bdev_io);
+}
+
+static int
+bdev_ocssd_get_zone_info(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ struct bdev_ocssd_io *ocdev_io = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
+
+ if (bdev_io->u.zone_mgmt.num_zones < 1) {
+ SPDK_ERRLOG("Invalid number of zones: %"PRIu32"\n", bdev_io->u.zone_mgmt.num_zones);
+ return -EINVAL;
+ }
+
+ if (bdev_io->u.zone_mgmt.zone_id % bdev_io->bdev->zone_size != 0) {
+ SPDK_ERRLOG("Unaligned zone LBA: %"PRIu64"\n", bdev_io->u.zone_mgmt.zone_id);
+ return -EINVAL;
+ }
+
+ ocdev_io->zone_info.chunk_offset = 0;
+
+ return _bdev_ocssd_get_zone_info(bdev_io);
+}
+
+static int
+bdev_ocssd_zone_management(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->u.zone_mgmt.zone_action) {
+ case SPDK_BDEV_ZONE_RESET:
+ return bdev_ocssd_reset_zone(ioch, bdev_io, bdev_io->u.zone_mgmt.zone_id,
+ bdev_io->u.zone_mgmt.num_zones);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void bdev_ocssd_submit_request(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io);
+
+static int
+bdev_ocssd_poll_pending(void *ctx)
+{
+ struct spdk_io_channel *ioch = ctx;
+ struct nvme_io_channel *nvme_ioch;
+ struct ocssd_io_channel *ocssd_ioch;
+ struct spdk_bdev_io *bdev_io;
+ TAILQ_HEAD(, spdk_bdev_io) pending_requests;
+ int num_requests = 0;
+
+ nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ ocssd_ioch = nvme_ioch->ocssd_ioch;
+
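+ /* Move the pending requests to a local list first so that any request
+ * failing with -EAGAIN again is re-queued for the next poller iteration
+ * rather than being retried endlessly within this loop.
+ */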
+ TAILQ_INIT(&pending_requests);
+ TAILQ_SWAP(&ocssd_ioch->pending_requests, &pending_requests, spdk_bdev_io, module_link);
+
+ while ((bdev_io = TAILQ_FIRST(&pending_requests))) {
+ TAILQ_REMOVE(&pending_requests, bdev_io, module_link);
+ bdev_ocssd_submit_request(ioch, bdev_io);
+ num_requests++;
+ }
+
+ if (TAILQ_EMPTY(&ocssd_ioch->pending_requests)) {
+ spdk_poller_pause(ocssd_ioch->pending_poller);
+ }
+
+ return num_requests;
+}
+
+static void
+bdev_ocssd_delay_request(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ struct nvme_io_channel *nvme_ioch = spdk_io_channel_get_ctx(ioch);
+ struct ocssd_io_channel *ocssd_ioch = nvme_ioch->ocssd_ioch;
+
+ TAILQ_INSERT_TAIL(&ocssd_ioch->pending_requests, bdev_io, module_link);
+ spdk_poller_resume(ocssd_ioch->pending_poller);
+}
+
+static void
+bdev_ocssd_submit_request(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
+{
+ int rc = 0;
+
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, bdev_ocssd_io_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ rc = bdev_ocssd_write(ioch, bdev_io);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
+ rc = bdev_ocssd_zone_management(ioch, bdev_io);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
+ rc = bdev_ocssd_get_zone_info(ioch, bdev_io);
+ break;
+
+ case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
+ rc = bdev_ocssd_zone_append(ioch, bdev_io);
+ break;
+
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ if (spdk_unlikely(rc != 0)) {
+ switch (rc) {
+ case -ENOMEM:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
+ break;
+ case -EAGAIN:
+ bdev_ocssd_delay_request(ioch, bdev_io);
+ break;
+ default:
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ break;
+ }
+ }
+}
+
+static bool
+bdev_ocssd_io_type_supported(void *ctx, enum spdk_bdev_io_type type)
+{
+ switch (type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
+ case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static struct spdk_io_channel *
+bdev_ocssd_get_io_channel(void *ctx)
+{
+ struct ocssd_bdev *ocssd_bdev = ctx;
+
+ return spdk_get_io_channel(ocssd_bdev->nvme_bdev.nvme_bdev_ctrlr);
+}
+
+static void
+bdev_ocssd_free_namespace(struct nvme_bdev_ns *nvme_ns)
+{
+ struct nvme_bdev *bdev, *tmp;
+
+ TAILQ_FOREACH_SAFE(bdev, &nvme_ns->bdevs, tailq, tmp) {
+ spdk_bdev_unregister(&bdev->disk, NULL, NULL);
+ }
+
+ free(nvme_ns->type_ctx);
+ nvme_ns->type_ctx = NULL;
+
+ nvme_ctrlr_depopulate_namespace_done(nvme_ns->ctrlr);
+}
+
+static void
+bdev_ocssd_chunk_notification_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct nvme_bdev_ns *nvme_ns = ctx;
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ struct spdk_bdev_media_event event;
+ struct spdk_ocssd_chunk_notification_entry *chunk_entry;
+ struct nvme_bdev *nvme_bdev;
+ struct ocssd_bdev *ocssd_bdev;
+ size_t chunk_id, num_blocks, lba;
+ int rc;
+
+ ocssd_ns->num_outstanding--;
+
+ /* The namespace could have been depopulated in the meantime */
+ if (!nvme_ns->populated) {
+ if (ocssd_ns->num_outstanding == 0) {
+ bdev_ocssd_free_namespace(nvme_ns);
+ }
+
+ return;
+ }
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_ERRLOG("Failed to retrieve chunk notification log\n");
+ return;
+ }
+
+ for (chunk_id = 0; chunk_id < CHUNK_NOTIFICATION_ENTRY_COUNT; ++chunk_id) {
+ chunk_entry = &ocssd_ns->chunk[chunk_id];
+ if (chunk_entry->nc <= ocssd_ns->chunk_notify_count) {
+ break;
+ }
+
+ ocssd_ns->chunk_notify_count = chunk_entry->nc;
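+ /* The notification mask indicates how much of the media is affected:
+ * a number of logical blocks, a whole chunk, or an entire parallel unit.
+ */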
+ if (chunk_entry->mask.lblk) {
+ num_blocks = chunk_entry->nlb;
+ } else if (chunk_entry->mask.chunk) {
+ num_blocks = ocssd_ns->geometry.clba;
+ } else if (chunk_entry->mask.pu) {
+ num_blocks = ocssd_ns->geometry.clba * ocssd_ns->geometry.num_chk;
+ } else {
+ SPDK_WARNLOG("Invalid chunk notification mask\n");
+ continue;
+ }
+
+ TAILQ_FOREACH(nvme_bdev, &nvme_ns->bdevs, tailq) {
+ ocssd_bdev = SPDK_CONTAINEROF(nvme_bdev, struct ocssd_bdev, nvme_bdev);
+ if (bdev_ocssd_lba_in_range(ocssd_bdev, chunk_entry->lba)) {
+ break;
+ }
+ }
+
+ if (nvme_bdev == NULL) {
+ SPDK_INFOLOG(SPDK_LOG_BDEV_OCSSD, "Dropping media management event\n");
+ continue;
+ }
+
+ lba = bdev_ocssd_from_disk_lba(ocssd_bdev, chunk_entry->lba);
+ while (num_blocks > 0 && lba < nvme_bdev->disk.blockcnt) {
+ event.offset = lba;
+ event.num_blocks = spdk_min(num_blocks, ocssd_ns->geometry.clba);
+
+ rc = spdk_bdev_push_media_events(&nvme_bdev->disk, &event, 1);
+ if (spdk_unlikely(rc < 0)) {
+ SPDK_DEBUGLOG(SPDK_LOG_BDEV_OCSSD, "Failed to push media event: %s\n",
+ spdk_strerror(-rc));
+ break;
+ }
+
+ /* Jump to the next chunk on the same parallel unit */
+ lba += ocssd_ns->geometry.clba * bdev_ocssd_num_parallel_units(ocssd_bdev);
+ num_blocks -= event.num_blocks;
+ }
+ }
+
+ /* If at least one notification has been processed, send out a media management event */
+ if (chunk_id > 0) {
+ TAILQ_FOREACH(nvme_bdev, &nvme_ns->bdevs, tailq) {
+ spdk_bdev_notify_media_management(&nvme_bdev->disk);
+ }
+ }
+
+ /* If we filled the full array of events, there may be more still pending. Set the pending
+ * flag back to true so that we try to get more events again next time the poller runs.
+ */
+ if (chunk_id == CHUNK_NOTIFICATION_ENTRY_COUNT) {
+ ocssd_ns->chunk_notify_pending = true;
+ }
+}
+
+static int
+bdev_ocssd_poll_mm(void *ctx)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = ctx;
+ struct nvme_bdev_ns *nvme_ns;
+ struct bdev_ocssd_ns *ocssd_ns;
+ uint32_t nsid;
+ int rc;
+
+ for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) {
+ nvme_ns = nvme_bdev_ctrlr->namespaces[nsid];
+ if (nvme_ns == NULL || !nvme_ns->populated) {
+ continue;
+ }
+
+ ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ if (ocssd_ns->chunk_notify_pending) {
+ ocssd_ns->chunk_notify_pending = false;
+ ocssd_ns->num_outstanding++;
+
+ rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_bdev_ctrlr->ctrlr,
+ SPDK_OCSSD_LOG_CHUNK_NOTIFICATION,
+ nsid + 1, ocssd_ns->chunk,
+ sizeof(ocssd_ns->chunk[0]) *
+ CHUNK_NOTIFICATION_ENTRY_COUNT,
+ 0, bdev_ocssd_chunk_notification_cb,
+ nvme_ns);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to get chunk notification log page: %s\n",
+ spdk_strerror(-rc));
+ ocssd_ns->num_outstanding--;
+ }
+ }
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+void
+bdev_ocssd_handle_chunk_notification(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ struct bdev_ocssd_ns *ocssd_ns;
+ struct nvme_bdev_ns *nvme_ns;
+ uint32_t nsid;
+
+ for (nsid = 0; nsid < nvme_bdev_ctrlr->num_ns; ++nsid) {
+ nvme_ns = nvme_bdev_ctrlr->namespaces[nsid];
+ if (nvme_ns == NULL || !nvme_ns->populated) {
+ continue;
+ }
+
+ ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ ocssd_ns->chunk_notify_pending = true;
+ }
+}
+
+static struct spdk_bdev_fn_table ocssdlib_fn_table = {
+ .destruct = bdev_ocssd_destruct,
+ .submit_request = bdev_ocssd_submit_request,
+ .io_type_supported = bdev_ocssd_io_type_supported,
+ .get_io_channel = bdev_ocssd_get_io_channel,
+};
+
+struct bdev_ocssd_create_ctx {
+ struct ocssd_bdev *ocssd_bdev;
+ bdev_ocssd_create_cb cb_fn;
+ void *cb_arg;
+ const struct bdev_ocssd_range *range;
+ uint64_t chunk_offset;
+ uint64_t end_chunk_offset;
+ uint64_t num_chunks;
+#define OCSSD_BDEV_CHUNK_INFO_COUNT 128
+ struct spdk_ocssd_chunk_information_entry chunk_info[OCSSD_BDEV_CHUNK_INFO_COUNT];
+};
+
+static void
+bdev_ocssd_create_complete(struct bdev_ocssd_create_ctx *create_ctx, int status)
+{
+ const char *bdev_name = create_ctx->ocssd_bdev->nvme_bdev.disk.name;
+
+ if (spdk_unlikely(status != 0)) {
+ bdev_ocssd_free_bdev(create_ctx->ocssd_bdev);
+ }
+
+ create_ctx->cb_fn(bdev_name, status, create_ctx->cb_arg);
+ free(create_ctx);
+}
+
+static int bdev_ocssd_init_zone(struct bdev_ocssd_create_ctx *create_ctx);
+
+static void
+bdev_ocssd_register_bdev(void *ctx)
+{
+ struct bdev_ocssd_create_ctx *create_ctx = ctx;
+ struct ocssd_bdev *ocssd_bdev = create_ctx->ocssd_bdev;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+ int rc;
+
+ rc = spdk_bdev_register(&nvme_bdev->disk);
+ if (spdk_likely(rc == 0)) {
+ nvme_bdev_attach_bdev_to_ns(nvme_bdev->nvme_ns, nvme_bdev);
+ } else {
+ SPDK_ERRLOG("Failed to register bdev %s\n", nvme_bdev->disk.name);
+ }
+
+ bdev_ocssd_create_complete(create_ctx, rc);
+}
+
+static void
+bdev_ocssd_init_zone_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct bdev_ocssd_create_ctx *create_ctx = ctx;
+ struct bdev_ocssd_zone *ocssd_zone;
+ struct ocssd_bdev *ocssd_bdev = create_ctx->ocssd_bdev;
+ struct spdk_bdev_zone_info zone_info = {};
+ uint64_t offset;
+ int rc = 0;
+
+ if (spdk_nvme_cpl_is_error(cpl)) {
+ SPDK_ERRLOG("Chunk information log page failed\n");
+ bdev_ocssd_create_complete(create_ctx, -EIO);
+ return;
+ }
+
+ for (offset = 0; offset < create_ctx->num_chunks; ++offset) {
+ bdev_ocssd_fill_zone_info(ocssd_bdev, &zone_info, &create_ctx->chunk_info[offset]);
+
+ ocssd_zone = bdev_ocssd_get_zone_by_slba(ocssd_bdev, zone_info.zone_id);
+ if (!ocssd_zone) {
+ SPDK_ERRLOG("Received invalid zone starting LBA: %"PRIu64"\n",
+ zone_info.zone_id);
+ bdev_ocssd_create_complete(create_ctx, -EINVAL);
+ return;
+ }
+
+ /* Make sure we're not filling the same zone twice */
+ assert(ocssd_zone->busy);
+
+ ocssd_zone->busy = false;
+ ocssd_zone->slba = zone_info.zone_id;
+ ocssd_zone->capacity = zone_info.capacity;
+ ocssd_zone->write_pointer = zone_info.write_pointer;
+ }
+
+ create_ctx->chunk_offset += create_ctx->num_chunks;
+ if (create_ctx->chunk_offset < create_ctx->end_chunk_offset) {
+ rc = bdev_ocssd_init_zone(create_ctx);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to send chunk info log page\n");
+ bdev_ocssd_create_complete(create_ctx, rc);
+ }
+ } else {
+ /* Make sure all zones have been processed */
+ for (offset = 0; offset < bdev_ocssd_num_zones(ocssd_bdev); ++offset) {
+ assert(!ocssd_bdev->zones[offset].busy);
+ }
+
+ /* Schedule the last bit of work (io_device, bdev registration) to be done in a
+ * context that is not tied to the admin command's completion callback.
+ */
+ spdk_thread_send_msg(spdk_get_thread(), bdev_ocssd_register_bdev, create_ctx);
+ }
+}
+
+static int
+bdev_ocssd_init_zone(struct bdev_ocssd_create_ctx *create_ctx)
+{
+ struct ocssd_bdev *ocssd_bdev = create_ctx->ocssd_bdev;
+ struct nvme_bdev *nvme_bdev = &ocssd_bdev->nvme_bdev;
+
+ create_ctx->num_chunks = spdk_min(create_ctx->end_chunk_offset - create_ctx->chunk_offset,
+ OCSSD_BDEV_CHUNK_INFO_COUNT);
+ assert(create_ctx->num_chunks > 0);
+
+ return spdk_nvme_ctrlr_cmd_get_log_page(nvme_bdev->nvme_bdev_ctrlr->ctrlr,
+ SPDK_OCSSD_LOG_CHUNK_INFO,
+ spdk_nvme_ns_get_id(nvme_bdev->nvme_ns->ns),
+ &create_ctx->chunk_info,
+ sizeof(create_ctx->chunk_info[0]) *
+ create_ctx->num_chunks,
+ sizeof(create_ctx->chunk_info[0]) *
+ create_ctx->chunk_offset,
+ bdev_ocssd_init_zone_cb, create_ctx);
+}
+
+static int
+bdev_ocssd_init_zones(struct bdev_ocssd_create_ctx *create_ctx)
+{
+ struct ocssd_bdev *ocssd_bdev = create_ctx->ocssd_bdev;
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_bdev(ocssd_bdev);
+ struct spdk_bdev *bdev = &ocssd_bdev->nvme_bdev.disk;
+ uint64_t offset;
+
+ ocssd_bdev->zones = calloc(bdev_ocssd_num_zones(ocssd_bdev), sizeof(*ocssd_bdev->zones));
+ if (!ocssd_bdev->zones) {
+ return -ENOMEM;
+ }
+
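+ /* The chunk information log page contains geometry.num_chk entries per
+ * parallel unit, so the entries describing this bdev start at
+ * range.begin * num_chk and there is one entry per zone.
+ */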
+ create_ctx->chunk_offset = ocssd_bdev->range.begin * ocssd_ns->geometry.num_chk;
+ create_ctx->end_chunk_offset = create_ctx->chunk_offset + bdev->blockcnt / bdev->zone_size;
+
+ /* Mark all zones as busy and clear the flag as each zone's info is filled in */
+ for (offset = 0; offset < bdev_ocssd_num_zones(ocssd_bdev); ++offset) {
+ ocssd_bdev->zones[offset].busy = true;
+ }
+
+ return bdev_ocssd_init_zone(create_ctx);
+}
+
+static bool
+bdev_ocssd_verify_range(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid,
+ const struct bdev_ocssd_range *range)
+{
+ struct nvme_bdev_ns *nvme_ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ const struct spdk_ocssd_geometry_data *geometry = &ocssd_ns->geometry;
+ struct ocssd_bdev *ocssd_bdev;
+ struct nvme_bdev *nvme_bdev;
+ size_t num_punits = geometry->num_pu * geometry->num_grp;
+
+ /* First verify the range is within the geometry */
+ if (range != NULL && (range->begin > range->end || range->end >= num_punits)) {
+ return false;
+ }
+
+ TAILQ_FOREACH(nvme_bdev, &nvme_ns->bdevs, tailq) {
+ ocssd_bdev = SPDK_CONTAINEROF(nvme_bdev, struct ocssd_bdev, nvme_bdev);
+
+ /* Only verify bdevs created on the same namespace */
+ if (spdk_nvme_ns_get_id(nvme_bdev->nvme_ns->ns) != nsid) {
+ continue;
+ }
+
+ /* An empty range means the whole namespace should be used */
+ if (range == NULL) {
+ return false;
+ }
+
+ /* Make sure the range doesn't overlap with any other range */
+ if (range->begin <= ocssd_bdev->range.end &&
+ range->end >= ocssd_bdev->range.begin) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void
+bdev_ocssd_create_bdev(const char *ctrlr_name, const char *bdev_name, uint32_t nsid,
+ const struct bdev_ocssd_range *range, bdev_ocssd_create_cb cb_fn,
+ void *cb_arg)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct bdev_ocssd_create_ctx *create_ctx = NULL;
+ struct nvme_bdev *nvme_bdev = NULL;
+ struct ocssd_bdev *ocssd_bdev = NULL;
+ struct spdk_nvme_ns *ns;
+ struct nvme_bdev_ns *nvme_ns;
+ struct bdev_ocssd_ns *ocssd_ns;
+ struct spdk_ocssd_geometry_data *geometry;
+ int rc = 0;
+
+ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(ctrlr_name);
+ if (!nvme_bdev_ctrlr) {
+ SPDK_ERRLOG("Unable to find controller %s\n", ctrlr_name);
+ rc = -ENODEV;
+ goto error;
+ }
+
+ ns = spdk_nvme_ctrlr_get_ns(nvme_bdev_ctrlr->ctrlr, nsid);
+ if (!ns) {
+ SPDK_ERRLOG("Unable to retrieve namespace %"PRIu32"\n", nsid);
+ rc = -ENODEV;
+ goto error;
+ }
+
+ if (!spdk_nvme_ns_is_active(ns)) {
+ SPDK_ERRLOG("Namespace %"PRIu32" is inactive\n", nsid);
+ rc = -EACCES;
+ goto error;
+ }
+
+ assert(nsid <= nvme_bdev_ctrlr->num_ns);
+ nvme_ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+ if (nvme_ns == NULL) {
+ SPDK_ERRLOG("Namespace %"PRIu32" is not initialized\n", nsid);
+ rc = -EINVAL;
+ goto error;
+ }
+
+ ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ if (ocssd_ns == NULL) {
+ SPDK_ERRLOG("Namespace %"PRIu32" is not an OCSSD namespace\n", nsid);
+ rc = -EINVAL;
+ goto error;
+ }
+
+ if (spdk_bdev_get_by_name(bdev_name) != NULL) {
+ SPDK_ERRLOG("Device with provided name (%s) already exists\n", bdev_name);
+ rc = -EEXIST;
+ goto error;
+ }
+
+ if (!bdev_ocssd_verify_range(nvme_bdev_ctrlr, nsid, range)) {
+ SPDK_ERRLOG("Invalid parallel unit range\n");
+ rc = -EINVAL;
+ goto error;
+ }
+
+ ocssd_bdev = calloc(1, sizeof(*ocssd_bdev));
+ if (!ocssd_bdev) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ create_ctx = calloc(1, sizeof(*create_ctx));
+ if (!create_ctx) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ create_ctx->ocssd_bdev = ocssd_bdev;
+ create_ctx->cb_fn = cb_fn;
+ create_ctx->cb_arg = cb_arg;
+ create_ctx->range = range;
+
+ nvme_bdev = &ocssd_bdev->nvme_bdev;
+ nvme_bdev->nvme_ns = nvme_ns;
+ nvme_bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr;
+ geometry = &ocssd_ns->geometry;
+
+ if (range != NULL) {
+ ocssd_bdev->range = *range;
+ } else {
+ ocssd_bdev->range.begin = 0;
+ ocssd_bdev->range.end = geometry->num_grp * geometry->num_pu - 1;
+ }
+
+ nvme_bdev->disk.name = strdup(bdev_name);
+ if (!nvme_bdev->disk.name) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ nvme_bdev->disk.product_name = "Open Channel SSD";
+ nvme_bdev->disk.ctxt = ocssd_bdev;
+ nvme_bdev->disk.fn_table = &ocssdlib_fn_table;
+ nvme_bdev->disk.module = &ocssd_if;
+ nvme_bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
+ nvme_bdev->disk.zoned = true;
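+ /* Exposed capacity: one zone per chunk within the parallel unit range,
+ * each zone geometry.clba blocks long.
+ */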
+ nvme_bdev->disk.blockcnt = bdev_ocssd_num_parallel_units(ocssd_bdev) *
+ geometry->num_chk * geometry->clba;
+ nvme_bdev->disk.zone_size = geometry->clba;
+ nvme_bdev->disk.max_open_zones = geometry->maxoc;
+ nvme_bdev->disk.optimal_open_zones = bdev_ocssd_num_parallel_units(ocssd_bdev);
+ nvme_bdev->disk.write_unit_size = geometry->ws_opt;
+ nvme_bdev->disk.media_events = true;
+
+ if (geometry->maxocpu != 0 && geometry->maxocpu != geometry->maxoc) {
+ SPDK_WARNLOG("Maximum open chunks per PU is not zero. Reducing the maximum "
+ "number of open zones: %"PRIu32" -> %"PRIu32"\n",
+ geometry->maxoc, geometry->maxocpu);
+ nvme_bdev->disk.max_open_zones = geometry->maxocpu;
+ }
+
+ rc = bdev_ocssd_init_zones(create_ctx);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to initialize zones on bdev %s\n", nvme_bdev->disk.name);
+ goto error;
+ }
+
+ return;
+error:
+ bdev_ocssd_free_bdev(ocssd_bdev);
+ cb_fn(NULL, rc, cb_arg);
+ free(create_ctx);
+}
+
+struct bdev_ocssd_delete_ctx {
+ bdev_ocssd_delete_cb cb_fn;
+ void *cb_arg;
+};
+
+static void
+bdev_ocssd_unregister_cb(void *cb_arg, int status)
+{
+ struct bdev_ocssd_delete_ctx *delete_ctx = cb_arg;
+
+ delete_ctx->cb_fn(status, delete_ctx->cb_arg);
+ free(delete_ctx);
+}
+
+void
+bdev_ocssd_delete_bdev(const char *bdev_name, bdev_ocssd_delete_cb cb_fn, void *cb_arg)
+{
+ struct spdk_bdev *bdev;
+ struct bdev_ocssd_delete_ctx *delete_ctx;
+
+ bdev = spdk_bdev_get_by_name(bdev_name);
+ if (!bdev) {
+ SPDK_ERRLOG("Unable to find bdev %s\n", bdev_name);
+ cb_fn(-ENODEV, cb_arg);
+ return;
+ }
+
+ if (bdev->module != &ocssd_if) {
+ SPDK_ERRLOG("Specified bdev %s is not an OCSSD bdev\n", bdev_name);
+ cb_fn(-EINVAL, cb_arg);
+ return;
+ }
+
+ delete_ctx = calloc(1, sizeof(*delete_ctx));
+ if (!delete_ctx) {
+ SPDK_ERRLOG("Unable to allocate deletion context\n");
+ cb_fn(-ENOMEM, cb_arg);
+ return;
+ }
+
+ delete_ctx->cb_fn = cb_fn;
+ delete_ctx->cb_arg = cb_arg;
+
+ spdk_bdev_unregister(bdev, bdev_ocssd_unregister_cb, delete_ctx);
+}
+
+struct bdev_ocssd_populate_ns_ctx {
+ struct nvme_async_probe_ctx *nvme_ctx;
+ struct nvme_bdev_ns *nvme_ns;
+};
+
+static void
+bdev_ocssd_geometry_cb(void *_ctx, const struct spdk_nvme_cpl *cpl)
+{
+ struct bdev_ocssd_populate_ns_ctx *ctx = _ctx;
+ struct nvme_bdev_ns *nvme_ns = ctx->nvme_ns;
+ struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
+ int rc = 0;
+
+ if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
+ SPDK_ERRLOG("Failed to retrieve geometry for namespace %"PRIu32"\n", nvme_ns->id);
+ free(nvme_ns->type_ctx);
+ nvme_ns->type_ctx = NULL;
+ rc = -EIO;
+ } else {
+ ocssd_ns->lba_offsets.lbk = 0;
+ ocssd_ns->lba_offsets.chk = ocssd_ns->lba_offsets.lbk +
+ ocssd_ns->geometry.lbaf.lbk_len;
+ ocssd_ns->lba_offsets.pu = ocssd_ns->lba_offsets.chk +
+ ocssd_ns->geometry.lbaf.chk_len;
+ ocssd_ns->lba_offsets.grp = ocssd_ns->lba_offsets.pu +
+ ocssd_ns->geometry.lbaf.pu_len;
+ ocssd_ns->chunk_notify_pending = true;
+ }
+
+ nvme_ctrlr_populate_namespace_done(ctx->nvme_ctx, nvme_ns, rc);
+ free(ctx);
+}
+
+void
+bdev_ocssd_populate_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_bdev_ns *nvme_ns,
+ struct nvme_async_probe_ctx *nvme_ctx)
+{
+ struct bdev_ocssd_ns *ocssd_ns;
+ struct bdev_ocssd_populate_ns_ctx *ctx;
+ struct spdk_nvme_ns *ns;
+ int rc;
+
+ ns = spdk_nvme_ctrlr_get_ns(nvme_bdev_ctrlr->ctrlr, nvme_ns->id);
+ if (ns == NULL) {
+ nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, -EINVAL);
+ return;
+ }
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx == NULL) {
+ nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, -ENOMEM);
+ return;
+ }
+
+ ocssd_ns = calloc(1, sizeof(*ocssd_ns));
+ if (ocssd_ns == NULL) {
+ nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, -ENOMEM);
+ free(ctx);
+ return;
+ }
+
+ nvme_ns->type_ctx = ocssd_ns;
+ nvme_ns->ns = ns;
+ ctx->nvme_ctx = nvme_ctx;
+ ctx->nvme_ns = nvme_ns;
+
+ rc = spdk_nvme_ocssd_ctrlr_cmd_geometry(nvme_bdev_ctrlr->ctrlr, nvme_ns->id,
+ &ocssd_ns->geometry,
+ sizeof(ocssd_ns->geometry),
+ bdev_ocssd_geometry_cb, ctx);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to retrieve OC geometry: %s\n", spdk_strerror(-rc));
+ nvme_ns->type_ctx = NULL;
+ nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, rc);
+ free(ocssd_ns);
+ free(ctx);
+ }
+}
+
+void
+bdev_ocssd_depopulate_namespace(struct nvme_bdev_ns *ns)
+{
+ struct bdev_ocssd_ns *ocssd_ns;
+
+ ocssd_ns = bdev_ocssd_get_ns_from_nvme(ns);
+
+ /* If there are outstanding admin requests, we cannot free the context
+ * here, as they'd write over deallocated memory. Clear the populated
+ * flag, so that the completion callback knows that the namespace is
+ * being depopulated and finishes its deallocation once all requests
+ * have completed.
+ */
+ ns->populated = false;
+ if (ocssd_ns->num_outstanding == 0) {
+ bdev_ocssd_free_namespace(ns);
+ }
+}
+
+int
+bdev_ocssd_create_io_channel(struct nvme_io_channel *ioch)
+{
+ struct ocssd_io_channel *ocssd_ioch;
+
+ ocssd_ioch = calloc(1, sizeof(*ocssd_ioch));
+ if (ocssd_ioch == NULL) {
+ return -ENOMEM;
+ }
+
+ ocssd_ioch->pending_poller = SPDK_POLLER_REGISTER(bdev_ocssd_poll_pending,
+ spdk_io_channel_from_ctx(ioch), 0);
+ if (ocssd_ioch->pending_poller == NULL) {
+ SPDK_ERRLOG("Failed to register pending requests poller\n");
+ free(ocssd_ioch);
+ return -ENOMEM;
+ }
+
+ /* Start the poller paused and only resume it once there are pending requests */
+ spdk_poller_pause(ocssd_ioch->pending_poller);
+
+ TAILQ_INIT(&ocssd_ioch->pending_requests);
+ ioch->ocssd_ioch = ocssd_ioch;
+
+ return 0;
+}
+
+void
+bdev_ocssd_destroy_io_channel(struct nvme_io_channel *ioch)
+{
+ spdk_poller_unregister(&ioch->ocssd_ioch->pending_poller);
+ free(ioch->ocssd_ioch);
+}
+
+int
+bdev_ocssd_init_ctrlr(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ struct ocssd_bdev_ctrlr *ocssd_ctrlr;
+
+ ocssd_ctrlr = calloc(1, sizeof(*ocssd_ctrlr));
+ if (!ocssd_ctrlr) {
+ return -ENOMEM;
+ }
+
+ ocssd_ctrlr->mm_poller = SPDK_POLLER_REGISTER(bdev_ocssd_poll_mm, nvme_bdev_ctrlr,
+ 10000ULL);
+ if (!ocssd_ctrlr->mm_poller) {
+ free(ocssd_ctrlr);
+ return -ENOMEM;
+ }
+
+ nvme_bdev_ctrlr->ocssd_ctrlr = ocssd_ctrlr;
+
+ return 0;
+}
+
+void
+bdev_ocssd_fini_ctrlr(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ spdk_poller_unregister(&nvme_bdev_ctrlr->ocssd_ctrlr->mm_poller);
+ free(nvme_bdev_ctrlr->ocssd_ctrlr);
+ nvme_bdev_ctrlr->ocssd_ctrlr = NULL;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("bdev_ocssd", SPDK_LOG_BDEV_OCSSD)
diff --git a/src/spdk/module/bdev/nvme/bdev_ocssd.h b/src/spdk/module/bdev/nvme/bdev_ocssd.h
new file mode 100644
index 000000000..89e5a3058
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_ocssd.h
@@ -0,0 +1,67 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_OCSSD_H
+#define SPDK_BDEV_OCSSD_H
+
+#include "spdk/stdinc.h"
+#include "common.h"
+
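+/* Inclusive range of parallel unit indices (group * num_pu + pu) owned by an
+ * OCSSD bdev; multiple bdevs with non-overlapping ranges may share a namespace.
+ */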
+struct bdev_ocssd_range {
+ uint64_t begin;
+ uint64_t end;
+};
+
+typedef void (*bdev_ocssd_create_cb)(const char *bdev_name, int status, void *ctx);
+typedef void (*bdev_ocssd_delete_cb)(int status, void *ctx);
+
+void bdev_ocssd_create_bdev(const char *ctrlr_name, const char *bdev_name, uint32_t nsid,
+ const struct bdev_ocssd_range *range,
+ bdev_ocssd_create_cb cb_fn, void *cb_arg);
+void bdev_ocssd_delete_bdev(const char *bdev_name, bdev_ocssd_delete_cb cb_fn, void *cb_arg);
+
+void bdev_ocssd_populate_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
+ struct nvme_bdev_ns *nvme_ns,
+ struct nvme_async_probe_ctx *ctx);
+void bdev_ocssd_depopulate_namespace(struct nvme_bdev_ns *ns);
+void bdev_ocssd_namespace_config_json(struct spdk_json_write_ctx *w, struct nvme_bdev_ns *ns);
+
+int bdev_ocssd_create_io_channel(struct nvme_io_channel *ioch);
+void bdev_ocssd_destroy_io_channel(struct nvme_io_channel *ioch);
+
+int bdev_ocssd_init_ctrlr(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+void bdev_ocssd_fini_ctrlr(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+
+void bdev_ocssd_handle_chunk_notification(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+
+#endif /* SPDK_BDEV_OCSSD_H */
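
The create and delete entry points declared here are asynchronous and report completion through the callback typedefs above. A hypothetical caller sketch; the controller name, bdev name and range values are illustrative and not defaults of this module:

#include "spdk/stdinc.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"
#include "bdev_ocssd.h"

static void
example_create_done(const char *bdev_name, int status, void *ctx)
{
	if (status != 0) {
		SPDK_ERRLOG("OCSSD bdev creation failed: %s\n", spdk_strerror(-status));
		return;
	}

	SPDK_NOTICELOG("Created OCSSD bdev %s\n", bdev_name);
}

static void
example_create(void)
{
	/* Expose a subset of the device's parallel units of namespace 1 as one bdev. */
	struct bdev_ocssd_range range = { .begin = 0, .end = 15 };

	bdev_ocssd_create_bdev("Nvme0", "Nvme0n1_ocssd", 1, &range,
			       example_create_done, NULL);
}
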
diff --git a/src/spdk/module/bdev/nvme/bdev_ocssd_rpc.c b/src/spdk/module/bdev/nvme/bdev_ocssd_rpc.c
new file mode 100644
index 000000000..47c5acdb3
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/bdev_ocssd_rpc.c
@@ -0,0 +1,197 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/rpc.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/log.h"
+#include "spdk/likely.h"
+#include "bdev_ocssd.h"
+
+#define BDEV_OCSSD_DEFAULT_NSID 1
+
+struct rpc_create_ocssd_bdev {
+ char *ctrlr_name;
+ char *bdev_name;
+ uint32_t nsid;
+ char *range;
+};
+
+static const struct spdk_json_object_decoder rpc_create_ocssd_bdev_decoders[] = {
+ {"ctrlr_name", offsetof(struct rpc_create_ocssd_bdev, ctrlr_name), spdk_json_decode_string},
+ {"bdev_name", offsetof(struct rpc_create_ocssd_bdev, bdev_name), spdk_json_decode_string},
+ {"nsid", offsetof(struct rpc_create_ocssd_bdev, nsid), spdk_json_decode_uint32, true},
+ {"range", offsetof(struct rpc_create_ocssd_bdev, range), spdk_json_decode_string, true},
+};
+
+static void
+free_rpc_create_ocssd_bdev(struct rpc_create_ocssd_bdev *rpc)
+{
+ free(rpc->ctrlr_name);
+ free(rpc->bdev_name);
+ free(rpc->range);
+}
+
+struct rpc_bdev_ocssd_create_ctx {
+ struct spdk_jsonrpc_request *request;
+ struct rpc_create_ocssd_bdev rpc;
+ struct bdev_ocssd_range range;
+};
+
+static void
+rpc_bdev_ocssd_create_done(const char *bdev_name, int status, void *_ctx)
+{
+ struct rpc_bdev_ocssd_create_ctx *ctx = _ctx;
+ struct spdk_json_write_ctx *w;
+
+ if (status != 0) {
+ spdk_jsonrpc_send_error_response(ctx->request, status, spdk_strerror(-status));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(ctx->request);
+ spdk_json_write_string(w, bdev_name);
+ spdk_jsonrpc_end_result(ctx->request, w);
+out:
+ free_rpc_create_ocssd_bdev(&ctx->rpc);
+ free(ctx);
+}
+
+static void
+rpc_bdev_ocssd_create(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ struct rpc_bdev_ocssd_create_ctx *ctx;
+ struct bdev_ocssd_range *range = NULL;
+ int rc;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
+ return;
+ }
+
+ ctx->rpc.nsid = BDEV_OCSSD_DEFAULT_NSID;
+ ctx->request = request;
+
+ if (spdk_json_decode_object(params, rpc_create_ocssd_bdev_decoders,
+ SPDK_COUNTOF(rpc_create_ocssd_bdev_decoders),
+ &ctx->rpc)) {
+ spdk_jsonrpc_send_error_response(request, -EINVAL, "Failed to parse the request");
+ goto out;
+ }
+
+ if (ctx->rpc.range != NULL) {
+ rc = sscanf(ctx->rpc.range, "%"PRIu64"-%"PRIu64,
+ &ctx->range.begin, &ctx->range.end);
+ if (rc != 2) {
+ spdk_jsonrpc_send_error_response(request, -EINVAL, "Failed to parse range");
+ goto out;
+ }
+
+ range = &ctx->range;
+ }
+
+ bdev_ocssd_create_bdev(ctx->rpc.ctrlr_name, ctx->rpc.bdev_name, ctx->rpc.nsid,
+ range, rpc_bdev_ocssd_create_done, ctx);
+ return;
+out:
+ free_rpc_create_ocssd_bdev(&ctx->rpc);
+ free(ctx);
+}
+
+SPDK_RPC_REGISTER("bdev_ocssd_create", rpc_bdev_ocssd_create, SPDK_RPC_RUNTIME)
+
+struct rpc_delete_ocssd_bdev {
+ char *name;
+};
+
+static const struct spdk_json_object_decoder rpc_delete_ocssd_bdev_decoders[] = {
+ {"name", offsetof(struct rpc_delete_ocssd_bdev, name), spdk_json_decode_string},
+};
+
+static void
+free_rpc_delete_ocssd_bdev(struct rpc_delete_ocssd_bdev *rpc)
+{
+ free(rpc->name);
+}
+
+struct rpc_bdev_ocssd_delete_ctx {
+ struct spdk_jsonrpc_request *request;
+ struct rpc_delete_ocssd_bdev rpc;
+};
+
+static void
+rpc_bdev_ocssd_delete_done(int status, void *_ctx)
+{
+ struct rpc_bdev_ocssd_delete_ctx *ctx = _ctx;
+ struct spdk_json_write_ctx *w;
+
+ if (status != 0) {
+ spdk_jsonrpc_send_error_response(ctx->request, status, spdk_strerror(-status));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(ctx->request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(ctx->request, w);
+out:
+ free_rpc_delete_ocssd_bdev(&ctx->rpc);
+ free(ctx);
+}
+
+static void
+rpc_bdev_ocssd_delete(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ struct rpc_bdev_ocssd_delete_ctx *ctx;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+ spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
+ return;
+ }
+
+ ctx->request = request;
+ if (spdk_json_decode_object(params, rpc_delete_ocssd_bdev_decoders,
+ SPDK_COUNTOF(rpc_delete_ocssd_bdev_decoders),
+ &ctx->rpc)) {
+ spdk_jsonrpc_send_error_response(request, -EINVAL, "Failed to parse the request");
+ free_rpc_delete_ocssd_bdev(&ctx->rpc);
+ free(ctx);
+ return;
+ }
+
+ bdev_ocssd_delete_bdev(ctx->rpc.name, rpc_bdev_ocssd_delete_done, ctx);
+}
+
+SPDK_RPC_REGISTER("bdev_ocssd_delete", rpc_bdev_ocssd_delete, SPDK_RPC_RUNTIME)
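
For reference, the object decoders in this file accept parameters of the following shape: "nsid" is optional and defaults to 1, and the optional "range" string is parsed as "begin-end" by the sscanf() call above. The concrete names and values below are purely illustrative:

static const char *example_bdev_ocssd_create_params =
	"{\n"
	"  \"ctrlr_name\": \"Nvme0\",\n"
	"  \"bdev_name\": \"Nvme0n1_ocssd\",\n"
	"  \"nsid\": 1,\n"
	"  \"range\": \"0-15\"\n"
	"}";

static const char *example_bdev_ocssd_delete_params =
	"{ \"name\": \"Nvme0n1_ocssd\" }";
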
diff --git a/src/spdk/module/bdev/nvme/common.c b/src/spdk/module/bdev/nvme/common.c
new file mode 100644
index 000000000..c895f1102
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/common.c
@@ -0,0 +1,204 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/env.h"
+#include "bdev_ocssd.h"
+#include "common.h"
+
+struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
+pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
+bool g_bdev_nvme_module_finish;
+
+struct nvme_bdev_ctrlr *
+nvme_bdev_ctrlr_get(const struct spdk_nvme_transport_id *trid)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
+ if (spdk_nvme_transport_id_compare(trid, nvme_bdev_ctrlr->trid) == 0) {
+ return nvme_bdev_ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+struct nvme_bdev_ctrlr *
+nvme_bdev_ctrlr_get_by_name(const char *name)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+
+ if (name == NULL) {
+ return NULL;
+ }
+
+ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
+ if (strcmp(name, nvme_bdev_ctrlr->name) == 0) {
+ return nvme_bdev_ctrlr;
+ }
+ }
+
+ return NULL;
+}
+
+struct nvme_bdev_ctrlr *
+nvme_bdev_first_ctrlr(void)
+{
+ return TAILQ_FIRST(&g_nvme_bdev_ctrlrs);
+}
+
+struct nvme_bdev_ctrlr *
+nvme_bdev_next_ctrlr(struct nvme_bdev_ctrlr *prev)
+{
+ return TAILQ_NEXT(prev, tailq);
+}
+
+void
+nvme_bdev_dump_trid_json(struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
+{
+ const char *trtype_str;
+ const char *adrfam_str;
+
+ trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
+ if (trtype_str) {
+ spdk_json_write_named_string(w, "trtype", trtype_str);
+ }
+
+ adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
+ if (adrfam_str) {
+ spdk_json_write_named_string(w, "adrfam", adrfam_str);
+ }
+
+ if (trid->traddr[0] != '\0') {
+ spdk_json_write_named_string(w, "traddr", trid->traddr);
+ }
+
+ if (trid->trsvcid[0] != '\0') {
+ spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
+ }
+
+ if (trid->subnqn[0] != '\0') {
+ spdk_json_write_named_string(w, "subnqn", trid->subnqn);
+ }
+}
+
+static void
+nvme_bdev_unregister_cb(void *io_device)
+{
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = io_device;
+ uint32_t i;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ spdk_nvme_detach(nvme_bdev_ctrlr->ctrlr);
+ spdk_poller_unregister(&nvme_bdev_ctrlr->adminq_timer_poller);
+ free(nvme_bdev_ctrlr->name);
+ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+ free(nvme_bdev_ctrlr->namespaces[i]);
+ }
+ free(nvme_bdev_ctrlr->namespaces);
+ free(nvme_bdev_ctrlr->trid);
+ free(nvme_bdev_ctrlr);
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
+ spdk_bdev_module_finish_done();
+ return;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
+
+int
+nvme_bdev_ctrlr_destruct(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
+{
+ assert(nvme_bdev_ctrlr->destruct);
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+
+ /* If we have already registered a poller, let that one take care of it. */
+ if (nvme_bdev_ctrlr->destruct_poller != NULL) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return SPDK_POLLER_IDLE;
+ }
+
+ if (nvme_bdev_ctrlr->resetting) {
+ nvme_bdev_ctrlr->destruct_poller =
+ SPDK_POLLER_REGISTER((spdk_poller_fn)nvme_bdev_ctrlr_destruct, nvme_bdev_ctrlr, 1000);
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ return SPDK_POLLER_BUSY;
+ }
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+
+ spdk_poller_unregister(&nvme_bdev_ctrlr->destruct_poller);
+ if (nvme_bdev_ctrlr->opal_dev) {
+ spdk_opal_dev_destruct(nvme_bdev_ctrlr->opal_dev);
+ nvme_bdev_ctrlr->opal_dev = NULL;
+ }
+
+ if (nvme_bdev_ctrlr->ocssd_ctrlr) {
+ bdev_ocssd_fini_ctrlr(nvme_bdev_ctrlr);
+ }
+
+ spdk_io_device_unregister(nvme_bdev_ctrlr, nvme_bdev_unregister_cb);
+ return SPDK_POLLER_BUSY;
+}
+
+void
+nvme_bdev_attach_bdev_to_ns(struct nvme_bdev_ns *nvme_ns, struct nvme_bdev *nvme_disk)
+{
+ nvme_ns->ctrlr->ref++;
+
+ TAILQ_INSERT_TAIL(&nvme_ns->bdevs, nvme_disk, tailq);
+}
+
+void
+nvme_bdev_detach_bdev_from_ns(struct nvme_bdev *nvme_disk)
+{
+ struct nvme_bdev_ctrlr *ctrlr = nvme_disk->nvme_ns->ctrlr;
+
+ pthread_mutex_lock(&g_bdev_nvme_mutex);
+ ctrlr->ref--;
+
+ TAILQ_REMOVE(&nvme_disk->nvme_ns->bdevs, nvme_disk, tailq);
+
+ if (ctrlr->ref == 0 && ctrlr->destruct) {
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+ nvme_bdev_ctrlr_destruct(ctrlr);
+ return;
+ }
+
+ pthread_mutex_unlock(&g_bdev_nvme_mutex);
+}
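
The attach/detach helpers above maintain a per-controller reference count: every bdev attached to one of the controller's namespaces takes a reference, and nvme_bdev_ctrlr_destruct() only runs once the last bdev detaches while the destruct flag is set. A hedged sketch of a removal path driving this contract; setting the flag here stands in for what bdev_nvme.c does on controller removal, and the function name is illustrative:

#include "common.h"

static void
example_remove_last_bdev(struct nvme_bdev *nvme_disk)
{
	struct nvme_bdev_ctrlr *ctrlr = nvme_disk->nvme_ns->ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	ctrlr->destruct = true;	/* mark the controller for destruction */
	pthread_mutex_unlock(&g_bdev_nvme_mutex);

	/* Drops the last reference; nvme_bdev_ctrlr_destruct() runs once ref reaches 0. */
	nvme_bdev_detach_bdev_from_ns(nvme_disk);
}
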
diff --git a/src/spdk/module/bdev/nvme/common.h b/src/spdk/module/bdev/nvme/common.h
new file mode 100644
index 000000000..c710507a1
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/common.h
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_COMMON_BDEV_NVME_H
+#define SPDK_COMMON_BDEV_NVME_H
+
+#include "spdk/nvme.h"
+#include "spdk/bdev_module.h"
+#include "spdk/opal.h"
+
+TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr);
+extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs;
+extern pthread_mutex_t g_bdev_nvme_mutex;
+extern bool g_bdev_nvme_module_finish;
+
+#define NVME_MAX_CONTROLLERS 1024
+
+enum nvme_bdev_ns_type {
+ NVME_BDEV_NS_UNKNOWN = 0,
+ NVME_BDEV_NS_STANDARD = 1,
+ NVME_BDEV_NS_OCSSD = 2,
+};
+
+struct nvme_bdev_ns {
+ uint32_t id;
+ enum nvme_bdev_ns_type type;
+ /** Marks whether this data structure has its bdevs
+ * populated for the associated namespace. It is used
+	 * to keep track of whether we need to manage the populated
+ * resources when a newly active namespace is found,
+ * or when a namespace becomes inactive.
+ */
+ bool populated;
+ struct spdk_nvme_ns *ns;
+ struct nvme_bdev_ctrlr *ctrlr;
+ TAILQ_HEAD(, nvme_bdev) bdevs;
+ void *type_ctx;
+};
+
+struct ocssd_bdev_ctrlr;
+
+struct nvme_bdev_ctrlr {
+ /**
+ * points to pinned, physically contiguous memory region;
+ * contains 4KB IDENTIFY structure for controller which is
+ * target for CONTROLLER IDENTIFY command during initialization
+ */
+ struct spdk_nvme_ctrlr *ctrlr;
+ struct spdk_nvme_transport_id *trid;
+ char *name;
+ int ref;
+ bool resetting;
+ bool destruct;
+ /**
+	 * PI check flags. These flags are set only on NVMe controllers created
+	 * through the bdev_nvme_attach_controller RPC or the .INI config file.
+	 * Hot-added NVMe controllers are not included.
+ */
+ uint32_t prchk_flags;
+ uint32_t num_ns;
+ /** Array of pointers to namespaces indexed by nsid - 1 */
+ struct nvme_bdev_ns **namespaces;
+
+ struct spdk_opal_dev *opal_dev;
+
+ struct spdk_poller *adminq_timer_poller;
+ struct spdk_poller *destruct_poller;
+ struct spdk_thread *thread;
+
+ struct ocssd_bdev_ctrlr *ocssd_ctrlr;
+
+ /** linked list pointer for device list */
+ TAILQ_ENTRY(nvme_bdev_ctrlr) tailq;
+};
+
+struct nvme_bdev {
+ struct spdk_bdev disk;
+ struct nvme_bdev_ns *nvme_ns;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ TAILQ_ENTRY(nvme_bdev) tailq;
+};
+
+struct nvme_bdev_poll_group {
+ struct spdk_nvme_poll_group *group;
+ struct spdk_poller *poller;
+ bool collect_spin_stat;
+ uint64_t spin_ticks;
+ uint64_t start_ticks;
+ uint64_t end_ticks;
+};
+
+typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc);
+
+struct nvme_async_probe_ctx {
+ struct spdk_nvme_probe_ctx *probe_ctx;
+ const char *base_name;
+ const char **names;
+ uint32_t count;
+ uint32_t prchk_flags;
+ struct spdk_poller *poller;
+ struct spdk_nvme_transport_id trid;
+ struct spdk_nvme_ctrlr_opts opts;
+ spdk_bdev_create_nvme_fn cb_fn;
+ void *cb_ctx;
+ uint32_t populates_in_progress;
+};
+
+struct ocssd_io_channel;
+
+struct nvme_io_channel {
+ struct spdk_nvme_qpair *qpair;
+ struct nvme_bdev_poll_group *group;
+ TAILQ_HEAD(, spdk_bdev_io) pending_resets;
+ struct ocssd_io_channel *ocssd_ioch;
+};
+
+void nvme_ctrlr_populate_namespace_done(struct nvme_async_probe_ctx *ctx,
+ struct nvme_bdev_ns *ns, int rc);
+void nvme_ctrlr_depopulate_namespace_done(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+
+struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get(const struct spdk_nvme_transport_id *trid);
+struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name);
+struct nvme_bdev_ctrlr *nvme_bdev_first_ctrlr(void);
+struct nvme_bdev_ctrlr *nvme_bdev_next_ctrlr(struct nvme_bdev_ctrlr *prev);
+
+void nvme_bdev_dump_trid_json(struct spdk_nvme_transport_id *trid,
+ struct spdk_json_write_ctx *w);
+
+int nvme_bdev_ctrlr_destruct(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+void nvme_bdev_attach_bdev_to_ns(struct nvme_bdev_ns *nvme_ns, struct nvme_bdev *nvme_disk);
+void nvme_bdev_detach_bdev_from_ns(struct nvme_bdev *nvme_disk);
+
+#endif /* SPDK_COMMON_BDEV_NVME_H */
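
NVMe namespace IDs are 1-based while the namespaces array above is 0-based, hence the "nsid - 1" indexing noted in the comment. A small hypothetical lookup helper (not part of this header) that makes the convention explicit:

static inline struct nvme_bdev_ns *
example_nvme_bdev_ctrlr_get_ns(struct nvme_bdev_ctrlr *ctrlr, uint32_t nsid)
{
	if (nsid == 0 || nsid > ctrlr->num_ns) {
		return NULL;
	}

	return ctrlr->namespaces[nsid - 1];
}
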
diff --git a/src/spdk/module/bdev/nvme/nvme_rpc.c b/src/spdk/module/bdev/nvme/nvme_rpc.c
new file mode 100644
index 000000000..e6a938384
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/nvme_rpc.c
@@ -0,0 +1,492 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+#include "bdev_nvme.h"
+#include "common.h"
+#include "spdk/base64.h"
+
+enum spdk_nvme_rpc_type {
+ NVME_ADMIN_CMD = 1,
+ NVME_IO_CMD,
+};
+
+struct rpc_bdev_nvme_send_cmd_req {
+ char *name;
+ int cmd_type;
+ int data_direction;
+ uint32_t timeout_ms;
+ uint32_t data_len;
+ uint32_t md_len;
+
+ struct spdk_nvme_cmd *cmdbuf;
+ char *data;
+ char *md;
+};
+
+struct rpc_bdev_nvme_send_cmd_resp {
+ char *cpl_text;
+ char *data_text;
+ char *md_text;
+};
+
+struct rpc_bdev_nvme_send_cmd_ctx {
+ struct spdk_jsonrpc_request *jsonrpc_request;
+ struct rpc_bdev_nvme_send_cmd_req req;
+ struct rpc_bdev_nvme_send_cmd_resp resp;
+ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
+ struct spdk_io_channel *ctrlr_io_ch;
+};
+
+static void
+free_rpc_bdev_nvme_send_cmd_ctx(struct rpc_bdev_nvme_send_cmd_ctx *ctx)
+{
+ assert(ctx != NULL);
+
+ free(ctx->req.name);
+ free(ctx->req.cmdbuf);
+ spdk_free(ctx->req.data);
+ spdk_free(ctx->req.md);
+ free(ctx->resp.cpl_text);
+ free(ctx->resp.data_text);
+ free(ctx->resp.md_text);
+ free(ctx);
+}
+
+static int
+rpc_bdev_nvme_send_cmd_resp_construct(struct rpc_bdev_nvme_send_cmd_resp *resp,
+ struct rpc_bdev_nvme_send_cmd_req *req,
+ const struct spdk_nvme_cpl *cpl)
+{
+ resp->cpl_text = malloc(spdk_base64_get_encoded_strlen(sizeof(*cpl)) + 1);
+ if (!resp->cpl_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->cpl_text, cpl, sizeof(*cpl));
+
+ if (req->data_direction == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ if (req->data_len) {
+ resp->data_text = malloc(spdk_base64_get_encoded_strlen(req->data_len) + 1);
+ if (!resp->data_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->data_text, req->data, req->data_len);
+ }
+ if (req->md_len) {
+ resp->md_text = malloc(spdk_base64_get_encoded_strlen(req->md_len) + 1);
+ if (!resp->md_text) {
+ return -ENOMEM;
+ }
+ spdk_base64_urlsafe_encode(resp->md_text, req->md, req->md_len);
+ }
+ }
+
+ return 0;
+}
+
+static void
+rpc_bdev_nvme_send_cmd_complete(struct rpc_bdev_nvme_send_cmd_ctx *ctx,
+ const struct spdk_nvme_cpl *cpl)
+{
+ struct spdk_jsonrpc_request *request = ctx->jsonrpc_request;
+ struct spdk_json_write_ctx *w;
+ int ret;
+
+ ret = rpc_bdev_nvme_send_cmd_resp_construct(&ctx->resp, &ctx->req, cpl);
+ if (ret) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ spdk_strerror(-ret));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_object_begin(w);
+ spdk_json_write_named_string(w, "cpl", ctx->resp.cpl_text);
+
+ if (ctx->resp.data_text) {
+ spdk_json_write_named_string(w, "data", ctx->resp.data_text);
+ }
+
+ if (ctx->resp.md_text) {
+ spdk_json_write_named_string(w, "metadata", ctx->resp.md_text);
+ }
+
+ spdk_json_write_object_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_nvme_send_cmd_ctx(ctx);
+ return;
+}
+
+static void
+nvme_rpc_bdev_nvme_cb(void *ref, const struct spdk_nvme_cpl *cpl)
+{
+ struct rpc_bdev_nvme_send_cmd_ctx *ctx = (struct rpc_bdev_nvme_send_cmd_ctx *)ref;
+
+ if (ctx->ctrlr_io_ch) {
+ spdk_put_io_channel(ctx->ctrlr_io_ch);
+ ctx->ctrlr_io_ch = NULL;
+ }
+
+ rpc_bdev_nvme_send_cmd_complete(ctx, cpl);
+}
+
+static int
+nvme_rpc_admin_cmd_bdev_nvme(struct rpc_bdev_nvme_send_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t nbytes, uint32_t timeout_ms)
+{
+ struct nvme_bdev_ctrlr *_nvme_ctrlr = ctx->nvme_bdev_ctrlr;
+ int ret;
+
+ ret = spdk_nvme_ctrlr_cmd_admin_raw(_nvme_ctrlr->ctrlr, cmd, buf,
+ nbytes, nvme_rpc_bdev_nvme_cb, ctx);
+
+ return ret;
+}
+
+static int
+nvme_rpc_io_cmd_bdev_nvme(struct rpc_bdev_nvme_send_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t nbytes, void *md_buf, uint32_t md_len,
+ uint32_t timeout_ms)
+{
+ struct nvme_bdev_ctrlr *_nvme_ctrlr = ctx->nvme_bdev_ctrlr;
+ struct spdk_nvme_qpair *io_qpair;
+ int ret;
+
+ ctx->ctrlr_io_ch = spdk_get_io_channel(_nvme_ctrlr->ctrlr);
+ io_qpair = bdev_nvme_get_io_qpair(ctx->ctrlr_io_ch);
+
+ ret = spdk_nvme_ctrlr_cmd_io_raw_with_md(_nvme_ctrlr->ctrlr, io_qpair,
+ cmd, buf, nbytes, md_buf, nvme_rpc_bdev_nvme_cb, ctx);
+ if (ret) {
+ spdk_put_io_channel(ctx->ctrlr_io_ch);
+ }
+
+ return ret;
+
+}
+
+static int
+rpc_bdev_nvme_send_cmd_exec(struct rpc_bdev_nvme_send_cmd_ctx *ctx)
+{
+ struct rpc_bdev_nvme_send_cmd_req *req = &ctx->req;
+ int ret = -EINVAL;
+
+ switch (req->cmd_type) {
+ case NVME_ADMIN_CMD:
+ ret = nvme_rpc_admin_cmd_bdev_nvme(ctx, req->cmdbuf, req->data,
+ req->data_len, req->timeout_ms);
+ break;
+ case NVME_IO_CMD:
+ ret = nvme_rpc_io_cmd_bdev_nvme(ctx, req->cmdbuf, req->data,
+ req->data_len, req->md, req->md_len, req->timeout_ms);
+ break;
+ }
+
+ return ret;
+}
+
+static int
+rpc_decode_cmd_type(const struct spdk_json_val *val, void *out)
+{
+ int *cmd_type = out;
+
+ if (spdk_json_strequal(val, "admin") == true) {
+ *cmd_type = NVME_ADMIN_CMD;
+ } else if (spdk_json_strequal(val, "io") == true) {
+ *cmd_type = NVME_IO_CMD;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: cmd_type\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+rpc_decode_data_direction(const struct spdk_json_val *val, void *out)
+{
+ int *data_direction = out;
+
+ if (spdk_json_strequal(val, "h2c") == true) {
+ *data_direction = SPDK_NVME_DATA_HOST_TO_CONTROLLER;
+ } else if (spdk_json_strequal(val, "c2h") == true) {
+ *data_direction = SPDK_NVME_DATA_CONTROLLER_TO_HOST;
+ } else {
+ SPDK_NOTICELOG("Invalid parameter value: data_direction\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+rpc_decode_cmdbuf(const struct spdk_json_val *val, void *out)
+{
+ char *text = NULL;
+ size_t text_strlen, raw_len;
+ struct spdk_nvme_cmd *cmdbuf, **_cmdbuf = out;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+ return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+
+ text_strlen = strlen(text);
+ raw_len = spdk_base64_get_decoded_len(text_strlen);
+ cmdbuf = malloc(raw_len);
+ if (!cmdbuf) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = spdk_base64_urlsafe_decode(cmdbuf, &raw_len, text);
+ if (rc) {
+ free(cmdbuf);
+ goto out;
+ }
+ if (raw_len != sizeof(*cmdbuf)) {
+ rc = -EINVAL;
+ free(cmdbuf);
+ goto out;
+ }
+
+ *_cmdbuf = cmdbuf;
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_data(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_bdev_nvme_send_cmd_req *req = (struct rpc_bdev_nvme_send_cmd_req *)out;
+ char *text = NULL;
+ size_t text_strlen;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+ return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+ text_strlen = strlen(text);
+
+ if (req->data_len) {
+ /* data_len is decoded by param "data_len" */
+ if (req->data_len != spdk_base64_get_decoded_len(text_strlen)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ req->data_len = spdk_base64_get_decoded_len(text_strlen);
+ req->data = spdk_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000,
+ NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!req->data) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+ rc = spdk_base64_urlsafe_decode(req->data, (size_t *)&req->data_len, text);
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_data_len(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_bdev_nvme_send_cmd_req *req = (struct rpc_bdev_nvme_send_cmd_req *)out;
+ uint32_t data_len;
+ int rc;
+
+ rc = spdk_json_decode_uint32(val, &data_len);
+ if (rc) {
+ return rc;
+ }
+
+ if (req->data_len) {
+ /* data_len is decoded by param "data" */
+ if (req->data_len != data_len) {
+ rc = -EINVAL;
+ }
+ } else {
+ req->data_len = data_len;
+ req->data = spdk_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000,
+ NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!req->data) {
+ rc = -ENOMEM;
+ }
+ }
+
+ return rc;
+}
+
+static int
+rpc_decode_metadata(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_bdev_nvme_send_cmd_req *req = (struct rpc_bdev_nvme_send_cmd_req *)out;
+ char *text = NULL;
+ size_t text_strlen;
+ int rc;
+
+ rc = spdk_json_decode_string(val, &text);
+ if (rc) {
+		return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL;
+ }
+ text_strlen = strlen(text);
+
+ if (req->md_len) {
+ /* md_len is decoded by param "metadata_len" */
+ if (req->md_len != spdk_base64_get_decoded_len(text_strlen)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ req->md_len = spdk_base64_get_decoded_len(text_strlen);
+ req->md = spdk_malloc(req->md_len, 0x1000, NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!req->md) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+
+ rc = spdk_base64_urlsafe_decode(req->md, (size_t *)&req->md_len, text);
+
+out:
+ free(text);
+ return rc;
+}
+
+static int
+rpc_decode_metadata_len(const struct spdk_json_val *val, void *out)
+{
+ struct rpc_bdev_nvme_send_cmd_req *req = (struct rpc_bdev_nvme_send_cmd_req *)out;
+ uint32_t md_len;
+ int rc;
+
+ rc = spdk_json_decode_uint32(val, &md_len);
+ if (rc) {
+ return rc;
+ }
+
+ if (req->md_len) {
+ /* md_len is decoded by param "metadata" */
+ if (req->md_len != md_len) {
+ rc = -EINVAL;
+ }
+ } else {
+ req->md_len = md_len;
+ req->md = spdk_malloc(req->md_len, 0x1000, NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!req->md) {
+ rc = -ENOMEM;
+ }
+ }
+
+ return rc;
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_send_cmd_req_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_nvme_send_cmd_req, name), spdk_json_decode_string},
+ {"cmd_type", offsetof(struct rpc_bdev_nvme_send_cmd_req, cmd_type), rpc_decode_cmd_type},
+ {"data_direction", offsetof(struct rpc_bdev_nvme_send_cmd_req, data_direction), rpc_decode_data_direction},
+ {"cmdbuf", offsetof(struct rpc_bdev_nvme_send_cmd_req, cmdbuf), rpc_decode_cmdbuf},
+ {"timeout_ms", offsetof(struct rpc_bdev_nvme_send_cmd_req, timeout_ms), spdk_json_decode_uint32, true},
+ {"data_len", 0, rpc_decode_data_len, true},
+ {"metadata_len", 0, rpc_decode_metadata_len, true},
+ {"data", 0, rpc_decode_data, true},
+ {"metadata", 0, rpc_decode_metadata, true},
+};
+
+static void
+rpc_bdev_nvme_send_cmd(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_send_cmd_ctx *ctx;
+ int ret, error_code;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx) {
+		SPDK_ERRLOG("Failed to allocate ctx\n");
+ error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR;
+ ret = -ENOMEM;
+ goto invalid;
+ }
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_send_cmd_req_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_send_cmd_req_decoders),
+ &ctx->req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS;
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ctx->nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(ctx->req.name);
+ if (ctx->nvme_bdev_ctrlr == NULL) {
+ SPDK_ERRLOG("Failed at device lookup\n");
+ error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS;
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ctx->jsonrpc_request = request;
+
+ ret = rpc_bdev_nvme_send_cmd_exec(ctx);
+ if (ret < 0) {
+ SPDK_NOTICELOG("Failed at rpc_bdev_nvme_send_cmd_exec\n");
+ error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR;
+ goto invalid;
+ }
+
+ return;
+
+invalid:
+ spdk_jsonrpc_send_error_response(request, error_code, spdk_strerror(-ret));
+ free_rpc_bdev_nvme_send_cmd_ctx(ctx);
+ return;
+}
+SPDK_RPC_REGISTER("bdev_nvme_send_cmd", rpc_bdev_nvme_send_cmd, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_nvme_send_cmd, send_nvme_cmd)
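
The "cmdbuf", "data" and "metadata" parameters of bdev_nvme_send_cmd are URL-safe base64 strings, and rpc_decode_cmdbuf() requires "cmdbuf" to decode to exactly sizeof(struct spdk_nvme_cmd) bytes. A hedged sketch of how a client built against the SPDK headers could encode an Identify Controller admin command for that field; the opcode and CDW10 value come from the NVMe specification, and everything else here is illustrative rather than part of this module:

#include "spdk/stdinc.h"
#include "spdk/base64.h"
#include "spdk/nvme_spec.h"

static char *
example_encode_identify_ctrlr_cmd(void)
{
	struct spdk_nvme_cmd cmd = {};
	char *encoded;

	cmd.opc = SPDK_NVME_OPC_IDENTIFY;
	cmd.cdw10 = SPDK_NVME_IDENTIFY_CTRLR;

	encoded = malloc(spdk_base64_get_encoded_strlen(sizeof(cmd)) + 1);
	if (encoded == NULL) {
		return NULL;
	}

	/* Same URL-safe encoding the response uses for "cpl", "data" and "metadata". */
	spdk_base64_urlsafe_encode(encoded, &cmd, sizeof(cmd));

	return encoded;
}
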
diff --git a/src/spdk/module/bdev/nvme/vbdev_opal.c b/src/spdk/module/bdev/nvme/vbdev_opal.c
new file mode 100644
index 000000000..68281c92b
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/vbdev_opal.c
@@ -0,0 +1,630 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/opal.h"
+#include "spdk/bdev_module.h"
+#include "vbdev_opal.h"
+#include "spdk_internal/log.h"
+#include "spdk/string.h"
+
+/* OPAL locking range only supports operations on nsid=1 for now */
+#define NSID_SUPPORTED 1
+
+struct opal_vbdev {
+ char *name;
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ struct spdk_opal_dev *opal_dev;
+ struct spdk_bdev_part *bdev_part;
+
+ uint8_t locking_range_id;
+ uint64_t range_start;
+ uint64_t range_length;
+ struct vbdev_opal_part_base *opal_base;
+
+ TAILQ_ENTRY(opal_vbdev) tailq;
+};
+
+static TAILQ_HEAD(, opal_vbdev) g_opal_vbdev =
+ TAILQ_HEAD_INITIALIZER(g_opal_vbdev);
+
+struct vbdev_opal_bdev_io {
+ struct spdk_io_channel *ch;
+ struct spdk_bdev_io *bdev_io;
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+struct vbdev_opal_channel {
+ struct spdk_bdev_part_channel part_ch;
+};
+
+struct vbdev_opal_part_base {
+ char *nvme_ctrlr_name;
+ struct spdk_bdev_part_base *part_base;
+ SPDK_BDEV_PART_TAILQ part_tailq;
+ TAILQ_ENTRY(vbdev_opal_part_base) tailq;
+};
+
+static TAILQ_HEAD(, vbdev_opal_part_base) g_opal_base = TAILQ_HEAD_INITIALIZER(g_opal_base);
+
+static void _vbdev_opal_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io);
+
+static void vbdev_opal_examine(struct spdk_bdev *bdev);
+
+static void
+vbdev_opal_delete(struct opal_vbdev *opal_bdev)
+{
+ TAILQ_REMOVE(&g_opal_vbdev, opal_bdev, tailq);
+ free(opal_bdev->name);
+ free(opal_bdev);
+ opal_bdev = NULL;
+}
+
+static void
+vbdev_opal_clear(void)
+{
+ struct opal_vbdev *opal_bdev, *tmp;
+
+ TAILQ_FOREACH_SAFE(opal_bdev, &g_opal_vbdev, tailq, tmp) {
+ vbdev_opal_delete(opal_bdev);
+ }
+}
+
+static int
+vbdev_opal_init(void)
+{
+ /* TODO */
+ return 0;
+}
+
+static void
+vbdev_opal_fini(void)
+{
+ vbdev_opal_clear();
+}
+
+static int
+vbdev_opal_get_ctx_size(void)
+{
+ return sizeof(struct vbdev_opal_bdev_io);
+}
+
+/* Delete all configs that share the same base bdev */
+static void
+vbdev_opal_delete_all_base_config(struct vbdev_opal_part_base *base)
+{
+ char *nvme_ctrlr_name = base->nvme_ctrlr_name;
+ struct opal_vbdev *bdev, *tmp_bdev;
+
+ TAILQ_FOREACH_SAFE(bdev, &g_opal_vbdev, tailq, tmp_bdev) {
+ if (!strcmp(nvme_ctrlr_name, bdev->nvme_ctrlr->name)) {
+ vbdev_opal_delete(bdev);
+ }
+ }
+}
+
+static int
+_vbdev_opal_destruct(void *ctx)
+{
+ struct spdk_bdev_part *part = ctx;
+
+ return spdk_bdev_part_free(part);
+}
+
+static void
+vbdev_opal_base_free(void *ctx)
+{
+ struct vbdev_opal_part_base *base = ctx;
+
+ TAILQ_REMOVE(&g_opal_base, base, tailq);
+
+ free(base->nvme_ctrlr_name);
+ free(base);
+}
+
+static void
+vbdev_opal_resubmit_io(void *arg)
+{
+ struct vbdev_opal_bdev_io *io_ctx = (struct vbdev_opal_bdev_io *)arg;
+
+ _vbdev_opal_submit_request(io_ctx->ch, io_ctx->bdev_io);
+}
+
+static void
+vbdev_opal_queue_io(struct vbdev_opal_bdev_io *io_ctx)
+{
+ struct vbdev_opal_channel *ch = spdk_io_channel_get_ctx(io_ctx->ch);
+ int rc;
+
+ io_ctx->bdev_io_wait.bdev = io_ctx->bdev_io->bdev;
+ io_ctx->bdev_io_wait.cb_fn = vbdev_opal_resubmit_io;
+ io_ctx->bdev_io_wait.cb_arg = io_ctx;
+
+ rc = spdk_bdev_queue_io_wait(io_ctx->bdev_io->bdev, ch->part_ch.base_ch, &io_ctx->bdev_io_wait);
+
+ if (rc != 0) {
+ SPDK_ERRLOG("Queue io failed in vbdev_opal_queue_io: %d\n", rc);
+ spdk_bdev_io_complete(io_ctx->bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+}
+
+static void
+_vbdev_opal_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
+{
+ struct vbdev_opal_channel *ch = spdk_io_channel_get_ctx(_ch);
+ struct vbdev_opal_bdev_io *io_ctx = (struct vbdev_opal_bdev_io *)bdev_io->driver_ctx;
+ int rc;
+
+ rc = spdk_bdev_part_submit_request(&ch->part_ch, bdev_io);
+ if (rc) {
+ if (rc == -ENOMEM) {
+ SPDK_DEBUGLOG(SPDK_LOG_VBDEV_OPAL, "opal: no memory, queue io.\n");
+ io_ctx->ch = _ch;
+ io_ctx->bdev_io = bdev_io;
+ vbdev_opal_queue_io(io_ctx);
+ } else {
+ SPDK_ERRLOG("opal: error on io submission, rc=%d.\n", rc);
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ }
+ }
+}
+
+static void
+vbdev_opal_io_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
+{
+ if (!success) {
+ spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+ return;
+ }
+
+ _vbdev_opal_submit_request(ch, bdev_io);
+}
+
+static void
+vbdev_opal_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ spdk_bdev_io_get_buf(bdev_io, vbdev_opal_io_get_buf_cb,
+ bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+ break;
+ default:
+ _vbdev_opal_submit_request(ch, bdev_io);
+ break;
+ }
+}
+
+struct spdk_opal_locking_range_info *
+vbdev_opal_get_info_from_bdev(const char *opal_bdev_name, const char *password)
+{
+ struct opal_vbdev *vbdev;
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int locking_range_id;
+ int rc;
+
+ TAILQ_FOREACH(vbdev, &g_opal_vbdev, tailq) {
+ if (strcmp(vbdev->name, opal_bdev_name) == 0) {
+ break;
+ }
+ }
+
+ if (vbdev == NULL) {
+ SPDK_ERRLOG("%s not found\n", opal_bdev_name);
+ return NULL;
+ }
+
+ nvme_ctrlr = vbdev->nvme_ctrlr;
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("can't find nvme_ctrlr of %s\n", vbdev->name);
+ return NULL;
+ }
+
+ locking_range_id = vbdev->locking_range_id;
+ rc = spdk_opal_cmd_get_locking_range_info(nvme_ctrlr->opal_dev, password,
+ OPAL_ADMIN1, locking_range_id);
+ if (rc) {
+ SPDK_ERRLOG("Get locking range info error: %d\n", rc);
+ return NULL;
+ }
+
+ return spdk_opal_get_locking_range_info(nvme_ctrlr->opal_dev, locking_range_id);
+}
+
+static int
+vbdev_opal_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+ struct spdk_bdev_part *part = ctx;
+ struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(part);
+ uint64_t offset = spdk_bdev_part_get_offset_blocks(part);
+
+ spdk_json_write_named_object_begin(w, "opal");
+
+ spdk_json_write_named_string(w, "base_bdev", spdk_bdev_get_name(base_bdev));
+ spdk_json_write_named_uint64(w, "offset_blocks", offset);
+
+ spdk_json_write_object_end(w);
+
+ return 0;
+}
+
+static void
+vbdev_opal_base_bdev_hotremove_cb(void *_part_base)
+{
+ struct spdk_bdev_part_base *part_base = _part_base;
+ struct vbdev_opal_part_base *base = spdk_bdev_part_base_get_ctx(part_base);
+
+ spdk_bdev_part_base_hotremove(part_base, spdk_bdev_part_base_get_tailq(part_base));
+ vbdev_opal_delete_all_base_config(base);
+}
+
+static bool
+vbdev_opal_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+ struct spdk_bdev_part *part = ctx;
+ struct spdk_bdev *base_bdev = spdk_bdev_part_get_base_bdev(part);
+
+ return spdk_bdev_io_type_supported(base_bdev, io_type);
+}
+
+static struct spdk_bdev_fn_table opal_vbdev_fn_table = {
+ .destruct = _vbdev_opal_destruct,
+ .submit_request = vbdev_opal_submit_request,
+ .io_type_supported = vbdev_opal_io_type_supported,
+ .dump_info_json = vbdev_opal_dump_info_json,
+ .write_config_json = NULL,
+};
+
+static struct spdk_bdev_module opal_if = {
+ .name = "opal",
+ .module_init = vbdev_opal_init,
+ .module_fini = vbdev_opal_fini,
+ .get_ctx_size = vbdev_opal_get_ctx_size,
+ .examine_config = vbdev_opal_examine,
+ .config_json = NULL,
+};
+
+SPDK_BDEV_MODULE_REGISTER(opal, &opal_if)
+
+int
+vbdev_opal_create(const char *nvme_ctrlr_name, uint32_t nsid, uint8_t locking_range_id,
+ uint64_t range_start, uint64_t range_length, const char *password)
+{
+ int rc;
+ char *opal_vbdev_name;
+ char *base_bdev_name;
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ struct opal_vbdev *opal_bdev;
+ struct vbdev_opal_part_base *opal_part_base = NULL;
+ struct spdk_bdev_part *part_bdev;
+ struct nvme_bdev *nvme_bdev;
+
+ if (nsid != NSID_SUPPORTED) {
+		SPDK_ERRLOG("nsid %"PRIu32" not supported\n", nsid);
+ return -EINVAL;
+ }
+
+ nvme_ctrlr = nvme_bdev_ctrlr_get_by_name(nvme_ctrlr_name);
+ if (!nvme_ctrlr) {
+ SPDK_ERRLOG("get nvme ctrlr failed\n");
+ return -ENODEV;
+ }
+
+ if (!nvme_ctrlr->opal_dev) {
+ SPDK_ERRLOG("Opal not supported\n");
+ return -ENOTSUP;
+ }
+
+ opal_bdev = calloc(1, sizeof(struct opal_vbdev));
+ if (!opal_bdev) {
+ SPDK_ERRLOG("allocation for opal_bdev failed\n");
+ return -ENOMEM;
+ }
+
+ opal_bdev->locking_range_id = locking_range_id;
+ opal_bdev->range_start = range_start;
+ opal_bdev->range_length = range_length;
+
+ opal_bdev->nvme_ctrlr = nvme_ctrlr;
+ opal_bdev->opal_dev = nvme_ctrlr->opal_dev;
+
+ nvme_bdev = TAILQ_FIRST(&nvme_ctrlr->namespaces[nsid - 1]->bdevs);
+ assert(nvme_bdev != NULL);
+ base_bdev_name = nvme_bdev->disk.name;
+
+	/* Traverse the base list to see if a part_base has already been created for this base bdev */
+ TAILQ_FOREACH(opal_part_base, &g_opal_base, tailq) {
+ if (!strcmp(spdk_bdev_part_base_get_bdev_name(opal_part_base->part_base), base_bdev_name)) {
+ break;
+ }
+ }
+
+	/* If there is no corresponding opal_part_base yet, create a new one.
+	   Each new part_base gets its own tailq to store all the parts of that base. */
+ if (opal_part_base == NULL) {
+ opal_part_base = calloc(1, sizeof(*opal_part_base));
+ if (opal_part_base == NULL) {
+ SPDK_ERRLOG("Could not allocate opal_part_base\n");
+ free(opal_bdev);
+ return -ENOMEM;
+ }
+ TAILQ_INIT(&opal_part_base->part_tailq);
+
+ opal_part_base->part_base = spdk_bdev_part_base_construct(spdk_bdev_get_by_name(base_bdev_name),
+ vbdev_opal_base_bdev_hotremove_cb, &opal_if,
+ &opal_vbdev_fn_table, &opal_part_base->part_tailq, vbdev_opal_base_free,
+ opal_part_base, sizeof(struct vbdev_opal_channel), NULL, NULL);
+ if (opal_part_base->part_base == NULL) {
+ SPDK_ERRLOG("Could not allocate part_base\n");
+ free(opal_bdev);
+ free(opal_part_base);
+ return -ENOMEM;
+ }
+ opal_part_base->nvme_ctrlr_name = strdup(nvme_ctrlr_name);
+ if (opal_part_base->nvme_ctrlr_name == NULL) {
+ free(opal_bdev);
+ spdk_bdev_part_base_free(opal_part_base->part_base);
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&g_opal_base, opal_part_base, tailq);
+ }
+ assert(opal_part_base != NULL);
+ opal_bdev->opal_base = opal_part_base;
+
+ part_bdev = calloc(1, sizeof(struct spdk_bdev_part));
+ if (!part_bdev) {
+ SPDK_ERRLOG("Could not allocate part_bdev\n");
+ free(opal_bdev);
+ return -ENOMEM;
+ }
+
+ TAILQ_INSERT_TAIL(&g_opal_vbdev, opal_bdev, tailq);
+ opal_vbdev_name = spdk_sprintf_alloc("%sr%" PRIu8, base_bdev_name,
+ opal_bdev->locking_range_id); /* e.g.: nvme0n1r1 */
+ if (opal_vbdev_name == NULL) {
+ SPDK_ERRLOG("Could not allocate opal_vbdev_name\n");
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ opal_bdev->name = opal_vbdev_name;
+ rc = spdk_opal_cmd_setup_locking_range(opal_bdev->opal_dev, OPAL_ADMIN1,
+ opal_bdev->locking_range_id, opal_bdev->range_start,
+ opal_bdev->range_length, password);
+ if (rc) {
+		SPDK_ERRLOG("Error constructing %s\n", opal_vbdev_name);
+ goto err;
+ }
+
+ rc = spdk_bdev_part_construct(part_bdev, opal_bdev->opal_base->part_base, opal_vbdev_name,
+ opal_bdev->range_start, opal_bdev->range_length, "Opal locking range");
+ if (rc) {
+ SPDK_ERRLOG("Could not allocate bdev part\n");
+ goto err;
+ }
+
+ /* lock this bdev initially */
+ rc = spdk_opal_cmd_lock_unlock(opal_bdev->opal_dev, OPAL_ADMIN1, OPAL_RWLOCK, locking_range_id,
+ password);
+ if (rc) {
+		SPDK_ERRLOG("Error locking %s\n", opal_vbdev_name);
+ goto err;
+ }
+
+ opal_bdev->bdev_part = part_bdev;
+ return 0;
+
+err:
+ vbdev_opal_delete(opal_bdev);
+ free(part_bdev);
+ return rc;
+}
+
+static void
+vbdev_opal_destruct_bdev(struct opal_vbdev *opal_bdev)
+{
+ struct spdk_bdev_part *part = opal_bdev->bdev_part;
+
+ assert(opal_bdev->opal_base != NULL);
+ assert(part != NULL);
+
+ if (opal_bdev->range_start == spdk_bdev_part_get_offset_blocks(part)) {
+ spdk_bdev_unregister(spdk_bdev_part_get_bdev(part), NULL, NULL);
+ }
+ vbdev_opal_delete(opal_bdev);
+}
+
+int
+vbdev_opal_destruct(const char *bdev_name, const char *password)
+{
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int locking_range_id;
+ int rc;
+ struct opal_vbdev *opal_bdev;
+
+ TAILQ_FOREACH(opal_bdev, &g_opal_vbdev, tailq) {
+ if (strcmp(opal_bdev->name, bdev_name) == 0) {
+ break;
+ }
+ }
+
+ if (opal_bdev == NULL) {
+ SPDK_ERRLOG("%s not found\n", bdev_name);
+ rc = -ENODEV;
+ goto err;
+ }
+
+ locking_range_id = opal_bdev->locking_range_id;
+
+ nvme_ctrlr = opal_bdev->nvme_ctrlr;
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("can't find nvme_ctrlr of %s\n", bdev_name);
+ return -ENODEV;
+ }
+
+ /* secure erase locking range */
+ rc = spdk_opal_cmd_secure_erase_locking_range(nvme_ctrlr->opal_dev, OPAL_ADMIN1, locking_range_id,
+ password);
+ if (rc) {
+ SPDK_ERRLOG("opal erase locking range failed\n");
+ goto err;
+ }
+
+ /* reset the locking range to 0 */
+ rc = spdk_opal_cmd_setup_locking_range(nvme_ctrlr->opal_dev, OPAL_ADMIN1, locking_range_id, 0,
+ 0, password);
+ if (rc) {
+ SPDK_ERRLOG("opal reset locking range failed\n");
+ goto err;
+ }
+
+ spdk_opal_free_locking_range_info(opal_bdev->opal_dev, locking_range_id);
+ vbdev_opal_destruct_bdev(opal_bdev);
+ return 0;
+
+err:
+ return rc;
+}
+
+static void
+vbdev_opal_examine(struct spdk_bdev *bdev)
+{
+ /* TODO */
+ spdk_bdev_module_examine_done(&opal_if);
+}
+
+int
+vbdev_opal_set_lock_state(const char *bdev_name, uint16_t user_id, const char *password,
+ const char *lock_state)
+{
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int locking_range_id;
+ int rc;
+ enum spdk_opal_lock_state state_flag;
+ struct opal_vbdev *opal_bdev;
+
+ TAILQ_FOREACH(opal_bdev, &g_opal_vbdev, tailq) {
+ if (strcmp(opal_bdev->name, bdev_name) == 0) {
+ break;
+ }
+ }
+
+ if (opal_bdev == NULL) {
+ SPDK_ERRLOG("%s not found\n", bdev_name);
+ return -ENODEV;
+ }
+
+ nvme_ctrlr = opal_bdev->nvme_ctrlr;
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("can't find nvme_ctrlr of %s\n", opal_bdev->name);
+ return -ENODEV;
+ }
+
+ if (strcasecmp(lock_state, "READWRITE") == 0) {
+ state_flag = OPAL_READWRITE;
+ } else if (strcasecmp(lock_state, "READONLY") == 0) {
+ state_flag = OPAL_READONLY;
+ } else if (strcasecmp(lock_state, "RWLOCK") == 0) {
+ state_flag = OPAL_RWLOCK;
+ } else {
+ SPDK_ERRLOG("Invalid OPAL lock state input\n");
+ return -EINVAL;
+ }
+
+ locking_range_id = opal_bdev->locking_range_id;
+ rc = spdk_opal_cmd_lock_unlock(nvme_ctrlr->opal_dev, user_id, state_flag, locking_range_id,
+ password);
+ if (rc) {
+ SPDK_ERRLOG("%s lock/unlock failure: %d\n", bdev_name, rc);
+ }
+
+ return rc;
+}
+
+int
+vbdev_opal_enable_new_user(const char *bdev_name, const char *admin_password, uint16_t user_id,
+ const char *user_password)
+{
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int locking_range_id;
+ int rc;
+ struct opal_vbdev *opal_bdev;
+
+ TAILQ_FOREACH(opal_bdev, &g_opal_vbdev, tailq) {
+ if (strcmp(opal_bdev->name, bdev_name) == 0) {
+ break;
+ }
+ }
+
+ if (opal_bdev == NULL) {
+ SPDK_ERRLOG("%s not found\n", bdev_name);
+ return -ENODEV;
+ }
+
+ nvme_ctrlr = opal_bdev->nvme_ctrlr;
+ if (nvme_ctrlr == NULL) {
+ SPDK_ERRLOG("can't find nvme_ctrlr of %s\n", opal_bdev->name);
+ return -ENODEV;
+ }
+
+ rc = spdk_opal_cmd_enable_user(nvme_ctrlr->opal_dev, user_id, admin_password);
+ if (rc) {
+ SPDK_ERRLOG("%s enable user error: %d\n", bdev_name, rc);
+ return rc;
+ }
+
+ rc = spdk_opal_cmd_set_new_passwd(nvme_ctrlr->opal_dev, user_id, user_password, admin_password,
+ true);
+ if (rc) {
+ SPDK_ERRLOG("%s set user password error: %d\n", bdev_name, rc);
+ return rc;
+ }
+
+ locking_range_id = opal_bdev->locking_range_id;
+ rc = spdk_opal_cmd_add_user_to_locking_range(nvme_ctrlr->opal_dev, user_id, locking_range_id,
+ OPAL_READONLY, admin_password);
+ if (rc) {
+ SPDK_ERRLOG("%s add user READONLY priority error: %d\n", bdev_name, rc);
+ return rc;
+ }
+
+ rc = spdk_opal_cmd_add_user_to_locking_range(nvme_ctrlr->opal_dev, user_id, locking_range_id,
+ OPAL_READWRITE, admin_password);
+ if (rc) {
+ SPDK_ERRLOG("%s add user READWRITE priority error: %d\n", bdev_name, rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_opal", SPDK_LOG_VBDEV_OPAL)
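
vbdev_opal_create() sets up the locking range, registers the part bdev under the name "<base bdev>r<locking range id>" (e.g. nvme0n1r1, per the comment above), and leaves the range locked until vbdev_opal_set_lock_state() unlocks it. A hypothetical call; the controller name, range geometry and password are illustrative:

#include "vbdev_opal.h"

static int
example_create_locked_range(void)
{
	/* Locking range 1 of nsid 1 on controller "Nvme0", covering 2097152 blocks from LBA 0. */
	return vbdev_opal_create("Nvme0", 1, 1, 0, 2097152, "example-password");
}
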
diff --git a/src/spdk/module/bdev/nvme/vbdev_opal.h b/src/spdk/module/bdev/nvme/vbdev_opal.h
new file mode 100644
index 000000000..0b2fd731f
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/vbdev_opal.h
@@ -0,0 +1,54 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPDK_VBDEV_OPAL_H
+#define SPDK_VBDEV_OPAL_H
+
+#include "spdk/bdev_module.h"
+#include "bdev_nvme.h"
+#include "common.h"
+
+int vbdev_opal_create(const char *nvme_ctrlr_name, uint32_t nsid, uint8_t locking_range_id,
+ uint64_t range_start, uint64_t range_length, const char *password);
+
+struct spdk_opal_locking_range_info *vbdev_opal_get_info_from_bdev(const char *opal_bdev_name,
+ const char *password);
+
+int vbdev_opal_destruct(const char *bdev_name, const char *password);
+
+int vbdev_opal_enable_new_user(const char *bdev_name, const char *admin_password,
+ uint16_t user_id, const char *user_password);
+
+int vbdev_opal_set_lock_state(const char *bdev_name, uint16_t user_id, const char *password,
+ const char *lock_state);
+
+#endif
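
vbdev_opal_set_lock_state() accepts the lock-state strings "READWRITE", "READONLY" and "RWLOCK" (compared case-insensitively in vbdev_opal.c). A hypothetical unlock for OPAL user 1, assuming the user was first enabled via vbdev_opal_enable_new_user(); the bdev name and password are illustrative:

#include "vbdev_opal.h"

static int
example_unlock_for_user1(void)
{
	/* Bdev name follows the "<base>r<id>" convention used by vbdev_opal_create(). */
	return vbdev_opal_set_lock_state("Nvme0n1r1", OPAL_USER1, "user1-password", "READWRITE");
}
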
diff --git a/src/spdk/module/bdev/nvme/vbdev_opal_rpc.c b/src/spdk/module/bdev/nvme/vbdev_opal_rpc.c
new file mode 100644
index 000000000..ee270ef35
--- /dev/null
+++ b/src/spdk/module/bdev/nvme/vbdev_opal_rpc.c
@@ -0,0 +1,453 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+#include "vbdev_opal.h"
+
+struct rpc_bdev_nvme_opal_init {
+ char *nvme_ctrlr_name;
+ char *password;
+};
+
+static void
+free_rpc_bdev_nvme_opal_init(struct rpc_bdev_nvme_opal_init *req)
+{
+ free(req->nvme_ctrlr_name);
+ free(req->password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_opal_init_decoders[] = {
+ {"nvme_ctrlr_name", offsetof(struct rpc_bdev_nvme_opal_init, nvme_ctrlr_name), spdk_json_decode_string},
+ {"password", offsetof(struct rpc_bdev_nvme_opal_init, password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_nvme_opal_init(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_opal_init req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_opal_init_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_opal_init_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ /* check if opal supported */
+ nvme_ctrlr = nvme_bdev_ctrlr_get_by_name(req.nvme_ctrlr_name);
+ if (nvme_ctrlr == NULL || nvme_ctrlr->opal_dev == NULL) {
+		SPDK_ERRLOG("%s does not support OPAL\n", req.nvme_ctrlr_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ /* take ownership */
+ rc = spdk_opal_cmd_take_ownership(nvme_ctrlr->opal_dev, req.password);
+ if (rc) {
+ SPDK_ERRLOG("Take ownership failure: %d\n", rc);
+ switch (rc) {
+ case -EBUSY:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "SP Busy, try again later");
+ break;
+ case -EACCES:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "This drive is already enabled");
+ break;
+ default:
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ }
+ goto out;
+ }
+
+ /* activate locking SP */
+ rc = spdk_opal_cmd_activate_locking_sp(nvme_ctrlr->opal_dev, req.password);
+ if (rc) {
+ SPDK_ERRLOG("Activate locking SP failure: %d\n", rc);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_nvme_opal_init(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_opal_init", rpc_bdev_nvme_opal_init, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_nvme_opal_revert {
+ char *nvme_ctrlr_name;
+ char *password;
+};
+
+static void
+free_rpc_bdev_nvme_opal_revert(struct rpc_bdev_nvme_opal_revert *req)
+{
+ free(req->nvme_ctrlr_name);
+ free(req->password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_opal_revert_decoders[] = {
+ {"nvme_ctrlr_name", offsetof(struct rpc_bdev_nvme_opal_revert, nvme_ctrlr_name), spdk_json_decode_string},
+ {"password", offsetof(struct rpc_bdev_nvme_opal_revert, password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_nvme_opal_revert(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_nvme_opal_revert req = {};
+ struct spdk_json_write_ctx *w;
+ struct nvme_bdev_ctrlr *nvme_ctrlr;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_opal_revert_decoders,
+ SPDK_COUNTOF(rpc_bdev_nvme_opal_revert_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ /* check if opal supported */
+ nvme_ctrlr = nvme_bdev_ctrlr_get_by_name(req.nvme_ctrlr_name);
+ if (nvme_ctrlr == NULL || nvme_ctrlr->opal_dev == NULL) {
+ SPDK_ERRLOG("%s not support opal\n", req.nvme_ctrlr_name);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ /* TODO: delete all opal vbdevs before reverting the TPer */
+
+ rc = spdk_opal_cmd_revert_tper(nvme_ctrlr->opal_dev, req.password);
+ if (rc) {
+ SPDK_ERRLOG("Revert TPer failure: %d\n", rc);
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_nvme_opal_revert(&req);
+}
+SPDK_RPC_REGISTER("bdev_nvme_opal_revert", rpc_bdev_nvme_opal_revert, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_opal_create {
+ char *nvme_ctrlr_name;
+ uint32_t nsid;
+ uint16_t locking_range_id;
+ uint64_t range_start;
+ uint64_t range_length;
+ char *password;
+};
+
+static void
+free_rpc_bdev_opal_create(struct rpc_bdev_opal_create *req)
+{
+ free(req->nvme_ctrlr_name);
+ free(req->password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_opal_create_decoders[] = {
+ {"nvme_ctrlr_name", offsetof(struct rpc_bdev_opal_create, nvme_ctrlr_name), spdk_json_decode_string},
+ {"nsid", offsetof(struct rpc_bdev_opal_create, nsid), spdk_json_decode_uint32},
+ {"locking_range_id", offsetof(struct rpc_bdev_opal_create, locking_range_id), spdk_json_decode_uint16},
+ {"range_start", offsetof(struct rpc_bdev_opal_create, range_start), spdk_json_decode_uint64},
+ {"range_length", offsetof(struct rpc_bdev_opal_create, range_length), spdk_json_decode_uint64},
+ {"password", offsetof(struct rpc_bdev_opal_create, password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_opal_create(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_opal_create req = {};
+ struct spdk_json_write_ctx *w;
+ char *opal_bdev_name;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_opal_create_decoders,
+ SPDK_COUNTOF(rpc_bdev_opal_create_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = vbdev_opal_create(req.nvme_ctrlr_name, req.nsid, req.locking_range_id, req.range_start,
+ req.range_length, req.password);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Failed to create opal vbdev from '%s': %s",
+ req.nvme_ctrlr_name, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ opal_bdev_name = spdk_sprintf_alloc("%sn%dr%d", req.nvme_ctrlr_name, req.nsid,
+ req.locking_range_id);
+ spdk_json_write_string(w, opal_bdev_name);
+ spdk_jsonrpc_end_result(request, w);
+ free(opal_bdev_name);
+
+out:
+ free_rpc_bdev_opal_create(&req);
+}
+SPDK_RPC_REGISTER("bdev_opal_create", rpc_bdev_opal_create, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_opal_get_info {
+ char *bdev_name;
+ char *password;
+};
+
+static void
+free_rpc_bdev_opal_get_info(struct rpc_bdev_opal_get_info *req)
+{
+ free(req->bdev_name);
+ free(req->password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_opal_get_info_decoders[] = {
+ {"bdev_name", offsetof(struct rpc_bdev_opal_get_info, bdev_name), spdk_json_decode_string},
+ {"password", offsetof(struct rpc_bdev_opal_get_info, password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_opal_get_info(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_opal_get_info req = {};
+ struct spdk_json_write_ctx *w;
+ struct spdk_opal_locking_range_info *info;
+
+ if (spdk_json_decode_object(params, rpc_bdev_opal_get_info_decoders,
+ SPDK_COUNTOF(rpc_bdev_opal_get_info_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ info = vbdev_opal_get_info_from_bdev(req.bdev_name, req.password);
+ if (info == NULL) {
+ SPDK_ERRLOG("Get opal info failure\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_object_begin(w);
+
+ spdk_json_write_named_string(w, "name", req.bdev_name);
+ spdk_json_write_named_uint64(w, "range_start", info->range_start);
+ spdk_json_write_named_uint64(w, "range_length", info->range_length);
+ spdk_json_write_named_bool(w, "read_lock_enabled", info->read_lock_enabled);
+ spdk_json_write_named_bool(w, "write_lock_enabled", info->write_lock_enabled);
+ spdk_json_write_named_bool(w, "read_locked", info->read_locked);
+ spdk_json_write_named_bool(w, "write_locked", info->write_locked);
+
+ spdk_json_write_object_end(w);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_opal_get_info(&req);
+}
+SPDK_RPC_REGISTER("bdev_opal_get_info", rpc_bdev_opal_get_info, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_opal_delete {
+ char *bdev_name;
+ char *password;
+};
+
+static void
+free_rpc_bdev_opal_delete(struct rpc_bdev_opal_delete *req)
+{
+ free(req->bdev_name);
+ free(req->password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_opal_delete_decoders[] = {
+ {"bdev_name", offsetof(struct rpc_bdev_opal_delete, bdev_name), spdk_json_decode_string},
+ {"password", offsetof(struct rpc_bdev_opal_delete, password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_opal_delete(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_opal_delete req = {};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_opal_delete_decoders,
+ SPDK_COUNTOF(rpc_bdev_opal_delete_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = vbdev_opal_destruct(req.bdev_name, req.password);
+ if (rc < 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+out:
+ free_rpc_bdev_opal_delete(&req);
+}
+SPDK_RPC_REGISTER("bdev_opal_delete", rpc_bdev_opal_delete, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_opal_set_lock_state {
+ char *bdev_name;
+ uint16_t user_id;
+ char *password;
+ char *lock_state;
+};
+
+static void
+free_rpc_bdev_opal_set_lock_state(struct rpc_bdev_opal_set_lock_state *req)
+{
+ free(req->bdev_name);
+ free(req->password);
+ free(req->lock_state);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_opal_set_lock_state_decoders[] = {
+ {"bdev_name", offsetof(struct rpc_bdev_opal_set_lock_state, bdev_name), spdk_json_decode_string},
+ {"user_id", offsetof(struct rpc_bdev_opal_set_lock_state, user_id), spdk_json_decode_uint16},
+ {"password", offsetof(struct rpc_bdev_opal_set_lock_state, password), spdk_json_decode_string},
+ {"lock_state", offsetof(struct rpc_bdev_opal_set_lock_state, lock_state), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_opal_set_lock_state(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_opal_set_lock_state req = {};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_opal_set_lock_state_decoders,
+ SPDK_COUNTOF(rpc_bdev_opal_set_lock_state_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = vbdev_opal_set_lock_state(req.bdev_name, req.user_id, req.password, req.lock_state);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_opal_set_lock_state(&req);
+}
+SPDK_RPC_REGISTER("bdev_opal_set_lock_state", rpc_bdev_opal_set_lock_state, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_opal_new_user {
+ char *bdev_name;
+ char *admin_password;
+ uint16_t user_id;
+ char *user_password;
+};
+
+static void
+free_rpc_bdev_opal_new_user(struct rpc_bdev_opal_new_user *req)
+{
+ free(req->bdev_name);
+ free(req->admin_password);
+ free(req->user_password);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_opal_new_user_decoders[] = {
+ {"bdev_name", offsetof(struct rpc_bdev_opal_new_user, bdev_name), spdk_json_decode_string},
+ {"admin_password", offsetof(struct rpc_bdev_opal_new_user, admin_password), spdk_json_decode_string},
+ {"user_id", offsetof(struct rpc_bdev_opal_new_user, user_id), spdk_json_decode_uint16},
+ {"user_password", offsetof(struct rpc_bdev_opal_new_user, user_password), spdk_json_decode_string},
+};
+
+static void
+rpc_bdev_opal_new_user(struct spdk_jsonrpc_request *request,
+ const struct spdk_json_val *params)
+{
+ struct rpc_bdev_opal_new_user req = {};
+ struct spdk_json_write_ctx *w;
+ int rc;
+
+ if (spdk_json_decode_object(params, rpc_bdev_opal_new_user_decoders,
+ SPDK_COUNTOF(rpc_bdev_opal_new_user_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto out;
+ }
+
+ rc = vbdev_opal_enable_new_user(req.bdev_name, req.admin_password, req.user_id,
+ req.user_password);
+ if (rc != 0) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(-rc));
+ goto out;
+ }
+
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_bool(w, true);
+ spdk_jsonrpc_end_result(request, w);
+
+out:
+ free_rpc_bdev_opal_new_user(&req);
+}
+SPDK_RPC_REGISTER("bdev_opal_new_user", rpc_bdev_opal_new_user, SPDK_RPC_RUNTIME)