diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/lib/bdev/nvme | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21. (refs: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/bdev/nvme')
-rw-r--r-- | src/spdk/lib/bdev/nvme/Makefile | 40 | ||||
-rw-r--r-- | src/spdk/lib/bdev/nvme/bdev_nvme.c | 1856 | ||||
-rw-r--r-- | src/spdk/lib/bdev/nvme/bdev_nvme.h | 112 | ||||
-rw-r--r-- | src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c | 740 | ||||
-rw-r--r-- | src/spdk/lib/bdev/nvme/nvme_rpc.c | 487 |
5 files changed, 3235 insertions, 0 deletions
diff --git a/src/spdk/lib/bdev/nvme/Makefile b/src/spdk/lib/bdev/nvme/Makefile new file mode 100644 index 00000000..c5a40c74 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c +LIBNAME = bdev_nvme + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.c b/src/spdk/lib/bdev/nvme/bdev_nvme.c new file mode 100644 index 00000000..07c3b6ce --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme.c @@ -0,0 +1,1856 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "bdev_nvme.h" + +#include "spdk/config.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/bdev.h" +#include "spdk/json.h" +#include "spdk/nvme.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/likely.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +static void bdev_nvme_get_spdk_running_config(FILE *fp); +static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); + +struct nvme_io_channel { + struct spdk_nvme_qpair *qpair; + struct spdk_poller *poller; + + bool collect_spin_stat; + uint64_t spin_ticks; + uint64_t start_ticks; + uint64_t end_ticks; +}; + +struct nvme_bdev_io { + /** array of iovecs to transfer. */ + struct iovec *iovs; + + /** Number of iovecs in iovs array. */ + int iovcnt; + + /** Current iovec position. */ + int iovpos; + + /** Offset in current iovec. */ + uint32_t iov_offset; + + /** Saved status for admin passthru completion event. 
*/ + struct spdk_nvme_cpl cpl; + + /** Originating thread */ + struct spdk_thread *orig_thread; +}; + +enum data_direction { + BDEV_DISK_READ = 0, + BDEV_DISK_WRITE = 1 +}; + +struct nvme_probe_ctx { + size_t count; + struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; + const char *names[NVME_MAX_CONTROLLERS]; + const char *hostnqn; +}; + +static struct spdk_bdev_nvme_opts g_opts = { + .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE, + .timeout_us = 0, + .retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT, + .nvme_adminq_poll_period_us = 1000000ULL, +}; + +#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL +#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL + +static int g_hot_insert_nvme_controller_index = 0; +static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT; +static bool g_nvme_hotplug_enabled = false; +static struct spdk_thread *g_bdev_nvme_init_thread; +static struct spdk_poller *g_hotplug_poller; +static char *g_nvme_hostnqn = NULL; +static pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER; + +static TAILQ_HEAD(, nvme_ctrlr) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs); + +static int nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr); +static int bdev_nvme_library_init(void); +static void bdev_nvme_library_fini(void); +static int bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, + struct nvme_bdev_io *bio, + int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, + uint64_t lba); +static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); +static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes); +static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, 
void *buf, size_t nbytes, void *md_buf, size_t md_len); +static int nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid); + +struct spdk_nvme_qpair * +spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) +{ + struct nvme_io_channel *nvme_ch; + + nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch); + + return nvme_ch->qpair; +} + +struct nvme_ctrlr * +spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name) +{ + struct nvme_ctrlr *_nvme_ctrlr; + + TAILQ_FOREACH(_nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (strcmp(ctrlr_name, _nvme_ctrlr->name) == 0) { + return _nvme_ctrlr; + } + } + + return NULL; +} + +struct nvme_ctrlr * +spdk_bdev_nvme_first_ctrlr(void) +{ + return TAILQ_FIRST(&g_nvme_ctrlrs); +} + +struct nvme_ctrlr * +spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev) +{ + return TAILQ_NEXT(prev, tailq); +} + +static int +bdev_nvme_get_ctx_size(void) +{ + return sizeof(struct nvme_bdev_io); +} + +static struct spdk_bdev_module nvme_if = { + .name = "nvme", + .module_init = bdev_nvme_library_init, + .module_fini = bdev_nvme_library_fini, + .config_text = bdev_nvme_get_spdk_running_config, + .config_json = bdev_nvme_config_json, + .get_ctx_size = bdev_nvme_get_ctx_size, + +}; +SPDK_BDEV_MODULE_REGISTER(&nvme_if) + +static int +bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n", + lba_count, lba); + + return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_READ, + iov, iovcnt, lba_count, lba); +} + +static int +bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu 
blocks with offset %#lx\n", + lba_count, lba); + + return bdev_nvme_queue_cmd(nbdev, nvme_ch->qpair, bio, BDEV_DISK_WRITE, + iov, iovcnt, lba_count, lba); +} + +static int +bdev_nvme_poll(void *arg) +{ + struct nvme_io_channel *ch = arg; + int32_t num_completions; + + if (ch->qpair == NULL) { + return -1; + } + + if (ch->collect_spin_stat && ch->start_ticks == 0) { + ch->start_ticks = spdk_get_ticks(); + } + + num_completions = spdk_nvme_qpair_process_completions(ch->qpair, 0); + + if (ch->collect_spin_stat) { + if (num_completions > 0) { + if (ch->end_ticks != 0) { + ch->spin_ticks += (ch->end_ticks - ch->start_ticks); + ch->end_ticks = 0; + } + ch->start_ticks = 0; + } else { + ch->end_ticks = spdk_get_ticks(); + } + } + + return num_completions; +} + +static int +bdev_nvme_poll_adminq(void *arg) +{ + struct spdk_nvme_ctrlr *ctrlr = arg; + + return spdk_nvme_ctrlr_process_admin_completions(ctrlr); +} + +static void +bdev_nvme_unregister_cb(void *io_device) +{ + struct spdk_nvme_ctrlr *ctrlr = io_device; + + spdk_nvme_detach(ctrlr); +} + +static int +bdev_nvme_destruct(void *ctx) +{ + struct nvme_bdev *nvme_disk = ctx; + struct nvme_ctrlr *nvme_ctrlr = nvme_disk->nvme_ctrlr; + + pthread_mutex_lock(&g_bdev_nvme_mutex); + nvme_ctrlr->ref--; + free(nvme_disk->disk.name); + memset(nvme_disk, 0, sizeof(*nvme_disk)); + if (nvme_ctrlr->ref == 0) { + TAILQ_REMOVE(&g_nvme_ctrlrs, nvme_ctrlr, tailq); + pthread_mutex_unlock(&g_bdev_nvme_mutex); + spdk_io_device_unregister(nvme_ctrlr->ctrlr, bdev_nvme_unregister_cb); + spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller); + free(nvme_ctrlr->name); + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr); + return 0; + } + + pthread_mutex_unlock(&g_bdev_nvme_mutex); + return 0; + +} + +static int +bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio, + uint64_t offset, uint64_t nbytes) +{ + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS); + + return 0; +} + +static void 
+_bdev_nvme_reset_done(struct spdk_io_channel_iter *i, int status) +{ + void *ctx = spdk_io_channel_iter_get_ctx(i); + int rc = SPDK_BDEV_IO_STATUS_SUCCESS; + + if (status) { + rc = SPDK_BDEV_IO_STATUS_FAILED; + } + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc); +} + +static void +_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i); + struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch); + + nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (!nvme_ch->qpair) { + spdk_for_each_channel_continue(i, -1); + return; + } + + spdk_for_each_channel_continue(i, 0); +} + +static void +_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i); + struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i); + int rc; + + if (status) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc != 0) { + spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + /* Recreate all of the I/O queue pairs */ + spdk_for_each_channel(ctrlr, + _bdev_nvme_reset_create_qpair, + bio, + _bdev_nvme_reset_done); + + +} + +static void +_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + int rc; + + rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair); + if (!rc) { + nvme_ch->qpair = NULL; + } + + spdk_for_each_channel_continue(i, rc); +} + +static int +bdev_nvme_reset(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio) +{ + /* First, delete all NVMe I/O queue pairs. 
*/ + spdk_for_each_channel(nbdev->nvme_ctrlr->ctrlr, + _bdev_nvme_reset_destroy_qpair, + bio, + _bdev_nvme_reset); + + return 0; +} + +static int +bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + uint64_t offset_blocks, + uint64_t num_blocks); + +static void +bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + int ret; + + ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + + if (spdk_likely(ret == 0)) { + return; + } else if (ret == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + } else { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int +_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + if (nvme_ch->qpair == NULL) { + /* The device is currently resetting */ + return -1; + } + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + return bdev_nvme_writev((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + + case SPDK_BDEV_IO_TYPE_UNMAP: + return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + 
+ case SPDK_BDEV_IO_TYPE_RESET: + return bdev_nvme_reset((struct nvme_bdev *)bdev_io->bdev->ctxt, + (struct nvme_bdev_io *)bdev_io->driver_ctx); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return bdev_nvme_flush((struct nvme_bdev *)bdev_io->bdev->ctxt, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks); + + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + return bdev_nvme_admin_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes); + + case SPDK_BDEV_IO_TYPE_NVME_IO: + return bdev_nvme_io_passthru((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes); + + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return bdev_nvme_io_passthru_md((struct nvme_bdev *)bdev_io->bdev->ctxt, + ch, + (struct nvme_bdev_io *)bdev_io->driver_ctx, + &bdev_io->u.nvme_passthru.cmd, + bdev_io->u.nvme_passthru.buf, + bdev_io->u.nvme_passthru.nbytes, + bdev_io->u.nvme_passthru.md_buf, + bdev_io->u.nvme_passthru.md_len); + + default: + return -EINVAL; + } + return 0; +} + +static void +bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + int rc = _bdev_nvme_submit_request(ch, bdev_io); + + if (spdk_unlikely(rc != 0)) { + if (rc == -ENOMEM) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM); + } else { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +static bool +bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct nvme_bdev *nbdev = ctx; + const struct spdk_nvme_ctrlr_data *cdata; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + case 
SPDK_BDEV_IO_TYPE_NVME_IO: + return true; + + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return spdk_nvme_ns_get_md_size(nbdev->ns) ? true : false; + + case SPDK_BDEV_IO_TYPE_UNMAP: + cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr); + return cdata->oncs.dsm; + + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr); + /* + * If an NVMe controller guarantees reading unallocated blocks returns zero, + * we can implement WRITE_ZEROES as an NVMe deallocate command. + */ + if (cdata->oncs.dsm && + spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->ns) == SPDK_NVME_DEALLOC_READ_00) { + return true; + } + /* + * The NVMe controller write_zeroes function is currently not used by our driver. + * If a user submits an arbitrarily large write_zeroes request to the controller, the request will fail. + * Until this is resolved, we only claim support for write_zeroes if deallocated blocks return 0's when read. + */ + return false; + + default: + return false; + } +} + +static int +bdev_nvme_create_cb(void *io_device, void *ctx_buf) +{ + struct spdk_nvme_ctrlr *ctrlr = io_device; + struct nvme_io_channel *ch = ctx_buf; + +#ifdef SPDK_CONFIG_VTUNE + ch->collect_spin_stat = true; +#else + ch->collect_spin_stat = false; +#endif + + ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + + if (ch->qpair == NULL) { + return -1; + } + + ch->poller = spdk_poller_register(bdev_nvme_poll, ch, 0); + return 0; +} + +static void +bdev_nvme_destroy_cb(void *io_device, void *ctx_buf) +{ + struct nvme_io_channel *ch = ctx_buf; + + spdk_nvme_ctrlr_free_io_qpair(ch->qpair); + spdk_poller_unregister(&ch->poller); +} + +static struct spdk_io_channel * +bdev_nvme_get_io_channel(void *ctx) +{ + struct nvme_bdev *nvme_bdev = ctx; + + return spdk_get_io_channel(nvme_bdev->nvme_ctrlr->ctrlr); +} + +void +spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w) +{ + const char *trtype_str; + const char 
*adrfam_str; + + trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype); + if (trtype_str) { + spdk_json_write_named_string(w, "trtype", trtype_str); + } + + adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam); + if (adrfam_str) { + spdk_json_write_named_string(w, "adrfam", adrfam_str); + } + + if (trid->traddr[0] != '\0') { + spdk_json_write_named_string(w, "traddr", trid->traddr); + } + + if (trid->trsvcid[0] != '\0') { + spdk_json_write_named_string(w, "trsvcid", trid->trsvcid); + } + + if (trid->subnqn[0] != '\0') { + spdk_json_write_named_string(w, "subnqn", trid->subnqn); + } +} + +static int +bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct nvme_bdev *nvme_bdev = ctx; + struct nvme_ctrlr *nvme_ctrlr = nvme_bdev->nvme_ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + union spdk_nvme_vs_register vs; + union spdk_nvme_csts_register csts; + char buf[128]; + + cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_ctrlr->ctrlr); + vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_ctrlr->ctrlr); + csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_ctrlr->ctrlr); + ns = nvme_bdev->ns; + + spdk_json_write_named_object_begin(w, "nvme"); + + if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + spdk_json_write_named_string(w, "pci_address", nvme_ctrlr->trid.traddr); + } + + spdk_json_write_named_object_begin(w, "trid"); + + spdk_bdev_nvme_dump_trid_json(&nvme_ctrlr->trid, w); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "ctrlr_data"); + + spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid); + + snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn); + spdk_str_trim(buf); + spdk_json_write_named_string(w, "model_number", buf); + + snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn); + spdk_str_trim(buf); + spdk_json_write_named_string(w, "serial_number", buf); + + snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr); + spdk_str_trim(buf); + 
spdk_json_write_named_string(w, "firmware_revision", buf); + + spdk_json_write_named_object_begin(w, "oacs"); + + spdk_json_write_named_uint32(w, "security", cdata->oacs.security); + spdk_json_write_named_uint32(w, "format", cdata->oacs.format); + spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware); + spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "vs"); + + spdk_json_write_name(w, "nvme_version"); + if (vs.bits.ter) { + spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter); + } else { + spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr); + } + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "csts"); + + spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy); + spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs); + + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "ns_data"); + + spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns)); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev needed */ +} + +static uint64_t +bdev_nvme_get_spin_time(struct spdk_io_channel *ch) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint64_t spin_time; + + if (!nvme_ch->collect_spin_stat) { + return 0; + } + + if (nvme_ch->end_ticks != 0) { + nvme_ch->spin_ticks += (nvme_ch->end_ticks - nvme_ch->start_ticks); + nvme_ch->end_ticks = 0; + } + + spin_time = (nvme_ch->spin_ticks * 1000000ULL) / spdk_get_ticks_hz(); + nvme_ch->start_ticks = 0; + nvme_ch->spin_ticks = 0; + + return spin_time; +} + +static const struct spdk_bdev_fn_table nvmelib_fn_table = { + .destruct = bdev_nvme_destruct, + .submit_request = bdev_nvme_submit_request, + .io_type_supported = 
bdev_nvme_io_type_supported, + .get_io_channel = bdev_nvme_get_io_channel, + .dump_info_json = bdev_nvme_dump_info_json, + .write_config_json = bdev_nvme_write_config_json, + .get_spin_time = bdev_nvme_get_spin_time, +}; + +static int +nvme_ctrlr_create_bdev(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid) +{ + struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; + struct nvme_bdev *bdev; + struct spdk_nvme_ns *ns; + const struct spdk_uuid *uuid; + const struct spdk_nvme_ctrlr_data *cdata; + int rc; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (!ns) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nsid); + return -EINVAL; + } + + bdev = &nvme_ctrlr->bdevs[nsid - 1]; + bdev->id = nsid; + + bdev->nvme_ctrlr = nvme_ctrlr; + bdev->ns = ns; + nvme_ctrlr->ref++; + + bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_ctrlr->name, spdk_nvme_ns_get_id(ns)); + if (!bdev->disk.name) { + nvme_ctrlr->ref--; + memset(bdev, 0, sizeof(*bdev)); + return -ENOMEM; + } + bdev->disk.product_name = "NVMe disk"; + + bdev->disk.write_cache = 0; + if (cdata->vwc.present) { + /* Enable if the Volatile Write Cache exists */ + bdev->disk.write_cache = 1; + } + bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns); + bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns); + bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns); + + uuid = spdk_nvme_ns_get_uuid(ns); + if (uuid != NULL) { + bdev->disk.uuid = *uuid; + } + + bdev->disk.ctxt = bdev; + bdev->disk.fn_table = &nvmelib_fn_table; + bdev->disk.module = &nvme_if; + rc = spdk_bdev_register(&bdev->disk); + if (rc) { + free(bdev->disk.name); + nvme_ctrlr->ref--; + memset(bdev, 0, sizeof(*bdev)); + return rc; + } + bdev->active = true; + + return 0; +} + + +static bool +hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr); + + 
return true; +} + +static struct nvme_ctrlr * +nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid) +{ + struct nvme_ctrlr *nvme_ctrlr; + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->trid) == 0) { + return nvme_ctrlr; + } + } + + return NULL; +} + +static struct nvme_ctrlr * +nvme_ctrlr_get_by_name(const char *name) +{ + struct nvme_ctrlr *nvme_ctrlr; + + if (name == NULL) { + return NULL; + } + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (strcmp(name, nvme_ctrlr->name) == 0) { + return nvme_ctrlr; + } + } + + return NULL; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr); + + if (nvme_ctrlr_get(trid)) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", + trid->traddr); + return false; + } + + if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { + bool claim_device = false; + size_t i; + + for (i = 0; i < ctx->count; i++) { + if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { + claim_device = true; + break; + } + } + + if (!claim_device) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr); + return false; + } + } + + if (ctx->hostnqn) { + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); + } + + return true; +} + +static void +spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = ctx; + int rc; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("Abort failed. 
Resetting controller.\n"); + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + } +} + +static void +timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, uint16_t cid) +{ + int rc; + union spdk_nvme_csts_register csts; + + SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); + + csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); + if (csts.bits.cfs) { + SPDK_ERRLOG("Controller Fatal Status, reset required\n"); + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + return; + } + + switch (g_opts.action_on_timeout) { + case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: + if (qpair) { + rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, + spdk_nvme_abort_cpl, ctrlr); + if (rc == 0) { + return; + } + + SPDK_ERRLOG("Unable to send abort. Resetting.\n"); + } + + /* FALLTHROUGH */ + case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: + break; + } +} + +static void +nvme_ctrlr_deactivate_bdev(struct nvme_bdev *bdev) +{ + spdk_bdev_unregister(&bdev->disk, NULL, NULL); + bdev->active = false; +} + +static void +nvme_ctrlr_update_ns_bdevs(struct nvme_ctrlr *nvme_ctrlr) +{ + struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr; + uint32_t i; + struct nvme_bdev *bdev; + + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + uint32_t nsid = i + 1; + + bdev = &nvme_ctrlr->bdevs[i]; + if (!bdev->active && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + SPDK_NOTICELOG("NSID %u to be added\n", nsid); + nvme_ctrlr_create_bdev(nvme_ctrlr, nsid); + } + + if (bdev->active && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + SPDK_NOTICELOG("NSID %u Bdev %s is removed\n", nsid, bdev->disk.name); + nvme_ctrlr_deactivate_bdev(bdev); + } + } + +} + +static void +aer_cb(void *arg, const struct spdk_nvme_cpl 
*cpl) +{ + struct nvme_ctrlr *nvme_ctrlr = arg; + union spdk_nvme_async_event_completion event; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("AER request execute failed"); + return; + } + + event.raw = cpl->cdw0; + if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) && + (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) { + nvme_ctrlr_update_ns_bdevs(nvme_ctrlr); + } +} + +static int +create_ctrlr(struct spdk_nvme_ctrlr *ctrlr, + const char *name, + const struct spdk_nvme_transport_id *trid) +{ + struct nvme_ctrlr *nvme_ctrlr; + + nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr)); + if (nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed to allocate device struct\n"); + return -ENOMEM; + } + nvme_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + nvme_ctrlr->bdevs = calloc(nvme_ctrlr->num_ns, sizeof(struct nvme_bdev)); + if (!nvme_ctrlr->bdevs) { + SPDK_ERRLOG("Failed to allocate block devices struct\n"); + free(nvme_ctrlr); + return -ENOMEM; + } + + nvme_ctrlr->adminq_timer_poller = NULL; + nvme_ctrlr->ctrlr = ctrlr; + nvme_ctrlr->ref = 0; + nvme_ctrlr->trid = *trid; + nvme_ctrlr->name = strdup(name); + if (nvme_ctrlr->name == NULL) { + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr); + return -ENOMEM; + } + + spdk_io_device_register(ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb, + sizeof(struct nvme_io_channel), + name); + + if (nvme_ctrlr_create_bdevs(nvme_ctrlr) != 0) { + spdk_io_device_unregister(ctrlr, bdev_nvme_unregister_cb); + free(nvme_ctrlr->bdevs); + free(nvme_ctrlr->name); + free(nvme_ctrlr); + return -1; + } + + nvme_ctrlr->adminq_timer_poller = spdk_poller_register(bdev_nvme_poll_adminq, ctrlr, + g_opts.nvme_adminq_poll_period_us); + + TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq); + + if (g_opts.timeout_us > 0 && g_opts.action_on_timeout != SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE) { + spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us, + timeout_cb, NULL); + } + + 
spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr); + + return 0; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct nvme_probe_ctx *ctx = cb_ctx; + char *name = NULL; + size_t i; + + if (ctx) { + for (i = 0; i < ctx->count; i++) { + if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { + name = strdup(ctx->names[i]); + break; + } + } + } else { + name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++); + } + if (!name) { + SPDK_ERRLOG("Failed to assign name to NVMe device\n"); + return; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name); + + create_ctrlr(ctrlr, name, trid); + + free(name); +} + +static void +remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t i; + struct nvme_ctrlr *nvme_ctrlr; + struct nvme_bdev *nvme_bdev; + + pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + if (nvme_ctrlr->ctrlr == ctrlr) { + pthread_mutex_unlock(&g_bdev_nvme_mutex); + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + uint32_t nsid = i + 1; + + nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1]; + assert(nvme_bdev->id == nsid); + if (nvme_bdev->active) { + spdk_bdev_unregister(&nvme_bdev->disk, NULL, NULL); + } + } + return; + } + } + pthread_mutex_unlock(&g_bdev_nvme_mutex); +} + +static int +bdev_nvme_hotplug(void *arg) +{ + if (spdk_nvme_probe(NULL, NULL, hotplug_probe_cb, attach_cb, remove_cb) != 0) { + SPDK_ERRLOG("spdk_nvme_probe() failed\n"); + } + + return -1; +} + +void +spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts) +{ + *opts = g_opts; +} + +int +spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts) +{ + if (g_bdev_nvme_init_thread != NULL) { + return -EPERM; + } + + g_opts = *opts; + + return 0; +} +struct set_nvme_hotplug_ctx { + uint64_t period_us; + bool enabled; + spdk_thread_fn fn; + void 
*fn_ctx; +}; + +static void +set_nvme_hotplug_period_cb(void *_ctx) +{ + struct set_nvme_hotplug_ctx *ctx = _ctx; + + spdk_poller_unregister(&g_hotplug_poller); + if (ctx->enabled) { + g_hotplug_poller = spdk_poller_register(bdev_nvme_hotplug, NULL, ctx->period_us); + } + + g_nvme_hotplug_poll_period_us = ctx->period_us; + g_nvme_hotplug_enabled = ctx->enabled; + if (ctx->fn) { + ctx->fn(ctx->fn_ctx); + } + + free(ctx); +} + +int +spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx) +{ + struct set_nvme_hotplug_ctx *ctx; + + if (enabled == true && !spdk_process_is_primary()) { + return -EPERM; + } + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + return -ENOMEM; + } + + period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us; + ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX); + ctx->enabled = enabled; + ctx->fn = cb; + ctx->fn_ctx = cb_ctx; + + spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx); + return 0; +} + +int +spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid, + const char *base_name, + const char **names, size_t *count, + const char *hostnqn) +{ + struct nvme_probe_ctx *probe_ctx; + struct nvme_ctrlr *nvme_ctrlr; + struct nvme_bdev *nvme_bdev; + uint32_t i, nsid; + size_t j; + + if (nvme_ctrlr_get(trid) != NULL) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr); + return -1; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (probe_ctx == NULL) { + SPDK_ERRLOG("Failed to allocate probe_ctx\n"); + return -1; + } + + probe_ctx->count = 1; + probe_ctx->trids[0] = *trid; + probe_ctx->names[0] = base_name; + probe_ctx->hostnqn = hostnqn; + if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { + SPDK_ERRLOG("Failed to probe for new devices\n"); + free(probe_ctx); + return -1; + } + + nvme_ctrlr = nvme_ctrlr_get(trid); + if (!nvme_ctrlr) { + SPDK_ERRLOG("Failed to find new 
NVMe controller\n"); + free(probe_ctx); + return -1; + } + + /* + * Report the new bdevs that were created in this call. + * There can be more than one bdev per NVMe controller since one bdev is created per namespace. + */ + j = 0; + for (i = 0; i < nvme_ctrlr->num_ns; i++) { + nsid = i + 1; + nvme_bdev = &nvme_ctrlr->bdevs[nsid - 1]; + if (!nvme_bdev->active) { + continue; + } + assert(nvme_bdev->id == nsid); + if (j < *count) { + names[j] = nvme_bdev->disk.name; + j++; + } else { + SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %zu. Unable to return all names of created bdevs\n", + *count); + free(probe_ctx); + return -1; + } + } + + *count = j; + + free(probe_ctx); + return 0; +} + +int +spdk_bdev_nvme_delete(const char *name) +{ + struct nvme_ctrlr *nvme_ctrlr = NULL; + + if (name == NULL) { + return -EINVAL; + } + + nvme_ctrlr = nvme_ctrlr_get_by_name(name); + if (nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed to find NVMe controller\n"); + return -ENODEV; + } + + remove_cb(NULL, nvme_ctrlr->ctrlr); + return 0; +} + +static int +bdev_nvme_library_init(void) +{ + struct spdk_conf_section *sp; + const char *val; + int rc = 0; + int64_t intval = 0; + size_t i; + struct nvme_probe_ctx *probe_ctx = NULL; + int retry_count; + uint32_t local_nvme_num = 0; + int64_t hotplug_period; + bool hotplug_enabled = g_nvme_hotplug_enabled; + + g_bdev_nvme_init_thread = spdk_get_thread(); + + sp = spdk_conf_find_section(NULL, "Nvme"); + if (sp == NULL) { + goto end; + } + + probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (probe_ctx == NULL) { + SPDK_ERRLOG("Failed to allocate probe_ctx\n"); + rc = -1; + goto end; + } + + if ((retry_count = spdk_conf_section_get_intval(sp, "RetryCount")) < 0) { + if ((retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount")) < 0) { + retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT; + } else { + SPDK_WARNLOG("NvmeRetryCount was renamed to RetryCount\n"); + SPDK_WARNLOG("Please update your configuration file\n"); + } + 
} + + g_opts.retry_count = retry_count; + + val = spdk_conf_section_get_val(sp, "TimeoutUsec"); + if (val != NULL) { + intval = strtoll(val, NULL, 10); + if (intval == LLONG_MIN || intval == LLONG_MAX) { + SPDK_ERRLOG("Invalid TimeoutUsec value\n"); + rc = -1; + goto end; + } else if (intval < 0) { + intval = 0; + } + } + + g_opts.timeout_us = intval; + + if (g_opts.timeout_us > 0) { + val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); + if (val != NULL) { + if (!strcasecmp(val, "Reset")) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } else if (!strcasecmp(val, "Abort")) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; + } + } else { + /* Handle old name for backward compatibility */ + val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout"); + if (val) { + SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n"); + SPDK_WARNLOG("Please update your configuration file\n"); + + if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) { + g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } + } + } + } + + intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); + if (intval > 0) { + g_opts.nvme_adminq_poll_period_us = intval; + } + + if (spdk_process_is_primary()) { + hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false); + } + + hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate"); + + g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN"); + probe_ctx->hostnqn = g_nvme_hostnqn; + + for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { + val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); + if (val == NULL) { + break; + } + + rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); + if (rc < 0) { + SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); + rc = -1; + goto end; + } + + val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); + if (val == NULL) { + SPDK_ERRLOG("No name provided for TransportID\n"); + 
rc = -1; + goto end; + } + + probe_ctx->names[i] = val; + probe_ctx->count++; + + if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ctrlr_opts opts; + + if (nvme_ctrlr_get(&probe_ctx->trids[i])) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", + probe_ctx->trids[i].traddr); + rc = -1; + goto end; + } + + if (probe_ctx->trids[i].subnqn[0] == '\0') { + SPDK_ERRLOG("Need to provide subsystem nqn\n"); + rc = -1; + goto end; + } + + spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts)); + + if (probe_ctx->hostnqn != NULL) { + snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn); + } + + ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts)); + if (ctrlr == NULL) { + SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n", + probe_ctx->trids[i].traddr); + rc = -1; + goto end; + } + + rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i]); + if (rc) { + goto end; + } + } else { + local_nvme_num++; + } + } + + if (local_nvme_num > 0) { + /* used to probe local NVMe device */ + if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, NULL)) { + rc = -1; + goto end; + } + + for (i = 0; i < probe_ctx->count; i++) { + if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { + continue; + } + + if (!nvme_ctrlr_get(&probe_ctx->trids[i])) { + SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); + SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); + } + } + } + + rc = spdk_bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL); + if (rc) { + SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc)); + rc = -1; + } +end: + spdk_nvme_retry_count = g_opts.retry_count; + + free(probe_ctx); + return rc; +} + +static void +bdev_nvme_library_fini(void) +{ + spdk_poller_unregister(&g_hotplug_poller); +} + +static int 
+nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr) +{ + int rc; + int bdev_created = 0; + uint32_t nsid; + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(nvme_ctrlr->ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(nvme_ctrlr->ctrlr, nsid)) { + rc = nvme_ctrlr_create_bdev(nvme_ctrlr, nsid); + if (rc == 0) { + bdev_created++; + } + } + + return (bdev_created > 0) ? 0 : -1; +} + +static void +bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref); + + spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc); +} + +static void +bdev_nvme_admin_passthru_completion(void *ctx) +{ + struct nvme_bdev_io *bio = ctx; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); + + spdk_bdev_io_complete_nvme_status(bdev_io, + bio->cpl.status.sct, bio->cpl.status.sc); +} + +static void +bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_bdev_io *bio = ref; + + bio->cpl = *cpl; + spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio); +} + +static void +bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) +{ + struct nvme_bdev_io *bio = ref; + struct iovec *iov; + + bio->iov_offset = sgl_offset; + for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) { + iov = &bio->iovs[bio->iovpos]; + if (bio->iov_offset < iov->iov_len) { + break; + } + + bio->iov_offset -= iov->iov_len; + } +} + +static int +bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) +{ + struct nvme_bdev_io *bio = ref; + struct iovec *iov; + + assert(bio->iovpos < bio->iovcnt); + + iov = &bio->iovs[bio->iovpos]; + + *address = iov->iov_base; + *length = iov->iov_len; + + if (bio->iov_offset) { + assert(bio->iov_offset <= iov->iov_len); + *address += bio->iov_offset; + *length -= bio->iov_offset; + } + + bio->iov_offset += *length; + if (bio->iov_offset == iov->iov_len) { + 
bio->iovpos++; + bio->iov_offset = 0; + } + + return 0; +} + +static int +bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, + struct nvme_bdev_io *bio, + int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, + uint64_t lba) +{ + int rc; + + bio->iovs = iov; + bio->iovcnt = iovcnt; + bio->iovpos = 0; + bio->iov_offset = 0; + + if (direction == BDEV_DISK_READ) { + rc = spdk_nvme_ns_cmd_readv(bdev->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, 0, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } else { + rc = spdk_nvme_ns_cmd_writev(bdev->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, 0, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } + + if (rc != 0 && rc != -ENOMEM) { + SPDK_ERRLOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc); + } + return rc; +} + +static int +bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + uint64_t offset_blocks, + uint64_t num_blocks) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES]; + struct spdk_nvme_dsm_range *range; + uint64_t offset, remaining; + uint64_t num_ranges_u64; + uint16_t num_ranges; + int rc; + + num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) / + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) { + SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks); + return -EINVAL; + } + num_ranges = (uint16_t)num_ranges_u64; + + offset = offset_blocks; + remaining = num_blocks; + range = &dsm_ranges[0]; + + /* Fill max-size ranges until the remaining blocks fit into one range */ + while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { + range->attributes.raw = 0; + range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + range->starting_lba = offset; + 
+ offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS; + range++; + } + + /* Final range describes the remaining blocks */ + range->attributes.raw = 0; + range->length = remaining; + range->starting_lba = offset; + + rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair, + SPDK_NVME_DSM_ATTR_DEALLOCATE, + dsm_ranges, num_ranges, + bdev_nvme_queued_done, bio); + + return rc; +} + +static int +bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) +{ + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + bio->orig_thread = spdk_io_channel_get_thread(ch); + + return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_ctrlr->ctrlr, cmd, buf, + (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio); +} + +static int +bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + /* + * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, + * so fill it out automatically. 
+ */ + cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns); + + return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, + (uint32_t)nbytes, bdev_nvme_queued_done, bio); +} + +static int +bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, + struct nvme_bdev_io *bio, + struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len) +{ + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->ns); + uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_ctrlr->ctrlr); + + if (nbytes > max_xfer_size) { + SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size); + return -EINVAL; + } + + if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->ns)) { + SPDK_ERRLOG("invalid meta data buffer size\n"); + return -EINVAL; + } + + /* + * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid, + * so fill it out automatically. + */ + cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns); + + return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf, + (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio); +} + +static void +bdev_nvme_get_spdk_running_config(FILE *fp) +{ + struct nvme_ctrlr *nvme_ctrlr; + + fprintf(fp, "\n[Nvme]"); + fprintf(fp, "\n" + "# NVMe Device Whitelist\n" + "# Users may specify which NVMe devices to claim by their transport id.\n" + "# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n" + "# The second argument is the assigned name, which can be referenced from\n" + "# other sections in the configuration file. 
For NVMe devices, a namespace\n" + "# is automatically appended to each name in the format <YourName>nY, where\n" + "# Y is the NSID (starts at 1).\n"); + + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + const char *trtype; + + trtype = spdk_nvme_transport_id_trtype_str(nvme_ctrlr->trid.trtype); + if (!trtype) { + continue; + } + + if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n", + trtype, + nvme_ctrlr->trid.traddr, nvme_ctrlr->name); + } else { + const char *adrfam; + + adrfam = spdk_nvme_transport_id_adrfam_str(nvme_ctrlr->trid.adrfam); + + if (adrfam) { + fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n", + trtype, adrfam, + nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid, + nvme_ctrlr->trid.subnqn, nvme_ctrlr->name); + } else { + fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s\n", + trtype, + nvme_ctrlr->trid.traddr, nvme_ctrlr->trid.trsvcid, + nvme_ctrlr->trid.subnqn, nvme_ctrlr->name); + } + + } + } + + fprintf(fp, "\n" + "# The number of attempts per I/O when an I/O fails. Do not include\n" + "# this key to get the default behavior.\n"); + fprintf(fp, "RetryCount %d\n", spdk_nvme_retry_count); + fprintf(fp, "\n" + "# Timeout for each command, in microseconds. If 0, don't track timeouts.\n"); + fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us); + + fprintf(fp, "\n" + "# Action to take on command time out. Only valid when Timeout is greater\n" + "# than 0. 
This may be 'Reset' to reset the controller, 'Abort' to abort\n" + "# the command, or 'None' to just print a message but do nothing.\n" + "# Admin command timeouts will always result in a reset.\n"); + switch (g_opts.action_on_timeout) { + case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: + fprintf(fp, "ActionOnTimeout None\n"); + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: + fprintf(fp, "ActionOnTimeout Reset\n"); + break; + case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT: + fprintf(fp, "ActionOnTimeout Abort\n"); + break; + } + + fprintf(fp, "\n" + "# Set how often the admin queue is polled for asynchronous events.\n" + "# Units in microseconds.\n"); + fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us); + fprintf(fp, "\n" + "# Disable handling of hotplug (runtime insert and remove) events,\n" + "# users can set to Yes if want to enable it.\n" + "# Default: No\n"); + fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No"); + fprintf(fp, "\n" + "# Set how often the hotplug is processed for insert and remove events." 
+ "# Units in microseconds.\n"); + fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us); + if (g_nvme_hostnqn) { + fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn); + } + + fprintf(fp, "\n"); +} + +static int +bdev_nvme_config_json(struct spdk_json_write_ctx *w) +{ + struct nvme_ctrlr *nvme_ctrlr; + struct spdk_nvme_transport_id *trid; + const char *action; + + if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) { + action = "reset"; + } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) { + action = "abort"; + } else { + action = "none"; + } + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "set_bdev_nvme_options"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "action_on_timeout", action); + spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us); + spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count); + spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) { + trid = &nvme_ctrlr->trid; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "construct_nvme_bdev"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", nvme_ctrlr->name); + spdk_bdev_nvme_dump_trid_json(trid, w); + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + } + + /* Dump as last parameter to give all NVMe bdevs chance to be constructed + * before enabling hotplug poller. 
+ */ + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "set_bdev_nvme_hotplug"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us); + spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); + + pthread_mutex_unlock(&g_bdev_nvme_mutex); + return 0; +} + +struct spdk_nvme_ctrlr * +spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev) +{ + if (!bdev || bdev->module != &nvme_if) { + return NULL; + } + + return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_ctrlr->ctrlr; +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME) diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme.h b/src/spdk/lib/bdev/nvme/bdev_nvme.h new file mode 100644 index 00000000..b8c458e8 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme.h @@ -0,0 +1,112 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_NVME_H +#define SPDK_BDEV_NVME_H + +#include "spdk/stdinc.h" + +#include "spdk/queue.h" +#include "spdk/nvme.h" +#include "spdk/bdev_module.h" + +#define NVME_MAX_CONTROLLERS 1024 + +enum spdk_bdev_timeout_action { + SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, + SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, + SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT, +}; + +struct spdk_bdev_nvme_opts { + enum spdk_bdev_timeout_action action_on_timeout; + uint64_t timeout_us; + uint32_t retry_count; + uint64_t nvme_adminq_poll_period_us; +}; + +struct nvme_ctrlr { + /** + * points to pinned, physically contiguous memory region; + * contains 4KB IDENTIFY structure for controller which is + * target for CONTROLLER IDENTIFY command during initialization + */ + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id trid; + char *name; + int ref; + uint32_t num_ns; + /** Array of bdevs indexed by nsid - 1 */ + struct nvme_bdev *bdevs; + + struct spdk_poller *adminq_timer_poller; + + /** linked list pointer for device list */ + TAILQ_ENTRY(nvme_ctrlr) tailq; +}; + +struct nvme_bdev { + struct spdk_bdev disk; + struct nvme_ctrlr *nvme_ctrlr; + uint32_t id; + bool active; + struct spdk_nvme_ns *ns; +}; + +void spdk_bdev_nvme_dump_trid_json(struct spdk_nvme_transport_id *trid, + struct spdk_json_write_ctx *w); + +struct spdk_nvme_qpair *spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); +struct nvme_ctrlr 
*spdk_bdev_nvme_lookup_ctrlr(const char *ctrlr_name); +struct nvme_ctrlr *spdk_bdev_nvme_first_ctrlr(void); +struct nvme_ctrlr *spdk_bdev_nvme_next_ctrlr(struct nvme_ctrlr *prev); +void spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts); +int spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts); +int spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_thread_fn cb, void *cb_ctx); + +int spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid, + const char *base_name, + const char **names, size_t *count, + const char *hostnqn); +struct spdk_nvme_ctrlr *spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev); + +/** + * Delete NVMe controller with all bdevs on top of it. + * Requires to pass name of NVMe controller. + * + * \param name NVMe controller name + * \return zero on success, -EINVAL on wrong parameters or -ENODEV if controller is not found + */ +int spdk_bdev_nvme_delete(const char *name); + +#endif // SPDK_BDEV_NVME_H diff --git a/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c new file mode 100644 index 00000000..0312a756 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/bdev_nvme_rpc.c @@ -0,0 +1,740 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "bdev_nvme.h" + +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" +#include "spdk/bdev_module.h" + +struct open_descriptors { + void *desc; + struct spdk_bdev *bdev; + TAILQ_ENTRY(open_descriptors) tqlst; +}; +typedef TAILQ_HEAD(, open_descriptors) open_descriptors_t; + +static int +rpc_decode_action_on_timeout(const struct spdk_json_val *val, void *out) +{ + enum spdk_bdev_timeout_action *action = out; + + if (spdk_json_strequal(val, "none") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE; + } else if (spdk_json_strequal(val, "abort") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; + } else if (spdk_json_strequal(val, "reset") == true) { + *action = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; + } else { + SPDK_NOTICELOG("Invalid parameter value: action_on_timeout\n"); + return -EINVAL; + } + + return 0; +} + +static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = { + {"action_on_timeout", offsetof(struct spdk_bdev_nvme_opts, action_on_timeout), rpc_decode_action_on_timeout, true}, + {"timeout_us", offsetof(struct spdk_bdev_nvme_opts, 
timeout_us), spdk_json_decode_uint64, true}, + {"retry_count", offsetof(struct spdk_bdev_nvme_opts, retry_count), spdk_json_decode_uint32, true}, + {"nvme_adminq_poll_period_us", offsetof(struct spdk_bdev_nvme_opts, nvme_adminq_poll_period_us), spdk_json_decode_uint64, true}, +}; + +static void +spdk_rpc_set_bdev_nvme_options(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_bdev_nvme_opts opts; + struct spdk_json_write_ctx *w; + int rc; + + spdk_bdev_nvme_get_opts(&opts); + if (params && spdk_json_decode_object(params, rpc_bdev_nvme_options_decoders, + SPDK_COUNTOF(rpc_bdev_nvme_options_decoders), + &opts)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_set_opts(&opts); + if (rc) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } + + return; +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("set_bdev_nvme_options", spdk_rpc_set_bdev_nvme_options, SPDK_RPC_STARTUP) + +struct rpc_bdev_nvme_hotplug { + bool enabled; + uint64_t period_us; +}; + +static const struct spdk_json_object_decoder rpc_bdev_nvme_hotplug_decoders[] = { + {"enable", offsetof(struct rpc_bdev_nvme_hotplug, enabled), spdk_json_decode_bool, false}, + {"period_us", offsetof(struct rpc_bdev_nvme_hotplug, period_us), spdk_json_decode_uint64, true}, +}; + +static void +rpc_set_bdev_nvme_hotplug_done(void *ctx) +{ + struct spdk_jsonrpc_request *request = ctx; + struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request); + + if (w != NULL) { + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + } +} + +static void +spdk_rpc_set_bdev_nvme_hotplug(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_nvme_hotplug req = {false, 0}; + int rc; + 
+ if (spdk_json_decode_object(params, rpc_bdev_nvme_hotplug_decoders, + SPDK_COUNTOF(rpc_bdev_nvme_hotplug_decoders), &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_set_hotplug(req.enabled, req.period_us, rpc_set_bdev_nvme_hotplug_done, + request); + if (rc) { + goto invalid; + } + + return; +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, spdk_strerror(-rc)); +} +SPDK_RPC_REGISTER("set_bdev_nvme_hotplug", spdk_rpc_set_bdev_nvme_hotplug, SPDK_RPC_RUNTIME) + +struct rpc_construct_nvme { + char *name; + char *trtype; + char *adrfam; + char *traddr; + char *trsvcid; + char *subnqn; + char *hostnqn; +}; + +static void +free_rpc_construct_nvme(struct rpc_construct_nvme *req) +{ + free(req->name); + free(req->trtype); + free(req->adrfam); + free(req->traddr); + free(req->trsvcid); + free(req->subnqn); + free(req->hostnqn); +} + +static const struct spdk_json_object_decoder rpc_construct_nvme_decoders[] = { + {"name", offsetof(struct rpc_construct_nvme, name), spdk_json_decode_string}, + {"trtype", offsetof(struct rpc_construct_nvme, trtype), spdk_json_decode_string}, + {"traddr", offsetof(struct rpc_construct_nvme, traddr), spdk_json_decode_string}, + + {"adrfam", offsetof(struct rpc_construct_nvme, adrfam), spdk_json_decode_string, true}, + {"trsvcid", offsetof(struct rpc_construct_nvme, trsvcid), spdk_json_decode_string, true}, + {"subnqn", offsetof(struct rpc_construct_nvme, subnqn), spdk_json_decode_string, true}, + {"hostnqn", offsetof(struct rpc_construct_nvme, hostnqn), spdk_json_decode_string, true} +}; + +#define NVME_MAX_BDEVS_PER_RPC 128 + +static void +spdk_rpc_construct_nvme_bdev(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_construct_nvme req = {}; + struct spdk_json_write_ctx *w; + struct spdk_nvme_transport_id trid = {}; + const char *names[NVME_MAX_BDEVS_PER_RPC]; + size_t count; + size_t i; + int 
rc; + + if (spdk_json_decode_object(params, rpc_construct_nvme_decoders, + SPDK_COUNTOF(rpc_construct_nvme_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + /* Parse trtype */ + rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, req.trtype); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse trtype: %s\n", req.trtype); + goto invalid; + } + + /* Parse traddr */ + snprintf(trid.traddr, sizeof(trid.traddr), "%s", req.traddr); + + /* Parse adrfam */ + if (req.adrfam) { + rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, req.adrfam); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse adrfam: %s\n", req.adrfam); + goto invalid; + } + } + + /* Parse trsvcid */ + if (req.trsvcid) { + snprintf(trid.trsvcid, sizeof(trid.trsvcid), "%s", req.trsvcid); + } + + /* Parse subnqn */ + if (req.subnqn) { + snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", req.subnqn); + } + + count = NVME_MAX_BDEVS_PER_RPC; + if (spdk_bdev_nvme_create(&trid, req.name, names, &count, req.hostnqn)) { + goto invalid; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + free_rpc_construct_nvme(&req); + return; + } + + spdk_json_write_array_begin(w); + for (i = 0; i < count; i++) { + spdk_json_write_string(w, names[i]); + } + spdk_json_write_array_end(w); + spdk_jsonrpc_end_result(request, w); + + free_rpc_construct_nvme(&req); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + free_rpc_construct_nvme(&req); +} +SPDK_RPC_REGISTER("construct_nvme_bdev", spdk_rpc_construct_nvme_bdev, SPDK_RPC_RUNTIME) + +static void +spdk_rpc_dump_nvme_controller_info(struct spdk_json_write_ctx *w, + struct nvme_ctrlr *nvme_ctrlr) +{ + struct spdk_nvme_transport_id *trid; + + trid = &nvme_ctrlr->trid; + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "name", nvme_ctrlr->name); + + spdk_json_write_named_object_begin(w, "trid"); + spdk_bdev_nvme_dump_trid_json(trid, 
w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +struct rpc_get_nvme_controllers { + char *name; +}; + +static void +free_rpc_get_nvme_controllers(struct rpc_get_nvme_controllers *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_get_nvme_controllers_decoders[] = { + {"name", offsetof(struct rpc_get_nvme_controllers, name), spdk_json_decode_string, true}, +}; + +static void +spdk_rpc_get_nvme_controllers(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_get_nvme_controllers req = {}; + struct spdk_json_write_ctx *w; + struct nvme_ctrlr *ctrlr = NULL; + + if (params && spdk_json_decode_object(params, rpc_get_nvme_controllers_decoders, + SPDK_COUNTOF(rpc_get_nvme_controllers_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + goto invalid; + } + + if (req.name) { + ctrlr = spdk_bdev_nvme_lookup_ctrlr(req.name); + if (ctrlr == NULL) { + SPDK_ERRLOG("ctrlr '%s' does not exist\n", req.name); + goto invalid; + } + } + + free_rpc_get_nvme_controllers(&req); + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_array_begin(w); + + if (ctrlr != NULL) { + spdk_rpc_dump_nvme_controller_info(w, ctrlr); + } else { + for (ctrlr = spdk_bdev_nvme_first_ctrlr(); ctrlr; ctrlr = spdk_bdev_nvme_next_ctrlr(ctrlr)) { + spdk_rpc_dump_nvme_controller_info(w, ctrlr); + } + } + + spdk_json_write_array_end(w); + + spdk_jsonrpc_end_result(request, w); + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + + free_rpc_get_nvme_controllers(&req); +} +SPDK_RPC_REGISTER("get_nvme_controllers", spdk_rpc_get_nvme_controllers, SPDK_RPC_RUNTIME) + +struct rpc_delete_nvme { + char *name; +}; + +static void +free_rpc_delete_nvme(struct rpc_delete_nvme *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_delete_nvme_decoders[] = { + {"name", 
offsetof(struct rpc_delete_nvme, name), spdk_json_decode_string}, +}; + +static void +spdk_rpc_delete_nvme_ctrlr(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_delete_nvme req = {NULL}; + struct spdk_json_write_ctx *w; + int rc = 0; + + if (spdk_json_decode_object(params, rpc_delete_nvme_decoders, + SPDK_COUNTOF(rpc_delete_nvme_decoders), + &req)) { + rc = -EINVAL; + goto invalid; + } + + rc = spdk_bdev_nvme_delete(req.name); + if (rc != 0) { + goto invalid; + } + + free_rpc_delete_nvme(&req); + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + return; + } + + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); + return; + +invalid: + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + spdk_strerror(-rc)); + free_rpc_delete_nvme(&req); +} +SPDK_RPC_REGISTER("delete_nvme_controller", spdk_rpc_delete_nvme_ctrlr, SPDK_RPC_RUNTIME) + +struct rpc_apply_firmware { + char *filename; + char *bdev_name; +}; + +static void +free_rpc_apply_firmware(struct rpc_apply_firmware *req) +{ + free(req->filename); + free(req->bdev_name); +} + +static const struct spdk_json_object_decoder rpc_apply_firmware_decoders[] = { + {"filename", offsetof(struct rpc_apply_firmware, filename), spdk_json_decode_string}, + {"bdev_name", offsetof(struct rpc_apply_firmware, bdev_name), spdk_json_decode_string}, +}; + +struct firmware_update_info { + void *fw_image; + void *p; + unsigned int size; + unsigned int size_remaining; + unsigned int offset; + unsigned int transfer; + + void *desc; + struct spdk_io_channel *ch; + struct spdk_jsonrpc_request *request; + struct spdk_nvme_ctrlr *ctrlr; + open_descriptors_t desc_head; + struct rpc_apply_firmware *req; +}; + +static void +apply_firmware_cleanup(void *cb_arg) +{ + struct open_descriptors *opt, *tmp; + struct firmware_update_info *firm_ctx = cb_arg; + + if (!firm_ctx) { + return; + } + + if (firm_ctx->fw_image) { + 
spdk_dma_free(firm_ctx->fw_image); + } + + if (firm_ctx->req) { + free_rpc_apply_firmware(firm_ctx->req); + free(firm_ctx->req); + } + TAILQ_FOREACH_SAFE(opt, &firm_ctx->desc_head, tqlst, tmp) { + TAILQ_REMOVE(&firm_ctx->desc_head, opt, tqlst); + spdk_bdev_close(opt->desc); + free(opt); + } + free(firm_ctx); +} + +static void +apply_firmware_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + int rc; + struct spdk_json_write_ctx *w; + struct firmware_update_info *firm_ctx = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware commit failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((rc = spdk_nvme_ctrlr_reset(firm_ctx->ctrlr)) != 0) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Controller reset failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + if (!(w = spdk_jsonrpc_begin_result(firm_ctx->request))) { + apply_firmware_cleanup(firm_ctx); + return; + } + + spdk_json_write_string(w, "firmware commit succeeded. 
Controller reset in progress."); + spdk_jsonrpc_end_result(firm_ctx->request, w); + apply_firmware_cleanup(firm_ctx); +} + +static void +apply_firmware_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_nvme_cmd cmd = {}; + struct spdk_nvme_fw_commit fw_commit; + int slot = 0; + int rc; + struct firmware_update_info *firm_ctx = cb_arg; + enum spdk_nvme_fw_commit_action commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG; + + if (!success) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware download failed ."); + spdk_bdev_free_io(bdev_io); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->p += firm_ctx->transfer; + firm_ctx->offset += firm_ctx->transfer; + firm_ctx->size_remaining -= firm_ctx->transfer; + + switch (firm_ctx->size_remaining) { + case 0: + /* firmware download completed. Commit firmware */ + memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit)); + fw_commit.fs = slot; + fw_commit.ca = commit_action; + + cmd.opc = SPDK_NVME_OPC_FIRMWARE_COMMIT; + memcpy(&cmd.cdw10, &fw_commit, sizeof(uint32_t)); + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, NULL, 0, + apply_firmware_complete_reset, firm_ctx); + if (rc) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware commit failed."); + spdk_bdev_free_io(bdev_io); + apply_firmware_cleanup(firm_ctx); + return; + } + break; + default: + firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096); + cmd.opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + + cmd.cdw10 = (firm_ctx->transfer >> 2) - 1; + cmd.cdw11 = firm_ctx->offset >> 2; + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, &cmd, firm_ctx->p, + firm_ctx->transfer, apply_firmware_complete, firm_ctx); + if (rc) { + spdk_jsonrpc_send_error_response(firm_ctx->request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "firmware download failed."); + spdk_bdev_free_io(bdev_io); + 
apply_firmware_cleanup(firm_ctx); + return; + } + break; + } +} + +static void +spdk_rpc_apply_nvme_firmware(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + int rc; + int fd = -1; + struct stat fw_stat; + struct spdk_nvme_ctrlr *ctrlr; + char msg[1024]; + struct spdk_bdev *bdev; + struct spdk_bdev *bdev2; + struct open_descriptors *opt; + struct spdk_bdev_desc *desc; + struct spdk_nvme_cmd *cmd; + struct firmware_update_info *firm_ctx; + + firm_ctx = malloc(sizeof(struct firmware_update_info)); + if (!firm_ctx) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + return; + } + firm_ctx->fw_image = NULL; + TAILQ_INIT(&firm_ctx->desc_head); + firm_ctx->request = request; + + firm_ctx->req = malloc(sizeof(struct rpc_apply_firmware)); + if (!firm_ctx->req) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + free(firm_ctx); + return; + } + + if (spdk_json_decode_object(params, rpc_apply_firmware_decoders, + SPDK_COUNTOF(rpc_apply_firmware_decoders), firm_ctx->req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed."); + free(firm_ctx->req); + free(firm_ctx); + return; + } + + if ((bdev = spdk_bdev_get_by_name(firm_ctx->req->bdev_name)) == NULL) { + snprintf(msg, sizeof(msg), "bdev %s were not found", firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((ctrlr = spdk_bdev_nvme_get_ctrlr(bdev)) == NULL) { + snprintf(msg, sizeof(msg), "Controller information for %s were not found.", + firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + firm_ctx->ctrlr = ctrlr; + + for (bdev2 = spdk_bdev_first(); bdev2; bdev2 = 
spdk_bdev_next(bdev2)) { + + if (spdk_bdev_nvme_get_ctrlr(bdev2) != ctrlr) { + continue; + } + + if (!(opt = malloc(sizeof(struct open_descriptors)))) { + snprintf(msg, sizeof(msg), "Memory allocation error."); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + apply_firmware_cleanup(firm_ctx); + return; + } + + if ((rc = spdk_bdev_open(bdev2, true, NULL, NULL, &desc)) != 0) { + snprintf(msg, sizeof(msg), "Device %s is in use.", firm_ctx->req->bdev_name); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, msg); + free(opt); + apply_firmware_cleanup(firm_ctx); + return; + } + + opt->desc = desc; + opt->bdev = bdev; + TAILQ_INSERT_TAIL(&firm_ctx->desc_head, opt, tqlst); + } + + /* + * find a descriptor associated with our bdev + */ + firm_ctx->desc = NULL; + TAILQ_FOREACH(opt, &firm_ctx->desc_head, tqlst) { + if (opt->bdev == bdev) { + firm_ctx->desc = opt->desc; + break; + } + } + + if (!firm_ctx->desc) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "No descriptor were found."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->ch = spdk_bdev_get_io_channel(firm_ctx->desc); + if (!firm_ctx->ch) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "No channels were found."); + apply_firmware_cleanup(firm_ctx); + return; + } + + fd = open(firm_ctx->req->filename, O_RDONLY); + if (fd < 0) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "open file failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + rc = fstat(fd, &fw_stat); + if (rc < 0) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "fstat failed."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->size = fw_stat.st_size; + if (fw_stat.st_size % 4) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Firmware image size is not 
multiple of 4."); + apply_firmware_cleanup(firm_ctx); + return; + } + + firm_ctx->fw_image = spdk_dma_zmalloc(firm_ctx->size, 4096, NULL); + if (!firm_ctx->fw_image) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + apply_firmware_cleanup(firm_ctx); + return; + } + firm_ctx->p = firm_ctx->fw_image; + + if (read(fd, firm_ctx->p, firm_ctx->size) != ((ssize_t)(firm_ctx->size))) { + close(fd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Read firmware image failed!"); + apply_firmware_cleanup(firm_ctx); + return; + } + close(fd); + + firm_ctx->offset = 0; + firm_ctx->size_remaining = firm_ctx->size; + firm_ctx->transfer = spdk_min(firm_ctx->size_remaining, 4096); + + cmd = malloc(sizeof(struct spdk_nvme_cmd)); + if (!cmd) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Memory allocation error."); + apply_firmware_cleanup(firm_ctx); + return; + } + memset(cmd, 0, sizeof(struct spdk_nvme_cmd)); + cmd->opc = SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; + + cmd->cdw10 = (firm_ctx->transfer >> 2) - 1; + cmd->cdw11 = firm_ctx->offset >> 2; + + rc = spdk_bdev_nvme_admin_passthru(firm_ctx->desc, firm_ctx->ch, cmd, firm_ctx->p, + firm_ctx->transfer, apply_firmware_complete, firm_ctx); + if (rc) { + free(cmd); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Read firmware image failed!"); + apply_firmware_cleanup(firm_ctx); + return; + } +} +SPDK_RPC_REGISTER("apply_nvme_firmware", spdk_rpc_apply_nvme_firmware, SPDK_RPC_RUNTIME) diff --git a/src/spdk/lib/bdev/nvme/nvme_rpc.c b/src/spdk/lib/bdev/nvme/nvme_rpc.c new file mode 100644 index 00000000..b49a7d42 --- /dev/null +++ b/src/spdk/lib/bdev/nvme/nvme_rpc.c @@ -0,0 +1,487 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include "bdev_nvme.h" +#include "spdk/base64.h" + +enum spdk_nvme_rpc_type { + NVME_ADMIN_CMD = 1, + NVME_IO_CMD, +}; + +struct rpc_send_nvme_cmd_req { + char *name; + int cmd_type; + int data_direction; + uint32_t timeout_ms; + uint32_t data_len; + uint32_t md_len; + + struct spdk_nvme_cmd *cmdbuf; + char *data; + char *md; +}; + +struct rpc_send_nvme_cmd_resp { + char *cpl_text; + char *data_text; + char *md_text; +}; + +struct rpc_send_nvme_cmd_ctx { + struct spdk_jsonrpc_request *jsonrpc_request; + struct rpc_send_nvme_cmd_req req; + struct rpc_send_nvme_cmd_resp resp; + struct nvme_ctrlr *nvme_ctrlr; + struct spdk_io_channel *ctrlr_io_ch; +}; + +static void +free_rpc_send_nvme_cmd_ctx(struct rpc_send_nvme_cmd_ctx *ctx) +{ + assert(ctx != NULL); + + free(ctx->req.name); + free(ctx->req.cmdbuf); + spdk_dma_free(ctx->req.data); + spdk_dma_free(ctx->req.md); + free(ctx->resp.cpl_text); + free(ctx->resp.data_text); + free(ctx->resp.md_text); + free(ctx); +} + +static int +rpc_send_nvme_cmd_resp_construct(struct rpc_send_nvme_cmd_resp *resp, + struct rpc_send_nvme_cmd_req *req, + const struct spdk_nvme_cpl *cpl) +{ + resp->cpl_text = malloc(spdk_base64_get_encoded_strlen(sizeof(*cpl)) + 1); + if (!resp->cpl_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->cpl_text, cpl, sizeof(*cpl)); + + if (req->data_direction == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { + if (req->data_len) { + resp->data_text = malloc(spdk_base64_get_encoded_strlen(req->data_len) + 1); + if (!resp->data_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->data_text, req->data, req->data_len); + } + if (req->md_len) { + resp->md_text = malloc(spdk_base64_get_encoded_strlen(req->md_len) + 1); + if (!resp->md_text) { + return -ENOMEM; + } + spdk_base64_urlsafe_encode(resp->md_text, req->md, req->md_len); + 
} + } + + return 0; +} + +static void +spdk_rpc_send_nvme_cmd_complete(struct rpc_send_nvme_cmd_ctx *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_jsonrpc_request *request = ctx->jsonrpc_request; + struct spdk_json_write_ctx *w; + int ret; + + ret = rpc_send_nvme_cmd_resp_construct(&ctx->resp, &ctx->req, cpl); + if (ret) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + spdk_strerror(-ret)); + goto out; + } + + w = spdk_jsonrpc_begin_result(request); + if (w == NULL) { + goto out; + } + + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "cpl", ctx->resp.cpl_text); + + if (ctx->resp.data_text) { + spdk_json_write_named_string(w, "data", ctx->resp.data_text); + } + + if (ctx->resp.md_text) { + spdk_json_write_named_string(w, "metadata", ctx->resp.md_text); + } + + spdk_json_write_object_end(w); + spdk_jsonrpc_end_result(request, w); + +out: + free_rpc_send_nvme_cmd_ctx(ctx); + return; +} + +static void +nvme_rpc_bdev_nvme_cb(void *ref, const struct spdk_nvme_cpl *cpl) +{ + struct rpc_send_nvme_cmd_ctx *ctx = (struct rpc_send_nvme_cmd_ctx *)ref; + + if (ctx->ctrlr_io_ch) { + spdk_put_io_channel(ctx->ctrlr_io_ch); + ctx->ctrlr_io_ch = NULL; + } + + spdk_rpc_send_nvme_cmd_complete(ctx, cpl); +} + +static int +nvme_rpc_admin_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd, + void *buf, uint32_t nbytes, uint32_t timeout_ms) +{ + struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr; + int ret; + + ret = spdk_nvme_ctrlr_cmd_admin_raw(_nvme_ctrlr->ctrlr, cmd, buf, + nbytes, nvme_rpc_bdev_nvme_cb, ctx); + + return ret; +} + +static int +nvme_rpc_io_cmd_bdev_nvme(struct rpc_send_nvme_cmd_ctx *ctx, struct spdk_nvme_cmd *cmd, + void *buf, uint32_t nbytes, void *md_buf, uint32_t md_len, + uint32_t timeout_ms) +{ + struct nvme_ctrlr *_nvme_ctrlr = ctx->nvme_ctrlr; + struct spdk_nvme_qpair *io_qpair; + int ret; + + ctx->ctrlr_io_ch = spdk_get_io_channel(_nvme_ctrlr->ctrlr); + io_qpair = 
spdk_bdev_nvme_get_io_qpair(ctx->ctrlr_io_ch); + + ret = spdk_nvme_ctrlr_cmd_io_raw_with_md(_nvme_ctrlr->ctrlr, io_qpair, + cmd, buf, nbytes, md_buf, nvme_rpc_bdev_nvme_cb, ctx); + if (ret) { + spdk_put_io_channel(ctx->ctrlr_io_ch); + } + + return ret; + +} + +static int +rpc_send_nvme_cmd_exec(struct rpc_send_nvme_cmd_ctx *ctx) +{ + struct rpc_send_nvme_cmd_req *req = &ctx->req; + int ret = -EINVAL; + + switch (req->cmd_type) { + case NVME_ADMIN_CMD: + ret = nvme_rpc_admin_cmd_bdev_nvme(ctx, req->cmdbuf, req->data, + req->data_len, req->timeout_ms); + break; + case NVME_IO_CMD: + ret = nvme_rpc_io_cmd_bdev_nvme(ctx, req->cmdbuf, req->data, + req->data_len, req->md, req->md_len, req->timeout_ms); + break; + } + + return ret; +} + +static int +rpc_decode_cmd_type(const struct spdk_json_val *val, void *out) +{ + int *cmd_type = out; + + if (spdk_json_strequal(val, "admin") == true) { + *cmd_type = NVME_ADMIN_CMD; + } else if (spdk_json_strequal(val, "io") == true) { + *cmd_type = NVME_IO_CMD; + } else { + SPDK_NOTICELOG("Invalid parameter value: cmd_type\n"); + return -EINVAL; + } + + return 0; +} + +static int +rpc_decode_data_direction(const struct spdk_json_val *val, void *out) +{ + int *data_direction = out; + + if (spdk_json_strequal(val, "h2c") == true) { + *data_direction = SPDK_NVME_DATA_HOST_TO_CONTROLLER; + } else if (spdk_json_strequal(val, "c2h") == true) { + *data_direction = SPDK_NVME_DATA_CONTROLLER_TO_HOST; + } else { + SPDK_NOTICELOG("Invalid parameter value: data_direction\n"); + return -EINVAL; + } + + return 0; +} + +static int +rpc_decode_cmdbuf(const struct spdk_json_val *val, void *out) +{ + char *text = NULL; + size_t text_strlen, raw_len; + struct spdk_nvme_cmd *cmdbuf, **_cmdbuf = out; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return val->type == SPDK_JSON_VAL_STRING ? 
-ENOMEM : -EINVAL; + } + + text_strlen = strlen(text); + raw_len = spdk_base64_get_decoded_len(text_strlen); + cmdbuf = malloc(raw_len); + if (!cmdbuf) { + rc = -ENOMEM; + goto out; + } + + rc = spdk_base64_urlsafe_decode(cmdbuf, &raw_len, text); + if (rc) { + goto out; + } + if (raw_len != sizeof(*cmdbuf)) { + rc = -EINVAL; + goto out; + } + + *_cmdbuf = cmdbuf; + +out: + free(text); + return rc; +} + +static int +rpc_decode_data(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + char *text = NULL; + size_t text_strlen; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL; + } + text_strlen = strlen(text); + + if (req->data_len) { + /* data_len is decoded by param "data_len" */ + if (req->data_len != spdk_base64_get_decoded_len(text_strlen)) { + rc = -EINVAL; + goto out; + } + } else { + req->data_len = spdk_base64_get_decoded_len(text_strlen); + req->data = spdk_dma_malloc(req->data_len > 0x1000 ? req->data_len : 0x1000, 0x1000, NULL); + if (!req->data) { + rc = -ENOMEM; + goto out; + } + } + + rc = spdk_base64_urlsafe_decode(req->data, (size_t *)&req->data_len, text); + +out: + free(text); + return rc; +} + +static int +rpc_decode_data_len(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + uint32_t data_len; + int rc; + + rc = spdk_json_decode_uint32(val, &data_len); + if (rc) { + return rc; + } + + if (req->data_len) { + /* data_len is decoded by param "data" */ + if (req->data_len != data_len) { + rc = -EINVAL; + } + } else { + req->data_len = data_len; + req->data = spdk_dma_malloc(req->data_len > 0x1000 ? 
req->data_len : 0x1000, 0x1000, NULL); + if (!req->data) { + rc = -ENOMEM; + } + } + + return rc; +} + +static int +rpc_decode_metadata(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + char *text = NULL; + size_t text_strlen; + int rc; + + rc = spdk_json_decode_string(val, &text); + if (rc) { + return rc = val->type == SPDK_JSON_VAL_STRING ? -ENOMEM : -EINVAL; + } + text_strlen = strlen(text); + + if (req->md_len) { + /* md_len is decoded by param "metadata_len" */ + if (req->md_len != spdk_base64_get_decoded_len(text_strlen)) { + rc = -EINVAL; + goto out; + } + } else { + req->md_len = spdk_base64_get_decoded_len(text_strlen); + req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL); + if (!req->md) { + rc = -ENOMEM; + goto out; + } + } + + rc = spdk_base64_urlsafe_decode(req->md, (size_t *)&req->md_len, text); + +out: + free(text); + return rc; +} + +static int +rpc_decode_metadata_len(const struct spdk_json_val *val, void *out) +{ + struct rpc_send_nvme_cmd_req *req = (struct rpc_send_nvme_cmd_req *)out; + uint32_t md_len; + int rc; + + rc = spdk_json_decode_uint32(val, &md_len); + if (rc) { + return rc; + } + + if (req->md_len) { + /* md_len is decoded by param "metadata" */ + if (req->md_len != md_len) { + rc = -EINVAL; + } + } else { + req->md_len = md_len; + req->md = spdk_dma_malloc(req->md_len, 0x1000, NULL); + if (!req->md) { + rc = -ENOMEM; + } + } + + return rc; +} + +static const struct spdk_json_object_decoder rpc_send_nvme_cmd_req_decoders[] = { + {"name", offsetof(struct rpc_send_nvme_cmd_req, name), spdk_json_decode_string}, + {"cmd_type", offsetof(struct rpc_send_nvme_cmd_req, cmd_type), rpc_decode_cmd_type}, + {"data_direction", offsetof(struct rpc_send_nvme_cmd_req, data_direction), rpc_decode_data_direction}, + {"cmdbuf", offsetof(struct rpc_send_nvme_cmd_req, cmdbuf), rpc_decode_cmdbuf}, + {"timeout_ms", offsetof(struct rpc_send_nvme_cmd_req, timeout_ms), 
spdk_json_decode_uint32, true}, + {"data_len", 0, rpc_decode_data_len, true}, + {"metadata_len", 0, rpc_decode_metadata_len, true}, + {"data", 0, rpc_decode_data, true}, + {"metadata", 0, rpc_decode_metadata, true}, +}; + +static void +spdk_rpc_send_nvme_cmd(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_send_nvme_cmd_ctx *ctx; + int ret, error_code; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + SPDK_ERRLOG("Failed at Malloc ctx\n"); + error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR; + ret = -ENOMEM; + goto invalid; + } + + if (spdk_json_decode_object(params, rpc_send_nvme_cmd_req_decoders, + SPDK_COUNTOF(rpc_send_nvme_cmd_req_decoders), + &ctx->req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS; + ret = -EINVAL; + goto invalid; + } + + ctx->nvme_ctrlr = spdk_bdev_nvme_lookup_ctrlr(ctx->req.name); + if (ctx->nvme_ctrlr == NULL) { + SPDK_ERRLOG("Failed at device lookup\n"); + error_code = SPDK_JSONRPC_ERROR_INVALID_PARAMS; + ret = -EINVAL; + goto invalid; + } + + ctx->jsonrpc_request = request; + + ret = rpc_send_nvme_cmd_exec(ctx); + if (ret < 0) { + SPDK_NOTICELOG("Failed at rpc_send_nvme_cmd_exec\n"); + error_code = SPDK_JSONRPC_ERROR_INTERNAL_ERROR; + goto invalid; + } + + return; + +invalid: + spdk_jsonrpc_send_error_response(request, error_code, spdk_strerror(-ret)); + free_rpc_send_nvme_cmd_ctx(ctx); + return; +} +SPDK_RPC_REGISTER("send_nvme_cmd", spdk_rpc_send_nvme_cmd, SPDK_RPC_RUNTIME) |