diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/examples | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21. [upstream/14.2.21] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
65 files changed, 14215 insertions, 0 deletions
diff --git a/src/spdk/examples/Makefile b/src/spdk/examples/Makefile new file mode 100644 index 00000000..a5fe76ca --- /dev/null +++ b/src/spdk/examples/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += bdev blob ioat nvme sock + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/bdev/Makefile b/src/spdk/examples/bdev/Makefile new file mode 100644 index 00000000..dc1f5221 --- /dev/null +++ b/src/spdk/examples/bdev/Makefile @@ -0,0 +1,48 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-$(CONFIG_FIO_PLUGIN) = fio_plugin +DIRS-y += hello_world + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) + @: + +clean: $(DIRS-y) + @: + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/bdev/fio_plugin/.gitignore b/src/spdk/examples/bdev/fio_plugin/.gitignore new file mode 100644 index 00000000..1b0b36ac --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/src/spdk/examples/bdev/fio_plugin/Makefile b/src/spdk/examples/bdev/fio_plugin/Makefile new file mode 100644 index 00000000..d571c203 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/Makefile @@ -0,0 +1,60 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP := fio_plugin + +C_SRCS = fio_plugin.c +CFLAGS += -I$(CONFIG_FIO_SOURCE_DIR) +LDFLAGS += -shared -rdynamic + +SPDK_LIB_LIST += thread util bdev conf copy rpc jsonrpc json log trace + +LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all: $(APP) + @: + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) $(BLOCKDEV_MODULES_FILES) + $(LINK_C) + +clean: + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/bdev/fio_plugin/README.md b/src/spdk/examples/bdev/fio_plugin/README.md new file mode 100644 index 00000000..65d6ef6d --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/README.md @@ -0,0 +1,69 @@ +# Compiling fio + +Clone the fio source repository from https://github.com/axboe/fio + + git clone https://github.com/axboe/fio + +Then check out the fio 3.3: + + cd fio && git checkout fio-3.3 + +Finally, compile the code: + + make + +# Compiling SPDK + +Clone the SPDK source repository from https://github.com/spdk/spdk + + git clone https://github.com/spdk/spdk + git submodule update --init + +Then, run the SPDK configure script to enable fio (point it to the root of the fio repository): + + cd spdk + ./configure --with-fio=/path/to/fio/repo <other configuration options> + 
+Finally, build SPDK: + + make + +**Note to advanced users**: These steps assume you're using the DPDK submodule. If you are using your +own version of DPDK, the fio plugin requires that DPDK be compiled with -fPIC. You can compile DPDK +with -fPIC by modifying your DPDK configuration file and adding the line: + + EXTRA_CFLAGS=-fPIC + +# Usage + +To use the SPDK fio plugin with fio, specify the plugin binary using LD_PRELOAD when running +fio and set ioengine=spdk_bdev in the fio configuration file (see example_config.fio in the same +directory as this README). + + LD_PRELOAD=<path to spdk repo>/examples/bdev/fio_plugin/fio_plugin fio + +The fio configuration file must contain one new parameter: + + spdk_conf=./examples/bdev/fio_plugin/bdev.conf + +This must point at an SPDK configuration file. There are a number of example configuration +files in the SPDK repository under etc/spdk. + +You can specify which block device to run against by setting the filename parameter +to the block device name: + + filename=Malloc0 + +Or for NVMe devices: + + filename=Nvme0n1 + +Currently the SPDK fio plugin is limited to the thread usage model, so fio jobs must also specify thread=1 +when using the SPDK fio plugin. + +fio also currently has a race condition on shutdown if dynamically loading the ioengine by specifying the +engine's full path via the ioengine parameter - LD_PRELOAD is recommended to avoid this race condition. + +When testing random workloads, it is recommended to set norandommap=1. fio's random map +processing consumes extra CPU cycles which will degrade performance over time with +the fio_plugin since all I/O are submitted and completed on a single CPU core. 
diff --git a/src/spdk/examples/bdev/fio_plugin/bdev.conf.in b/src/spdk/examples/bdev/fio_plugin/bdev.conf.in new file mode 100644 index 00000000..948cebe3 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/bdev.conf.in @@ -0,0 +1,3 @@ +[Malloc] + NumberOfLuns 1 + LunSizeInMB 128 diff --git a/src/spdk/examples/bdev/fio_plugin/example_config.fio b/src/spdk/examples/bdev/fio_plugin/example_config.fio new file mode 100644 index 00000000..3a35432e --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/example_config.fio @@ -0,0 +1,16 @@ +[global] +ioengine=spdk_bdev +spdk_conf=./examples/bdev/fio_plugin/bdev.conf.in +thread=1 +group_reporting=1 +direct=1 +verify=0 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=4k + +[test] +numjobs=1 diff --git a/src/spdk/examples/bdev/fio_plugin/fio_plugin.c b/src/spdk/examples/bdev/fio_plugin/fio_plugin.c new file mode 100644 index 00000000..4406abd4 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/fio_plugin.c @@ -0,0 +1,779 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/copy_engine.h" +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/queue.h" + +#include "config-host.h" +#include "fio.h" +#include "optgroup.h" + +struct spdk_fio_options { + void *pad; + char *conf; + unsigned mem_mb; + bool mem_single_seg; +}; + +/* Used to pass messages between fio threads */ +struct spdk_fio_msg { + spdk_thread_fn cb_fn; + void *cb_arg; +}; + +/* A polling function */ +struct spdk_fio_poller { + spdk_poller_fn cb_fn; + void *cb_arg; + uint64_t period_microseconds; + + TAILQ_ENTRY(spdk_fio_poller) link; +}; + +struct spdk_fio_request { + struct io_u *io; + struct thread_data *td; +}; + +struct spdk_fio_target { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + + TAILQ_ENTRY(spdk_fio_target) link; +}; + +struct spdk_fio_thread { + struct thread_data *td; /* fio thread context */ + struct spdk_thread *thread; /* spdk thread context */ + struct spdk_ring *ring; /* ring for passing messages to this thread */ + TAILQ_HEAD(, spdk_fio_poller) pollers; /* List of 
registered pollers on this thread */ + + TAILQ_HEAD(, spdk_fio_target) targets; + + struct io_u **iocq; // io completion queue + unsigned int iocq_count; // number of iocq entries filled by last getevents + unsigned int iocq_size; // number of iocq entries allocated +}; + +static struct spdk_fio_thread *g_init_thread = NULL; +static pthread_t g_init_thread_id = 0; +static bool g_spdk_env_initialized = false; + +static int spdk_fio_init(struct thread_data *td); +static void spdk_fio_cleanup(struct thread_data *td); +static size_t spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread); + +static void +spdk_fio_send_msg(spdk_thread_fn fn, void *ctx, void *thread_ctx) +{ + struct spdk_fio_thread *thread = thread_ctx; + struct spdk_fio_msg *msg; + size_t count; + + msg = calloc(1, sizeof(*msg)); + assert(msg != NULL); + + msg->cb_fn = fn; + msg->cb_arg = ctx; + + count = spdk_ring_enqueue(thread->ring, (void **)&msg, 1); + if (count != 1) { + SPDK_ERRLOG("Unable to send message to thread %p. 
rc: %lu\n", thread, count); + } +} + +static void +spdk_fio_bdev_init_done(void *cb_arg, int rc) +{ + *(bool *)cb_arg = true; +} + +static struct spdk_poller * +spdk_fio_start_poller(void *thread_ctx, + spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds) +{ + struct spdk_fio_thread *fio_thread = thread_ctx; + struct spdk_fio_poller *fio_poller; + + fio_poller = calloc(1, sizeof(*fio_poller)); + if (!fio_poller) { + SPDK_ERRLOG("Unable to allocate poller\n"); + return NULL; + } + + fio_poller->cb_fn = fn; + fio_poller->cb_arg = arg; + fio_poller->period_microseconds = period_microseconds; + + TAILQ_INSERT_TAIL(&fio_thread->pollers, fio_poller, link); + + return (struct spdk_poller *)fio_poller; +} + +static void +spdk_fio_stop_poller(struct spdk_poller *poller, void *thread_ctx) +{ + struct spdk_fio_poller *fio_poller; + struct spdk_fio_thread *fio_thread = thread_ctx; + + fio_poller = (struct spdk_fio_poller *)poller; + + TAILQ_REMOVE(&fio_thread->pollers, fio_poller, link); + + free(fio_poller); +} + +static int +spdk_fio_init_thread(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + + fio_thread = calloc(1, sizeof(*fio_thread)); + if (!fio_thread) { + SPDK_ERRLOG("failed to allocate thread local context\n"); + return -1; + } + + fio_thread->td = td; + td->io_ops_data = fio_thread; + + fio_thread->ring = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 4096, SPDK_ENV_SOCKET_ID_ANY); + if (!fio_thread->ring) { + SPDK_ERRLOG("failed to allocate ring\n"); + free(fio_thread); + return -1; + } + + fio_thread->thread = spdk_allocate_thread(spdk_fio_send_msg, + spdk_fio_start_poller, + spdk_fio_stop_poller, + fio_thread, + "fio_thread"); + if (!fio_thread->thread) { + spdk_ring_free(fio_thread->ring); + free(fio_thread); + SPDK_ERRLOG("failed to allocate thread\n"); + return -1; + } + + TAILQ_INIT(&fio_thread->pollers); + + fio_thread->iocq_size = td->o.iodepth; + fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); + 
assert(fio_thread->iocq != NULL); + + TAILQ_INIT(&fio_thread->targets); + + return 0; +} + +static void * +spdk_init_thread_poll(void *arg) +{ + struct spdk_fio_thread *thread = arg; + int oldstate; + int rc; + + /* Loop until the thread is cancelled */ + while (true) { + rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + spdk_fio_poll_thread(thread); + + rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + /* This is a pthread cancellation point and cannot be removed. */ + sleep(1); + } + + return NULL; +} + +static int +spdk_fio_init_env(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + struct spdk_fio_options *eo; + bool done = false; + int rc; + struct spdk_conf *config; + struct spdk_env_opts opts; + size_t count; + + /* Parse the SPDK configuration file */ + eo = td->eo; + if (!eo->conf || !strlen(eo->conf)) { + SPDK_ERRLOG("No configuration file provided\n"); + return -1; + } + + config = spdk_conf_allocate(); + if (!config) { + SPDK_ERRLOG("Unable to allocate configuration file\n"); + return -1; + } + + rc = spdk_conf_read(config, eo->conf); + if (rc != 0) { + SPDK_ERRLOG("Invalid configuration file format\n"); + spdk_conf_free(config); + return -1; + } + if (spdk_conf_first_section(config) == NULL) { + SPDK_ERRLOG("Invalid configuration file format\n"); + spdk_conf_free(config); + return -1; + } + spdk_conf_set_as_default(config); + + /* Initialize the environment library */ + spdk_env_opts_init(&opts); + opts.name = "fio"; + + if (eo->mem_mb) { + opts.mem_size = eo->mem_mb; + } + opts.hugepage_single_segments = eo->mem_single_seg; + + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + spdk_conf_free(config); + return -1; + } 
+ spdk_unaffinitize_thread(); + + /* Create an SPDK thread temporarily */ + rc = spdk_fio_init_thread(td); + if (rc < 0) { + SPDK_ERRLOG("Failed to create initialization thread\n"); + return -1; + } + + g_init_thread = fio_thread = td->io_ops_data; + + /* Initialize the copy engine */ + spdk_copy_engine_initialize(); + + /* Initialize the bdev layer */ + spdk_bdev_initialize(spdk_fio_bdev_init_done, &done); + + /* First, poll until initialization is done. */ + do { + spdk_fio_poll_thread(fio_thread); + } while (!done); + + /* + * Continue polling until there are no more events. + * This handles any final events posted by pollers. + */ + do { + count = spdk_fio_poll_thread(fio_thread); + } while (count > 0); + + /* + * Spawn a thread to continue polling this thread + * occasionally. + */ + + rc = pthread_create(&g_init_thread_id, NULL, &spdk_init_thread_poll, fio_thread); + if (rc != 0) { + SPDK_ERRLOG("Unable to spawn thread to poll admin queue. It won't be polled.\n"); + } + + return 0; +} + +/* Called for each thread to fill in the 'real_file_size' member for + * each file associated with this thread. This is called prior to + * the init operation (spdk_fio_init()) below. This call will occur + * on the initial start up thread if 'create_serialize' is true, or + * on the thread actually associated with 'thread_data' if 'create_serialize' + * is false. 
+ */ +static int +spdk_fio_setup(struct thread_data *td) +{ + unsigned int i; + struct fio_file *f; + + if (!td->o.use_thread) { + SPDK_ERRLOG("must set thread=1 when using spdk plugin\n"); + return -1; + } + + if (!g_spdk_env_initialized) { + if (spdk_fio_init_env(td)) { + SPDK_ERRLOG("failed to initialize\n"); + return -1; + } + + g_spdk_env_initialized = true; + } + + for_each_file(td, f, i) { + struct spdk_bdev *bdev; + + bdev = spdk_bdev_get_by_name(f->file_name); + if (!bdev) { + SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name); + return -1; + } + + f->real_file_size = spdk_bdev_get_num_blocks(bdev) * + spdk_bdev_get_block_size(bdev); + + } + + return 0; +} + +/* Called for each thread, on that thread, shortly after the thread + * starts. + */ +static int +spdk_fio_init(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + unsigned int i; + struct fio_file *f; + int rc; + + spdk_fio_init_thread(td); + + fio_thread = td->io_ops_data; + + for_each_file(td, f, i) { + struct spdk_fio_target *target; + + target = calloc(1, sizeof(*target)); + if (!target) { + SPDK_ERRLOG("Unable to allocate memory for I/O target.\n"); + return -1; + } + + target->bdev = spdk_bdev_get_by_name(f->file_name); + if (!target->bdev) { + SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name); + free(target); + return -1; + } + + rc = spdk_bdev_open(target->bdev, true, NULL, NULL, &target->desc); + if (rc) { + SPDK_ERRLOG("Unable to open bdev %s\n", f->file_name); + free(target); + return -1; + } + + target->ch = spdk_bdev_get_io_channel(target->desc); + if (!target->ch) { + SPDK_ERRLOG("Unable to get I/O channel for bdev.\n"); + spdk_bdev_close(target->desc); + free(target); + return -1; + } + + f->engine_data = target; + + TAILQ_INSERT_TAIL(&fio_thread->targets, target, link); + } + + return 0; +} + +static void +spdk_fio_cleanup_thread(struct spdk_fio_thread *fio_thread) +{ + struct spdk_fio_target *target, *tmp; + + TAILQ_FOREACH_SAFE(target, 
&fio_thread->targets, link, tmp) { + TAILQ_REMOVE(&fio_thread->targets, target, link); + spdk_put_io_channel(target->ch); + spdk_bdev_close(target->desc); + free(target); + } + + while (spdk_fio_poll_thread(fio_thread) > 0) {} + + spdk_free_thread(); + spdk_ring_free(fio_thread->ring); + free(fio_thread->iocq); + free(fio_thread); +} + +static void +spdk_fio_cleanup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + spdk_fio_cleanup_thread(fio_thread); + td->io_ops_data = NULL; +} + +static int +spdk_fio_open(struct thread_data *td, struct fio_file *f) +{ + + return 0; +} + +static int +spdk_fio_close(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int +spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) +{ + td->orig_buffer = spdk_dma_zmalloc(total_mem, 0x1000, NULL); + return td->orig_buffer == NULL; +} + +static void +spdk_fio_iomem_free(struct thread_data *td) +{ + spdk_dma_free(td->orig_buffer); +} + +static int +spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req; + + fio_req = calloc(1, sizeof(*fio_req)); + if (fio_req == NULL) { + return 1; + } + fio_req->io = io_u; + fio_req->td = td; + + io_u->engine_data = fio_req; + + return 0; +} + +static void +spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req = io_u->engine_data; + + if (fio_req) { + assert(fio_req->io == io_u); + free(fio_req); + io_u->engine_data = NULL; + } +} + +static void +spdk_fio_completion_cb(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg) +{ + struct spdk_fio_request *fio_req = cb_arg; + struct thread_data *td = fio_req->td; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(fio_thread->iocq_count < fio_thread->iocq_size); + fio_req->io->error = success ? 
0 : EIO; + fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; + + spdk_bdev_free_io(bdev_io); +} + +#if FIO_IOOPS_VERSION >= 24 +typedef enum fio_q_status fio_q_status_t; +#else +typedef int fio_q_status_t; +#endif + +static fio_q_status_t +spdk_fio_queue(struct thread_data *td, struct io_u *io_u) +{ + int rc = 1; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_fio_target *target = io_u->file->engine_data; + + assert(fio_req->td == td); + + if (!target) { + SPDK_ERRLOG("Unable to look up correct I/O target.\n"); + fio_req->io->error = ENODEV; + return FIO_Q_COMPLETED; + } + + switch (io_u->ddir) { + case DDIR_READ: + rc = spdk_bdev_read(target->desc, target->ch, + io_u->buf, io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + case DDIR_WRITE: + rc = spdk_bdev_write(target->desc, target->ch, + io_u->buf, io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + case DDIR_TRIM: + rc = spdk_bdev_unmap(target->desc, target->ch, + io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + default: + assert(false); + break; + } + + if (rc == -ENOMEM) { + return FIO_Q_BUSY; + } + + if (rc != 0) { + fio_req->io->error = abs(rc); + return FIO_Q_COMPLETED; + } + + return FIO_Q_QUEUED; +} + +static struct io_u * +spdk_fio_event(struct thread_data *td, int event) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(event >= 0); + assert((unsigned)event < fio_thread->iocq_count); + return fio_thread->iocq[event]; +} + +static size_t +spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread) +{ + struct spdk_fio_msg *msg; + struct spdk_fio_poller *p, *tmp; + size_t count; + + /* Process new events */ + count = spdk_ring_dequeue(fio_thread->ring, (void **)&msg, 1); + if (count > 0) { + msg->cb_fn(msg->cb_arg); + free(msg); + } + + /* Call all pollers */ + TAILQ_FOREACH_SAFE(p, &fio_thread->pollers, link, tmp) { + p->cb_fn(p->cb_arg); + } + + return count; +} + 
+static int +spdk_fio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, const struct timespec *t) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct timespec t0, t1; + uint64_t timeout = 0; + + if (t) { + timeout = t->tv_sec * 1000000000L + t->tv_nsec; + clock_gettime(CLOCK_MONOTONIC_RAW, &t0); + } + + fio_thread->iocq_count = 0; + + for (;;) { + spdk_fio_poll_thread(fio_thread); + + if (fio_thread->iocq_count >= min) { + return fio_thread->iocq_count; + } + + if (t) { + clock_gettime(CLOCK_MONOTONIC_RAW, &t1); + uint64_t elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) + + t1.tv_nsec - t0.tv_nsec; + if (elapse > timeout) { + break; + } + } + } + + return fio_thread->iocq_count; +} + +static int +spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) +{ + /* TODO: This should probably send a flush to the device, but for now just return successful. */ + return 0; +} + +static struct fio_option options[] = { + { + .name = "spdk_conf", + .lname = "SPDK configuration file", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, conf), + .help = "A SPDK configuration file", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "spdk_mem", + .lname = "SPDK memory in MB", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, mem_mb), + .help = "Amount of memory in MB to allocate for SPDK", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "spdk_single_seg", + .lname = "SPDK switch to create just a single hugetlbfs file", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct spdk_fio_options, mem_single_seg), + .help = "If set to 1, SPDK will use just a single hugetlbfs file", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = NULL, + }, +}; + +/* FIO imports this structure using dlsym */ +struct ioengine_ops ioengine = { + .name = "spdk_bdev", + .version = FIO_IOOPS_VERSION, + .flags = FIO_RAWIO | FIO_NOEXTEND | 
FIO_NODISKUTIL | FIO_MEMALIGN, + .setup = spdk_fio_setup, + .init = spdk_fio_init, + //.prep = unused, + .queue = spdk_fio_queue, + //.commit = unused, + .getevents = spdk_fio_getevents, + .event = spdk_fio_event, + //.errdetails = unused, + //.cancel = unused, + .cleanup = spdk_fio_cleanup, + .open_file = spdk_fio_open, + .close_file = spdk_fio_close, + .invalidate = spdk_fio_invalidate, + //.unlink_file = unused, + //.get_file_size = unused, + //.terminate = unused, + .iomem_alloc = spdk_fio_iomem_alloc, + .iomem_free = spdk_fio_iomem_free, + .io_u_init = spdk_fio_io_u_init, + .io_u_free = spdk_fio_io_u_free, + .option_struct_size = sizeof(struct spdk_fio_options), + .options = options, +}; + +static void fio_init spdk_fio_register(void) +{ + register_ioengine(&ioengine); +} + +static void +spdk_fio_module_finish_done(void *cb_arg) +{ + *(bool *)cb_arg = true; +} + +static void +spdk_fio_finish_env(void) +{ + struct spdk_fio_thread *fio_thread; + bool done = false; + size_t count; + + /* the same thread that called spdk_fio_init_env */ + fio_thread = g_init_thread; + + if (pthread_cancel(g_init_thread_id) == 0) { + pthread_join(g_init_thread_id, NULL); + } + + spdk_bdev_finish(spdk_fio_module_finish_done, &done); + + do { + spdk_fio_poll_thread(fio_thread); + } while (!done); + + do { + count = spdk_fio_poll_thread(fio_thread); + } while (count > 0); + + done = false; + spdk_copy_engine_finish(spdk_fio_module_finish_done, &done); + + do { + spdk_fio_poll_thread(fio_thread); + } while (!done); + + do { + count = spdk_fio_poll_thread(fio_thread); + } while (count > 0); + + spdk_fio_cleanup_thread(fio_thread); +} + +static void fio_exit spdk_fio_unregister(void) +{ + if (g_spdk_env_initialized) { + spdk_fio_finish_env(); + g_spdk_env_initialized = false; + } + unregister_ioengine(&ioengine); +} diff --git a/src/spdk/examples/bdev/fio_plugin/full_bench.fio b/src/spdk/examples/bdev/fio_plugin/full_bench.fio new file mode 100644 index 00000000..f76da18d --- /dev/null 
+++ b/src/spdk/examples/bdev/fio_plugin/full_bench.fio @@ -0,0 +1,20 @@ +[global] +ioengine=spdk_bdev +spdk_conf=./examples/bdev/fio_plugin/bdev.conf.in +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +percentile_list=50:99:99.9:99.99:99.999 + +[4k_randread_qd1] +filename=Malloc0 +description="4KiB Random Read QD=1" +bs=4k +rw=randread +iodepth=1 +time_based=1 +ramp_time=0 +runtime=10 diff --git a/src/spdk/examples/bdev/hello_world/.gitignore b/src/spdk/examples/bdev/hello_world/.gitignore new file mode 100644 index 00000000..7bdf9393 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/.gitignore @@ -0,0 +1 @@ +hello_bdev diff --git a/src/spdk/examples/bdev/hello_world/Makefile b/src/spdk/examples/bdev/hello_world/Makefile new file mode 100644 index 00000000..dacc1b81 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/Makefile @@ -0,0 +1,56 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_bdev + +C_SRCS := hello_bdev.c + +SPDK_LIB_LIST = event_bdev event_copy +SPDK_LIB_LIST += bdev copy event thread util conf trace log jsonrpc json rpc + +LIBS += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_MODULES_LINKER_ARGS) $(SOCK_MODULES_LINKER_ARGS) +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all : $(APP) + @: + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(BLOCKDEV_MODULES_FILES) $(COPY_MODULES_FILES) $(SOCK_MODULES_FILES) $(LINKER_MODULES) $(ENV_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/bdev/hello_world/bdev.conf b/src/spdk/examples/bdev/hello_world/bdev.conf new file mode 100644 index 00000000..c8504b01 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/bdev.conf @@ -0,0 +1,5 @@ +[Passthru] + PT Malloc1 PT0 +[Malloc] + NumberOfLuns 2 + LunSizeInMB 16 diff --git a/src/spdk/examples/bdev/hello_world/hello_bdev.c b/src/spdk/examples/bdev/hello_world/hello_bdev.c new file mode 100644 index 00000000..cc65b051 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/hello_bdev.c @@ -0,0 +1,294 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/bdev_module.h" + +static char *g_bdev_name = "Malloc0"; + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. 
+ */ +struct hello_context_t { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *bdev_io_channel; + char *buff; + char *bdev_name; + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +/* + * Usage function for printing parameters that are specific to this application + */ +static void +hello_bdev_usage(void) +{ + printf(" -b <bdev> name of the bdev to use\n"); +} + +/* + * This function is called to parse the parameters that are specific to this application + */ +static void hello_bdev_parse_arg(int ch, char *arg) +{ + switch (ch) { + case 'b': + g_bdev_name = arg; + break; + } +} + +/* + * Callback function for read io completion. + */ +static void +read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct hello_context_t *hello_context = cb_arg; + + if (success) { + SPDK_NOTICELOG("Read string from bdev : %s\n", hello_context->buff); + } else { + SPDK_ERRLOG("bdev io read error\n"); + } + + /* Complete the bdev io and close the channel */ + spdk_bdev_free_io(bdev_io); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + SPDK_NOTICELOG("Stopping app\n"); + spdk_app_stop(success ? 
0 : -1); +} + +static void +hello_read(void *arg) +{ + struct hello_context_t *hello_context = arg; + int rc = 0; + uint32_t length = spdk_bdev_get_block_size(hello_context->bdev); + + SPDK_NOTICELOG("Reading io\n"); + rc = spdk_bdev_read(hello_context->bdev_desc, hello_context->bdev_io_channel, + hello_context->buff, 0, length, read_complete, hello_context); + + if (rc == -ENOMEM) { + SPDK_NOTICELOG("Queueing io\n"); + /* In case we cannot perform I/O now, queue I/O */ + hello_context->bdev_io_wait.bdev = hello_context->bdev; + hello_context->bdev_io_wait.cb_fn = hello_read; + hello_context->bdev_io_wait.cb_arg = hello_context; + spdk_bdev_queue_io_wait(hello_context->bdev, hello_context->bdev_io_channel, + &hello_context->bdev_io_wait); + } else if (rc) { + SPDK_ERRLOG("%s error while reading from bdev: %d\n", spdk_strerror(-rc), rc); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + } +} + +/* + * Callback function for write io completion. 
+ */ +static void +write_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct hello_context_t *hello_context = cb_arg; + uint32_t length; + + /* Complete the I/O */ + spdk_bdev_free_io(bdev_io); + + if (success) { + SPDK_NOTICELOG("bdev io write completed successfully\n"); + } else { + SPDK_ERRLOG("bdev io write error: %d\n", EIO); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + + /* Zero the buffer so that we can use it for reading */ + length = spdk_bdev_get_block_size(hello_context->bdev); + memset(hello_context->buff, 0, length); + + hello_read(hello_context); +} + +static void +hello_write(void *arg) +{ + struct hello_context_t *hello_context = arg; + int rc = 0; + uint32_t length = spdk_bdev_get_block_size(hello_context->bdev); + + SPDK_NOTICELOG("Writing to the bdev\n"); + rc = spdk_bdev_write(hello_context->bdev_desc, hello_context->bdev_io_channel, + hello_context->buff, 0, length, write_complete, hello_context); + + if (rc == -ENOMEM) { + SPDK_NOTICELOG("Queueing io\n"); + /* In case we cannot perform I/O now, queue I/O */ + hello_context->bdev_io_wait.bdev = hello_context->bdev; + hello_context->bdev_io_wait.cb_fn = hello_write; + hello_context->bdev_io_wait.cb_arg = hello_context; + spdk_bdev_queue_io_wait(hello_context->bdev, hello_context->bdev_io_channel, + &hello_context->bdev_io_wait); + } else if (rc) { + SPDK_ERRLOG("%s error while writing to bdev: %d\n", spdk_strerror(-rc), rc); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + } +} + +/* + * Our initial event that kicks off everything from main(). 
+ */ +static void +hello_start(void *arg1, void *arg2) +{ + struct hello_context_t *hello_context = arg1; + uint32_t blk_size, buf_align; + int rc = 0; + hello_context->bdev = NULL; + hello_context->bdev_desc = NULL; + + SPDK_NOTICELOG("Successfully started the application\n"); + + /* + * Get the bdev. There can be many bdevs configured in + * the configuration file but this application will only + * use the one input by the user at runtime so we get it via its name. + */ + hello_context->bdev = spdk_bdev_get_by_name(hello_context->bdev_name); + if (hello_context->bdev == NULL) { + SPDK_ERRLOG("Could not find the bdev: %s\n", hello_context->bdev_name); + spdk_app_stop(-1); + return; + } + + /* + * Open the bdev by calling spdk_bdev_open() + * The function will return a descriptor + */ + SPDK_NOTICELOG("Opening the bdev %s\n", hello_context->bdev_name); + rc = spdk_bdev_open(hello_context->bdev, true, NULL, NULL, &hello_context->bdev_desc); + if (rc) { + SPDK_ERRLOG("Could not open bdev: %s\n", hello_context->bdev_name); + spdk_app_stop(-1); + return; + } + + SPDK_NOTICELOG("Opening io channel\n"); + /* Open I/O channel */ + hello_context->bdev_io_channel = spdk_bdev_get_io_channel(hello_context->bdev_desc); + if (hello_context->bdev_io_channel == NULL) { + SPDK_ERRLOG("Could not create bdev I/O channel!!\n"); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + + /* Allocate memory for the write buffer. + * Initialize the write buffer with the string "Hello World!" 
+ */ + blk_size = spdk_bdev_get_block_size(hello_context->bdev); + buf_align = spdk_bdev_get_buf_align(hello_context->bdev); + hello_context->buff = spdk_dma_zmalloc(blk_size, buf_align, NULL); + if (!hello_context->buff) { + SPDK_ERRLOG("Failed to allocate buffer\n"); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + snprintf(hello_context->buff, blk_size, "%s", "Hello World!\n"); + + hello_write(hello_context); +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t hello_context = {}; + + /* Set default values in opts structure. */ + spdk_app_opts_init(&opts); + opts.name = "hello_bdev"; + opts.config_file = "bdev.conf"; + + /* + * The user can provide the config file and bdev name at run time. + * For example, to use Malloc0 in file bdev.conf run with params + * ./hello_bdev -c bdev.conf -b Malloc0 + * To use passthru bdev PT0 run with params + * ./hello_bdev -c bdev.conf -b PT0 + * If none of the parameters are provided the application will use the + * default parameters(-c bdev.conf -b Malloc0). + */ + if ((rc = spdk_app_parse_args(argc, argv, &opts, "b:", NULL, hello_bdev_parse_arg, + hello_bdev_usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { + exit(rc); + } + hello_context.bdev_name = g_bdev_name; + + /* + * spdk_app_start() will block running hello_start() until + * spdk_app_stop() is called by someone (not simply when + * hello_start() returns), or if an error occurs during + * spdk_app_start() before hello_start() runs. + */ + rc = spdk_app_start(&opts, hello_start, &hello_context, NULL); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + } + + /* When the app stops, free up memory that we allocated. */ + spdk_dma_free(hello_context.buff); + + /* Gracefully close out all of the SPDK subsystems. 
*/ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/Makefile b/src/spdk/examples/blob/Makefile new file mode 100644 index 00000000..a297ddb2 --- /dev/null +++ b/src/spdk/examples/blob/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world cli + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/blob/cli/.gitignore b/src/spdk/examples/blob/cli/.gitignore new file mode 100644 index 00000000..6c895d79 --- /dev/null +++ b/src/spdk/examples/blob/cli/.gitignore @@ -0,0 +1 @@ +blobcli diff --git a/src/spdk/examples/blob/cli/Makefile b/src/spdk/examples/blob/cli/Makefile new file mode 100644 index 00000000..7796c40f --- /dev/null +++ b/src/spdk/examples/blob/cli/Makefile @@ -0,0 +1,57 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = blobcli + +C_SRCS := blobcli.c + +SPDK_LIB_LIST = event_bdev event_copy +SPDK_LIB_LIST += blobfs blob bdev blob_bdev copy event thread util conf trace \ + log jsonrpc json rpc + +LIBS += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_NO_LVOL_MODULES_LINKER_ARGS) $(SOCK_MODULES_LINKER_ARGS) +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all : $(APP) + @: + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(COPY_MODULES_FILES) $(BLOCKDEV_NO_LVOL_MODULES_FILES) $(SOCK_MODULES_FILES) $(LINKER_MODULES) $(ENV_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/blob/cli/README.md b/src/spdk/examples/blob/cli/README.md new file mode 100644 index 00000000..c457ad4e --- /dev/null +++ b/src/spdk/examples/blob/cli/README.md @@ -0,0 +1,61 @@ +The blobcli tool has several options that are listed by using the -h command +however the three operating modes are covered in more detail here: + +Command Mode +------------ +This is the default and will just execute one command at a time. It's simple +but the downside is that if you are going to interact quite a bit with the +blobstore, the startup time for the application can be cumbersome. + +Shell Mode +---------- +You startup shell mode by using the -S command. 
At that point you will get +a "blob>" prompt where you can enter any of the commands, including -h, +to execute them. You can still enter just one at a time but the initial +startup time for the application will not get in the way between commands +anymore so it is much more usable. + +Script (aka test) Mode +---------------------- +In script mode you just supply one command with a filename when you start +the cli, for example `blobcli -T test.bs` will feed the tool the file +called test.bs which contains a series of commands that will all run +automatically and, like shell mode, will only initialize one time so is +quick. + +The script file format (example) is shown below. Comments are allowed and +each line should contain one valid command (and its parameters) only. In +order to operate on blobs via their ID value, use the token $Bn where n +represents the instance of the blob created in the script. + +For example, the line `-s $B0` will operate on the blobid of the first +blob created in the script (0 index based). `$B2` represents the third +blob created in the script. + +If you start test mode with the additional "ignore" option, any invalid +script lines will simply be skipped, otherwise the tool will exit if +it runs into an invalid line (i.e. `./blobcli -T test.bs ignore`). + +Sample test/bs file: +~~~{.sh} +# this is a comment +-i +-s bs +-l bdevs +-n 1 +-s bs +-s $B0 +-n 2 +-s $B1 +-m $B0 Makefile +-d $B0 M.blob +-f $B1 65 +-d $B1 65.blob +-s bs +-x $B0 b0key boval +-x $B1 b1key b1val +-r $B0 b0key +-s $B0 +-s $B1 +-s bs +~~~ diff --git a/src/spdk/examples/blob/cli/blobcli.c b/src/spdk/examples/blob/cli/blobcli.c new file mode 100644 index 00000000..e0b843a8 --- /dev/null +++ b/src/spdk/examples/blob/cli/blobcli.c @@ -0,0 +1,1575 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/log.h" +#include "spdk/version.h" +#include "spdk/string.h" +#include "spdk/uuid.h" + +/* + * The following is not a public header file, but the CLI does expose + * some internals of blobstore for dev/debug purposes so we + * include it here. 
+ */ +#include "../lib/blob/blobstore.h" +static void cli_start(void *arg1, void *arg2); + +static const char *program_name = "blobcli"; +/* default name for .conf file, any name can be used however with -c switch */ +static const char *program_conf = "blobcli.conf"; + +/* + * CMD mode runs one command at a time which can be annoying as the init takes + * a few seconds, so the shell mode, invoked with -S, does the init once and gives + * the user an interactive shell instead. With script mode init is also done just + * once. + */ +enum cli_mode_type { + CLI_MODE_CMD, + CLI_MODE_SHELL, + CLI_MODE_SCRIPT +}; + +enum cli_action_type { + CLI_NONE, + CLI_IMPORT_BLOB, + CLI_DUMP_BLOB, + CLI_FILL, + CLI_REM_XATTR, + CLI_SET_XATTR, + CLI_SET_SUPER, + CLI_SHOW_BS, + CLI_SHOW_BLOB, + CLI_CREATE_BLOB, + CLI_LIST_BDEVS, + CLI_LIST_BLOBS, + CLI_INIT_BS, + CLI_DUMP_BS, + CLI_SHELL_EXIT, + CLI_HELP, +}; + +#define BUFSIZE 255 +#define MAX_ARGS 16 +#define ALIGN_4K 4096 +#define STARTING_IO_UNIT 0 +#define NUM_IO_UNITS 1 + +/* + * The CLI uses the SPDK app framework so is async and callback driven. A + * pointer to this structure is passed to SPDK calls and returned in the + * callbacks for easy access to all the info we may need. 
+ */ +struct cli_context_t { + struct spdk_blob_store *bs; + struct spdk_blob *blob; + struct spdk_bs_dev *bs_dev; + spdk_blob_id blobid; + spdk_blob_id superid; + struct spdk_io_channel *channel; + uint8_t *buff; + uint64_t page_size; + uint64_t io_unit_size; + uint64_t io_unit_count; + uint64_t blob_io_units; + uint64_t bytes_so_far; + FILE *fp; + enum cli_action_type action; + char key[BUFSIZE + 1]; + char value[BUFSIZE + 1]; + char file[BUFSIZE + 1]; + uint64_t filesize; + int fill_value; + char bdev_name[BUFSIZE]; + int rc; + int num_clusters; + enum cli_mode_type cli_mode; + const char *config_file; + int argc; + char *argv[MAX_ARGS]; + bool app_started; + char script_file[BUFSIZE + 1]; +}; + +/* we store a bunch of stuff in a global struct for use by scripting mode */ +#define MAX_SCRIPT_LINES 64 +#define MAX_SCRIPT_BLOBS 16 +struct cli_script_t { + spdk_blob_id blobid[MAX_SCRIPT_BLOBS]; + int blobid_idx; + int max_index; + int cmdline_idx; + bool ignore_errors; + char *cmdline[MAX_SCRIPT_LINES]; +}; +struct cli_script_t g_script; + +/* + * Common printing of commands for CLI and shell modes. 
+ */ +static void +print_cmds(void) +{ + printf("\nCommands include:\n"); + printf("\t-b bdev - name of the block device to use (example: Nvme0n1)\n"); + printf("\t-d <blobid> filename - dump contents of a blob to a file\n"); + printf("\t-D - dump metadata contents of an existing blobstore\n"); + printf("\t-f <blobid> value - fill a blob with a decimal value\n"); + printf("\t-h - this help screen\n"); + printf("\t-i - initialize a blobstore\n"); + printf("\t-l bdevs | blobs - list either available bdevs or existing blobs\n"); + printf("\t-m <blobid> filename - import contents of a file to a blob\n"); + printf("\t-n <# clusters> - create new blob\n"); + printf("\t-p <blobid> - set the superblob to the ID provided\n"); + printf("\t-r <blobid> name - remove xattr name/value pair\n"); + printf("\t-s <blobid> | bs - show blob info or blobstore info\n"); + printf("\t-x <blobid> name value - set xattr name/value pair\n"); + printf("\t-X - exit when in interactive shell mode\n"); + printf("\t-S - enter interactive shell mode\n"); + printf("\t-T <filename> - automated script mode\n"); + printf("\n"); +} + +/* + * Prints usage and relevant error message. + */ +static void +usage(struct cli_context_t *cli_context, char *msg) +{ + if (msg) { + printf("%s", msg); + } + + if (!cli_context || cli_context->cli_mode == CLI_MODE_CMD) { + printf("Version %s\n", SPDK_VERSION_STRING); + printf("Usage: %s [-c SPDK config_file] Command\n", program_name); + printf("\n%s is a command line tool for interacting with blobstore\n", + program_name); + printf("on the underlying device specified in the conf file passed\n"); + printf("in as a command line option.\n"); + } + if (!cli_context || cli_context->cli_mode != CLI_MODE_SCRIPT) { + print_cmds(); + } +} + +/* + * Free up memory that we allocated. 
+ */ +static void +cli_cleanup(struct cli_context_t *cli_context) +{ + if (cli_context->buff) { + spdk_dma_free(cli_context->buff); + } + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + int i; + + for (i = 0; i <= g_script.max_index; i++) { + free(g_script.cmdline[i]); + } + } + free(cli_context); +} + +/* + * Callback routine for the blobstore unload. + */ +static void +unload_complete(void *cb_arg, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + printf("Error %d unloading the bobstore\n", bserrno); + cli_context->rc = bserrno; + } + + /* + * Quit if we're in cmd mode or exiting shell mode, otherwise + * clear the action field and start the main function again. + */ + if (cli_context->cli_mode == CLI_MODE_CMD || + cli_context->action == CLI_SHELL_EXIT) { + spdk_app_stop(cli_context->rc); + } else { + /* when action is CLI_NONE, we know we need to remain in the shell */ + cli_context->bs = NULL; + cli_context->action = CLI_NONE; + cli_start(cli_context, NULL); + } +} + +/* + * Unload the blobstore. + */ +static void +unload_bs(struct cli_context_t *cli_context, char *msg, int bserrno) +{ + if (bserrno) { + printf("%s (err %d)\n", msg, bserrno); + cli_context->rc = bserrno; + } + + if (cli_context->bs) { + if (cli_context->channel) { + spdk_bs_free_io_channel(cli_context->channel); + cli_context->channel = NULL; + } + spdk_bs_unload(cli_context->bs, unload_complete, cli_context); + } else if (cli_context->cli_mode != CLI_MODE_SCRIPT) { + spdk_app_stop(bserrno); + + } +} + +/* + * Callback for closing a blob. + */ +static void +close_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in close callback", + bserrno); + return; + } + unload_bs(cli_context, "", 0); +} + +/* + * Callback function for sync'ing metadata. 
+ */ +static void +sync_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in sync callback", + bserrno); + return; + } + + spdk_blob_close(cli_context->blob, close_cb, cli_context); +} + +static void +resize_cb(void *cb_arg, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + uint64_t total = 0; + + if (bserrno) { + unload_bs(cli_context, "Error in blob resize", + bserrno); + return; + } + + total = spdk_blob_get_num_clusters(cli_context->blob); + printf("blob now has USED clusters of %" PRIu64 "\n", + total); + + /* + * Always a good idea to sync after MD changes or the changes + * may be lost if things aren't closed cleanly. + */ + spdk_blob_sync_md(cli_context->blob, sync_cb, cli_context); +} + +/* + * Callback function for opening a blob after creating. + */ +static void +open_now_resize_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in open completion", + bserrno); + return; + } + cli_context->blob = blob; + + spdk_blob_resize(cli_context->blob, cli_context->num_clusters, + resize_cb, cli_context); +} + +/* + * Callback function for creating a blob. + */ +static void +blob_create_cb(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in blob create callback", + bserrno); + return; + } + + cli_context->blobid = blobid; + printf("New blob id %" PRIu64 "\n", cli_context->blobid); + + /* if we're in script mode, we need info on all blobids for later */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + g_script.blobid[g_script.blobid_idx++] = blobid; + } + + /* We have to open the blob before we can do things like resize. 
*/ + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + open_now_resize_cb, cli_context); +} + +/* + * Callback for get_super where we'll continue on to show blobstore info. + */ +static void +show_bs_cb(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + struct spdk_bs_type bstype; + uint64_t val; + struct spdk_bdev *bdev = NULL; + + if (bserrno && bserrno != -ENOENT) { + unload_bs(cli_context, "Error in get_super callback", + bserrno); + return; + } + cli_context->superid = blobid; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + unload_bs(cli_context, "Error w/bdev in get_super callback", + bserrno); + return; + } + + printf("Blobstore Public Info:\n"); + printf("\tUsing bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + printf("\tAPI Version: %d\n", SPDK_BS_VERSION); + + if (bserrno != -ENOENT) { + printf("\tsuper blob ID: %" PRIu64 "\n", cli_context->superid); + } else { + printf("\tsuper blob ID: none assigned\n"); + } + + printf("\tpage size: %" PRIu64 "\n", cli_context->page_size); + printf("\tio unit size: %" PRIu64 "\n", cli_context->io_unit_size); + + val = spdk_bs_get_cluster_size(cli_context->bs); + printf("\tcluster size: %" PRIu64 "\n", val); + + val = spdk_bs_free_cluster_count(cli_context->bs); + printf("\t# free clusters: %" PRIu64 "\n", val); + + bstype = spdk_bs_get_bstype(cli_context->bs); + spdk_trace_dump(stdout, "\tblobstore type:", &bstype, sizeof(bstype)); + + /* + * Private info isn't accessible via the public API but + * may be useful for debug of blobstore based applications. + */ + printf("\nBlobstore Private Info:\n"); + printf("\tMetadata start (pages): %" PRIu64 "\n", + cli_context->bs->md_start); + printf("\tMetadata length (pages): %d\n", + cli_context->bs->md_len); + + unload_bs(cli_context, "", 0); +} + +/* + * Show detailed info about a particular blob. 
+ */ +static void +show_blob(struct cli_context_t *cli_context) +{ + uint64_t val; + struct spdk_xattr_names *names; + const void *value; + size_t value_len; + char data[BUFSIZE]; + unsigned int i; + + printf("Blob Public Info:\n"); + + printf("blob ID: %" PRIu64 "\n", cli_context->blobid); + + val = spdk_blob_get_num_clusters(cli_context->blob); + printf("# of clusters: %" PRIu64 "\n", val); + + printf("# of bytes: %" PRIu64 "\n", + val * spdk_bs_get_cluster_size(cli_context->bs)); + + val = spdk_blob_get_num_pages(cli_context->blob); + printf("# of pages: %" PRIu64 "\n", val); + + spdk_blob_get_xattr_names(cli_context->blob, &names); + + printf("# of xattrs: %d\n", spdk_xattr_names_get_count(names)); + printf("xattrs:\n"); + for (i = 0; i < spdk_xattr_names_get_count(names); i++) { + spdk_blob_get_xattr_value(cli_context->blob, + spdk_xattr_names_get_name(names, i), + &value, &value_len); + if ((value_len + 1) > sizeof(data)) { + printf("FYI: adjusting size of xattr due to CLI limits.\n"); + value_len = sizeof(data) - 1; + } + memcpy(&data, value, value_len); + data[value_len] = '\0'; + printf("\n(%d) Name:%s\n", i, + spdk_xattr_names_get_name(names, i)); + printf("(%d) Value:\n", i); + spdk_trace_dump(stdout, "", value, value_len); + } + + /* + * Private info isn't accessible via the public API but + * may be useful for debug of blobstore based applications. + */ + printf("\nBlob Private Info:\n"); + switch (cli_context->blob->state) { + case SPDK_BLOB_STATE_DIRTY: + printf("state: DIRTY\n"); + break; + case SPDK_BLOB_STATE_CLEAN: + printf("state: CLEAN\n"); + break; + case SPDK_BLOB_STATE_LOADING: + printf("state: LOADING\n"); + break; + default: + printf("state: UNKNOWN\n"); + break; + } + printf("open ref count: %d\n", + cli_context->blob->open_ref); + + spdk_xattr_names_free(names); +} + +/* + * Callback for getting the first blob, shared with simple blob listing as well. 
+ */ +static void +blob_iter_cb(void *arg1, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + if (bserrno == -ENOENT) { + /* this simply means there are no more blobs */ + unload_bs(cli_context, "", 0); + } else { + unload_bs(cli_context, "Error in blob iter callback", + bserrno); + } + return; + } + + if (cli_context->action == CLI_LIST_BLOBS) { + printf("\nList BLOBS:\n"); + printf("Found blob with ID# %" PRIu64 "\n", + spdk_blob_get_id(blob)); + } else if (spdk_blob_get_id(blob) == cli_context->blobid) { + /* + * Found the blob we're looking for, but we need to finish + * iterating even after showing the info so that internally + * the blobstore logic will close the blob. Or we could + * chose to close it now, either way. + */ + cli_context->blob = blob; + show_blob(cli_context); + } + + spdk_bs_iter_next(cli_context->bs, blob, blob_iter_cb, cli_context); +} + +/* + * Callback for setting the super blob ID. + */ +static void +set_super_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in set_super callback", + bserrno); + return; + } + + printf("Super Blob ID has been set.\n"); + unload_bs(cli_context, "", 0); +} + +/* + * Callback for set_xattr_open where we set or delete xattrs. 
+ */ +static void +set_xattr_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in blob open callback", + bserrno); + return; + } + cli_context->blob = blob; + + if (cli_context->action == CLI_SET_XATTR) { + spdk_blob_set_xattr(cli_context->blob, cli_context->key, + cli_context->value, strlen(cli_context->value) + 1); + printf("Xattr has been set.\n"); + } else { + spdk_blob_remove_xattr(cli_context->blob, cli_context->key); + printf("Xattr has been removed.\n"); + } + + spdk_blob_sync_md(cli_context->blob, sync_cb, cli_context); +} + +/* + * Callback function for reading a blob for dumping to a file. + */ +static void +read_dump_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + uint64_t bytes_written; + + if (bserrno) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error in read completion", + bserrno); + return; + } + + bytes_written = fwrite(cli_context->buff, NUM_IO_UNITS, cli_context->io_unit_size, + cli_context->fp); + if (bytes_written != cli_context->io_unit_size) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error with fwrite", + bserrno); + return; + } + + printf("."); + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + /* perform another read */ + spdk_blob_io_read(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, read_dump_cb, cli_context); + } else { + /* done reading */ + printf("\nFile write complete (to %s).\n", cli_context->file); + fclose(cli_context->fp); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback for write completion on the import of a file to a blob. 
+ */ +static void +write_imp_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + uint64_t bytes_read; + + if (bserrno) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error in write completion", + bserrno); + return; + } + + if (cli_context->bytes_so_far < cli_context->filesize) { + /* perform another file read */ + bytes_read = fread(cli_context->buff, 1, + cli_context->io_unit_size, + cli_context->fp); + cli_context->bytes_so_far += bytes_read; + + /* if this read is < 1 io_unit, fill with 0s */ + if (bytes_read < cli_context->io_unit_size) { + uint8_t *offset = cli_context->buff + bytes_read; + memset(offset, 0, cli_context->io_unit_size - bytes_read); + } + } else { + /* + * Done reading the file, fill the rest of the blob with 0s, + * yeah we're memsetting the same io_unit over and over here + */ + memset(cli_context->buff, 0, cli_context->io_unit_size); + } + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + printf("."); + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_imp_cb, cli_context); + } else { + /* done writing */ + printf("\nBlob import complete (from %s).\n", cli_context->file); + fclose(cli_context->fp); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback for open blobs where we'll continue on dump a blob to a file or + * import a file to a blob. For dump, the resulting file will always be the + * full size of the blob. For import, the blob will fill with the file + * contents first and then 0 out the rest of the blob. + */ +static void +dump_imp_open_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in blob open callback", + bserrno); + return; + } + cli_context->blob = blob; + + /* + * We'll transfer just one io_unit at a time to keep the buffer + * small. 
This could be bigger of course. + */ + cli_context->buff = spdk_dma_malloc(cli_context->io_unit_size, + ALIGN_4K, NULL); + if (cli_context->buff == NULL) { + printf("Error in allocating memory\n"); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + printf("Working"); + cli_context->blob_io_units = spdk_blob_get_num_io_units(cli_context->blob); + cli_context->io_unit_count = 0; + if (cli_context->action == CLI_DUMP_BLOB) { + cli_context->fp = fopen(cli_context->file, "w"); + if (cli_context->fp == NULL) { + printf("Error in opening file\n"); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + + /* read a io_unit of data from the blob */ + spdk_blob_io_read(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, read_dump_cb, cli_context); + } else { + cli_context->fp = fopen(cli_context->file, "r"); + if (cli_context->fp == NULL) { + printf("Error in opening file: errno %d\n", errno); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + + /* get the filesize then rewind read a io_unit of data from file */ + fseek(cli_context->fp, 0L, SEEK_END); + cli_context->filesize = ftell(cli_context->fp); + rewind(cli_context->fp); + cli_context->bytes_so_far = fread(cli_context->buff, NUM_IO_UNITS, + cli_context->io_unit_size, + cli_context->fp); + + /* if the file is < a io_unit, fill the rest with 0s */ + if (cli_context->filesize < cli_context->io_unit_size) { + uint8_t *offset = + cli_context->buff + cli_context->filesize; + + memset(offset, 0, + cli_context->io_unit_size - cli_context->filesize); + } + + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_imp_cb, cli_context); + } +} + +/* + * Callback function for writing a specific pattern to io_unit 0. 
+ */ +static void +write_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in write completion", + bserrno); + return; + } + printf("."); + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_cb, cli_context); + } else { + /* done writing */ + printf("\nBlob fill complete (with 0x%x).\n", cli_context->fill_value); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback function to fill a blob with a value, callback from open. + */ +static void +fill_blob_cb(void *arg1, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in open callback", + bserrno); + return; + } + + cli_context->blob = blob; + cli_context->io_unit_count = 0; + cli_context->blob_io_units = spdk_blob_get_num_io_units(cli_context->blob); + cli_context->buff = spdk_dma_malloc(cli_context->io_unit_size, + ALIGN_4K, NULL); + if (cli_context->buff == NULL) { + unload_bs(cli_context, "Error in allocating memory", + -ENOMEM); + return; + } + + memset(cli_context->buff, cli_context->fill_value, + cli_context->io_unit_size); + printf("Working"); + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, + STARTING_IO_UNIT, NUM_IO_UNITS, write_cb, cli_context); +} + +/* + * Multiple actions require us to open the bs first so here we use + * a common callback to set a bunch of values and then move on to + * the next step saved off via function pointer. 
+ */ +static void +load_bs_cb(void *arg1, struct spdk_blob_store *bs, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in load callback", + bserrno); + return; + } + + cli_context->bs = bs; + cli_context->page_size = spdk_bs_get_page_size(cli_context->bs); + cli_context->io_unit_size = spdk_bs_get_io_unit_size(cli_context->bs); + cli_context->channel = spdk_bs_alloc_io_channel(cli_context->bs); + if (cli_context->channel == NULL) { + unload_bs(cli_context, "Error in allocating channel", + -ENOMEM); + return; + } + + switch (cli_context->action) { + case CLI_SET_SUPER: + spdk_bs_set_super(cli_context->bs, cli_context->superid, + set_super_cb, cli_context); + break; + case CLI_SHOW_BS: + spdk_bs_get_super(cli_context->bs, show_bs_cb, cli_context); + break; + case CLI_CREATE_BLOB: + spdk_bs_create_blob(cli_context->bs, blob_create_cb, cli_context); + break; + case CLI_SET_XATTR: + case CLI_REM_XATTR: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + set_xattr_cb, cli_context); + break; + case CLI_SHOW_BLOB: + case CLI_LIST_BLOBS: + spdk_bs_iter_first(cli_context->bs, blob_iter_cb, cli_context); + + break; + case CLI_DUMP_BLOB: + case CLI_IMPORT_BLOB: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + dump_imp_open_cb, cli_context); + break; + case CLI_FILL: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + fill_blob_cb, cli_context); + break; + + default: + /* should never get here */ + exit(-1); + break; + } +} + +/* + * Load the blobstore. 
+ */ +static void +load_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + struct spdk_bs_dev *bs_dev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + + bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_load(bs_dev, NULL, load_bs_cb, cli_context); +} + +/* + * Lists all the blobs on this blobstore. + */ +static void +list_bdevs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + printf("\nList bdevs:\n"); + + bdev = spdk_bdev_first(); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + } + while (bdev) { + printf("\tbdev Name: %s\n", spdk_bdev_get_name(bdev)); + printf("\tbdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + bdev = spdk_bdev_next(bdev); + } + + printf("\n"); + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + } else { + cli_context->action = CLI_NONE; + cli_start(cli_context, NULL); + } +} + +/* + * Callback function for initializing a blob. + */ +static void +bs_init_cb(void *cb_arg, struct spdk_blob_store *bs, + int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in bs init callback", + bserrno); + return; + } + cli_context->bs = bs; + printf("blobstore init'd: (%p)\n", cli_context->bs); + + unload_bs(cli_context, "", 0); +} + +/* + * Initialize a new blobstore. 
+ */ +static void +init_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + printf("Init blobstore using bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + + cli_context->bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (cli_context->bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_init(cli_context->bs_dev, NULL, bs_init_cb, + cli_context); +} + +static void +spdk_bsdump_done(void *arg, int bserrno) +{ + struct cli_context_t *cli_context = arg; + + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + } else { + cli_context->action = CLI_NONE; + cli_start(cli_context, NULL); + } +} + +static void +bsdump_print_xattr(FILE *fp, const char *bstype, const char *name, const void *value, + size_t value_len) +{ + if (strncmp(bstype, "BLOBFS", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, "name") == 0) { + fprintf(fp, "%.*s", (int)value_len, (char *)value); + } else if (strcmp(name, "length") == 0 && value_len == sizeof(uint64_t)) { + uint64_t length; + + memcpy(&length, value, sizeof(length)); + fprintf(fp, "%" PRIu64, length); + } else { + fprintf(fp, "?"); + } + } else if (strncmp(bstype, "LVOLSTORE", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, "name") == 0) { + fprintf(fp, "%s", (char *)value); + } else if (strcmp(name, "uuid") == 0 && value_len == sizeof(struct spdk_uuid)) { + char uuid[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), (struct spdk_uuid *)value); + fprintf(fp, "%s", uuid); + } else { + fprintf(fp, "?"); + } + } else { + fprintf(fp, "?"); + } +} + +/* + * Dump metadata of an existing blobstore in a human-readable format. 
+ */ +static void +dump_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + printf("Init blobstore using bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + + cli_context->bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (cli_context->bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_dump(cli_context->bs_dev, stdout, bsdump_print_xattr, spdk_bsdump_done, cli_context); +} + +/* + * Common cmd/option parser for command and shell modes. + */ +static bool +cmd_parser(int argc, char **argv, struct cli_context_t *cli_context) +{ + int op; + int cmd_chosen = 0; + char resp; + + while ((op = getopt(argc, argv, "b:c:d:f:hil:m:n:p:r:s:DST:Xx:")) != -1) { + switch (op) { + case 'b': + if (strcmp(cli_context->bdev_name, "") == 0) { + snprintf(cli_context->bdev_name, BUFSIZE, "%s", optarg); + } else { + printf("Current setting for -b is: %s\n", cli_context->bdev_name); + usage(cli_context, "ERROR: -b option can only be set once.\n"); + } + break; + case 'c': + if (cli_context->app_started == false) { + cli_context->config_file = optarg; + } else { + usage(cli_context, "ERROR: -c option not valid during shell mode.\n"); + } + break; + case 'D': + cmd_chosen++; + cli_context->action = CLI_DUMP_BS; + break; + case 'd': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_DUMP_BLOB; + cli_context->blobid = atoll(optarg); + snprintf(cli_context->file, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'f': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_FILL; + cli_context->blobid = atoll(optarg); + cli_context->fill_value = atoi(argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + 
case 'h': + cmd_chosen++; + cli_context->action = CLI_HELP; + break; + case 'i': + if (cli_context->cli_mode != CLI_MODE_SCRIPT) { + printf("Your entire blobstore will be destroyed. Are you sure? (y/n) "); + if (scanf("%c%*c", &resp)) { + if (resp == 'y' || resp == 'Y') { + cmd_chosen++; + cli_context->action = CLI_INIT_BS; + } else { + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + return false; + } + } + } + } else { + cmd_chosen++; + cli_context->action = CLI_INIT_BS; + } + break; + case 'r': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_REM_XATTR; + cli_context->blobid = atoll(optarg); + snprintf(cli_context->key, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'l': + if (strcmp("bdevs", optarg) == 0) { + cmd_chosen++; + cli_context->action = CLI_LIST_BDEVS; + } else if (strcmp("blobs", optarg) == 0) { + cmd_chosen++; + cli_context->action = CLI_LIST_BLOBS; + } else { + usage(cli_context, "ERROR: invalid option for list\n"); + } + break; + case 'm': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_IMPORT_BLOB; + cli_context->blobid = atoll(optarg); + snprintf(cli_context->file, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'n': + cli_context->num_clusters = atoi(optarg); + if (cli_context->num_clusters > 0) { + cmd_chosen++; + cli_context->action = CLI_CREATE_BLOB; + } else { + usage(cli_context, "ERROR: invalid option for new\n"); + } + break; + case 'p': + cmd_chosen++; + cli_context->action = CLI_SET_SUPER; + cli_context->superid = atoll(optarg); + break; + case 'S': + if (cli_context->cli_mode == CLI_MODE_CMD) { + cmd_chosen++; + cli_context->cli_mode = CLI_MODE_SHELL; + } + cli_context->action = CLI_NONE; + break; + case 's': + cmd_chosen++; + if (strcmp("bs", optarg) == 0) { + cli_context->action = CLI_SHOW_BS; + } else { + cli_context->action = 
CLI_SHOW_BLOB; + cli_context->blobid = atoll(optarg); + } + break; + case 'T': + if (cli_context->cli_mode == CLI_MODE_CMD) { + cmd_chosen++; + cli_context->cli_mode = CLI_MODE_SCRIPT; + if (argv[optind] && (strcmp("ignore", argv[optind]) == 0)) { + g_script.ignore_errors = true; + } else { + g_script.ignore_errors = false; + } + snprintf(cli_context->script_file, BUFSIZE, "%s", optarg); + } else { + cli_context->action = CLI_NONE; + } + break; + case 'X': + cmd_chosen++; + cli_context->action = CLI_SHELL_EXIT; + break; + case 'x': + if (argv[optind] != NULL || argv[optind + 1] != NULL) { + cmd_chosen++; + cli_context->action = CLI_SET_XATTR; + cli_context->blobid = atoll(optarg); + snprintf(cli_context->key, BUFSIZE, "%s", argv[optind]); + snprintf(cli_context->value, BUFSIZE, "%s", argv[optind + 1]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + default: + usage(cli_context, "ERROR: invalid option\n"); + } + /* only one actual command can be done at a time */ + if (cmd_chosen > 1) { + usage(cli_context, "Error: Please choose only one command\n"); + } + } + + if (cli_context->cli_mode == CLI_MODE_CMD && cmd_chosen == 0) { + usage(cli_context, "Error: Please choose a command.\n"); + } + + /* + * We don't check the local boolean because in some modes it will have been set + * on and earlier command. + */ + if (strcmp(cli_context->bdev_name, "") == 0) { + usage(cli_context, "Error: -b option is required.\n"); + cmd_chosen = 0; + } + + /* in shell mode we'll call getopt multiple times so need to reset its index */ + optind = 0; + return (cmd_chosen == 1); +} + +/* + * In script mode, we parsed a script file at startup and saved off a bunch of cmd + * lines that we now parse with each run of cli_start so we us the same cmd parser + * as cmd and shell modes. 
+ */ +static bool +line_parser(struct cli_context_t *cli_context) +{ + bool cmd_chosen; + char *tok = NULL; + int blob_num = 0; + int start_idx = cli_context->argc; + int i; + + printf("\nSCRIPT NOW PROCESSING: %s\n", g_script.cmdline[g_script.cmdline_idx]); + tok = strtok(g_script.cmdline[g_script.cmdline_idx], " "); + while (tok != NULL) { + /* + * We support one replaceable token right now, a $Bn + * represents the blobid that was created in position n + * so fish this out now and use it here. + */ + cli_context->argv[cli_context->argc] = strdup(tok); + if (tok[0] == '$' && tok[1] == 'B') { + tok += 2; + blob_num = atoi(tok); + if (blob_num >= 0 && blob_num < MAX_SCRIPT_BLOBS) { + cli_context->argv[cli_context->argc] = + realloc(cli_context->argv[cli_context->argc], BUFSIZE); + if (cli_context->argv[cli_context->argc] == NULL) { + printf("ERROR: unable to realloc memory\n"); + spdk_app_stop(-1); + } + if (g_script.blobid[blob_num] == 0) { + printf("ERROR: There is no blob for $B%d\n", + blob_num); + } + snprintf(cli_context->argv[cli_context->argc], BUFSIZE, + "%" PRIu64, g_script.blobid[blob_num]); + } else { + printf("ERROR: Invalid token or exceeded max blobs of %d\n", + MAX_SCRIPT_BLOBS); + } + } + cli_context->argc++; + tok = strtok(NULL, " "); + } + + /* call parse cmd line with user input as args */ + cmd_chosen = cmd_parser(cli_context->argc, &cli_context->argv[0], cli_context); + + /* free strdup memory and reset arg count for next shell interaction */ + for (i = start_idx; i < cli_context->argc; i++) { + free(cli_context->argv[i]); + cli_context->argv[i] = NULL; + } + cli_context->argc = 1; + + g_script.cmdline_idx++; + assert(g_script.cmdline_idx < MAX_SCRIPT_LINES); + + if (cmd_chosen == false) { + printf("ERROR: Invalid script line starting with: %s\n\n", + g_script.cmdline[g_script.cmdline_idx - 1]); + if (g_script.ignore_errors == false) { + printf("** Aborting **\n"); + cli_context->action = CLI_SHELL_EXIT; + cmd_chosen = true; + 
unload_bs(cli_context, "", 0); + } else { + printf("** Skipping **\n"); + } + } + + return cmd_chosen; +} + +/* + * For script mode, we read a series of commands from a text file and store them + * in a global struct. That, along with the cli_mode that tells us we're in + * script mode is what feeds the rest of the app in the same way as is it were + * getting commands from shell mode. + */ +static void +parse_script(struct cli_context_t *cli_context) +{ + FILE *fp = NULL; + size_t bufsize = BUFSIZE; + int64_t bytes_in = 0; + int i = 0; + + /* initialize global script values */ + for (i = 0; i < MAX_SCRIPT_BLOBS; i++) { + g_script.blobid[i] = 0; + } + g_script.blobid_idx = 0; + g_script.cmdline_idx = 0; + i = 0; + + fp = fopen(cli_context->script_file, "r"); + if (fp == NULL) { + printf("ERROR: unable to open script: %s\n", + cli_context->script_file); + cli_cleanup(cli_context); + exit(-1); + } + + do { + bytes_in = getline(&g_script.cmdline[i], &bufsize, fp); + if (bytes_in > 0) { + /* replace newline with null */ + spdk_str_chomp(g_script.cmdline[i]); + + /* ignore comments */ + if (g_script.cmdline[i][0] != '#') { + i++; + } + } + } while (bytes_in != -1 && i < MAX_SCRIPT_LINES - 1); + fclose(fp); + + /* add an exit cmd in case they didn't */ + g_script.cmdline[i] = realloc(g_script.cmdline[i], BUFSIZE); + if (g_script.cmdline[i] == NULL) { + int j; + + for (j = 0; j < i; j++) { + free(g_script.cmdline[j]); + g_script.cmdline[j] = NULL; + } + unload_bs(cli_context, "ERROR: unable to alloc memory.\n", 0); + } + snprintf(g_script.cmdline[i], BUFSIZE, "%s", "-X"); + g_script.max_index = i; +} + +/* + * Provides for a shell interface as opposed to one shot command line. 
+ */ +static bool +cli_shell(void *arg1, void *arg2) +{ + struct cli_context_t *cli_context = arg1; + char *line = NULL; + ssize_t buf_size = 0; + ssize_t bytes_in = 0; + ssize_t tok_len = 0; + char *tok = NULL; + bool cmd_chosen = false; + int start_idx = cli_context->argc; + int i; + + printf("blob> "); + bytes_in = getline(&line, &buf_size, stdin); + + /* If getline() failed (EOF), exit the shell. */ + if (bytes_in < 0) { + free(line); + cli_context->action = CLI_SHELL_EXIT; + return true; + } + + /* parse input and update cli_context so we can use common option parser */ + if (bytes_in > 0) { + tok = strtok(line, " "); + } + while ((tok != NULL) && (cli_context->argc < MAX_ARGS)) { + cli_context->argv[cli_context->argc] = strdup(tok); + tok_len = strlen(tok); + cli_context->argc++; + tok = strtok(NULL, " "); + } + + /* replace newline on last arg with null */ + if (tok_len) { + spdk_str_chomp(cli_context->argv[cli_context->argc - 1]); + } + + /* call parse cmd line with user input as args */ + cmd_chosen = cmd_parser(cli_context->argc, &cli_context->argv[0], cli_context); + + /* free strdup mem & reset arg count for next shell interaction */ + for (i = start_idx; i < cli_context->argc; i++) { + free(cli_context->argv[i]); + cli_context->argv[i] = NULL; + } + cli_context->argc = 1; + + free(line); + + return cmd_chosen; +} + +/* + * This is the function we pass into the SPDK framework that gets + * called first. + */ +static void +cli_start(void *arg1, void *arg2) +{ + struct cli_context_t *cli_context = arg1; + + /* + * If we're in script mode, we already have a list of commands so + * just need to pull them out one at a time and process them. + */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + while (line_parser(cli_context) == false); + } + + /* + * The initial cmd line options are parsed once before this function is + * called so if there is no action, we're in shell mode and will loop + * here until a a valid option is parsed and returned. 
+ */ + if (cli_context->action == CLI_NONE) { + while (cli_shell(cli_context, NULL) == false); + } + + /* Decide what to do next based on cmd line parsing. */ + switch (cli_context->action) { + case CLI_SET_SUPER: + case CLI_SHOW_BS: + case CLI_CREATE_BLOB: + case CLI_SET_XATTR: + case CLI_REM_XATTR: + case CLI_SHOW_BLOB: + case CLI_LIST_BLOBS: + case CLI_DUMP_BLOB: + case CLI_IMPORT_BLOB: + case CLI_FILL: + load_bs(cli_context); + break; + case CLI_INIT_BS: + init_bs(cli_context); + break; + case CLI_DUMP_BS: + dump_bs(cli_context); + break; + case CLI_LIST_BDEVS: + list_bdevs(cli_context); + break; + case CLI_SHELL_EXIT: + /* + * Because shell mode reuses cmd mode functions, the blobstore + * is loaded/unloaded with every action so we just need to + * stop the framework. For this app there's no need to optimize + * and keep the blobstore open while the app is in shell mode. + */ + spdk_app_stop(0); + break; + case CLI_HELP: + usage(cli_context, ""); + unload_complete(cli_context, 0); + break; + default: + /* should never get here */ + exit(-1); + break; + } +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + struct cli_context_t *cli_context = NULL; + bool cmd_chosen; + int rc = 0; + + if (argc < 2) { + usage(cli_context, "ERROR: Invalid option\n"); + exit(-1); + } + + cli_context = calloc(1, sizeof(struct cli_context_t)); + if (cli_context == NULL) { + printf("ERROR: could not allocate context structure\n"); + exit(-1); + } + + /* default to CMD mode until we've parsed the first parms */ + cli_context->cli_mode = CLI_MODE_CMD; + cli_context->argv[0] = strdup(argv[0]); + cli_context->argc = 1; + + /* parse command line */ + cmd_chosen = cmd_parser(argc, argv, cli_context); + free(cli_context->argv[0]); + cli_context->argv[0] = NULL; + if (cmd_chosen == false) { + cli_cleanup(cli_context); + exit(-1); + } + + /* after displaying help, just exit */ + if (cli_context->action == CLI_HELP) { + usage(cli_context, ""); + 
cli_cleanup(cli_context); + exit(-1); + } + + /* if they don't supply a conf name, use the default */ + if (!cli_context->config_file) { + cli_context->config_file = program_conf; + } + + /* if the config file doesn't exist, tell them how to make one */ + if (access(cli_context->config_file, F_OK) == -1) { + printf("Error: No config file found.\n"); + printf("To create a config file named 'blobcli.conf' for your NVMe device:\n"); + printf(" <path to spdk>/scripts/gen_nvme.sh > blobcli.conf\n"); + printf("and then re-run the cli tool.\n"); + exit(-1); + } + + /* + * For script mode we keep a bunch of stuff in a global since + * none if it is passed back and forth to SPDK. + */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + /* + * Now we'll build up the global which will direct this run of the app + * as it will have a list (g_script) of all of the commands line by + * line as if they were typed in on the shell at cmd line. + */ + parse_script(cli_context); + } + + /* Set default values in opts struct along with name and conf file. */ + spdk_app_opts_init(&opts); + opts.name = "blobcli"; + opts.config_file = cli_context->config_file; + + cli_context->app_started = true; + rc = spdk_app_start(&opts, cli_start, cli_context, NULL); + if (rc) { + printf("ERROR!\n"); + } + + /* Free up memory that we allocated */ + cli_cleanup(cli_context); + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/hello_world/.gitignore b/src/spdk/examples/blob/hello_world/.gitignore new file mode 100644 index 00000000..683a2255 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/.gitignore @@ -0,0 +1 @@ +hello_blob diff --git a/src/spdk/examples/blob/hello_world/Makefile b/src/spdk/examples/blob/hello_world/Makefile new file mode 100644 index 00000000..7c567fcb --- /dev/null +++ b/src/spdk/examples/blob/hello_world/Makefile @@ -0,0 +1,57 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_blob + +C_SRCS := hello_blob.c + +SPDK_LIB_LIST = event_bdev event_copy +SPDK_LIB_LIST += blobfs blob bdev blob_bdev copy event thread util conf trace \ + log jsonrpc json rpc + +LIBS += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_MODULES_LINKER_ARGS) $(SOCK_MODULES_LINKER_ARGS) +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all : $(APP) + @: + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(COPY_MODULES_FILES) $(BLOCKDEV_MODULES_FILES) $(SOCK_MODULES_FILES) $(LINKER_MODULES) $(ENV_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/blob/hello_world/hello_blob.c b/src/spdk/examples/blob/hello_world/hello_blob.c new file mode 100644 index 00000000..ffcc2976 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/hello_blob.c @@ -0,0 +1,496 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/log.h" + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. + */ +struct hello_context_t { + struct spdk_blob_store *bs; + struct spdk_blob *blob; + spdk_blob_id blobid; + struct spdk_io_channel *channel; + uint8_t *read_buff; + uint8_t *write_buff; + uint64_t page_size; + int rc; +}; + +/* + * Free up memory that we allocated. + */ +static void +hello_cleanup(struct hello_context_t *hello_context) +{ + spdk_dma_free(hello_context->read_buff); + spdk_dma_free(hello_context->write_buff); + free(hello_context); +} + +/* + * Callback routine for the blobstore unload. + */ +static void +unload_complete(void *cb_arg, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + SPDK_ERRLOG("Error %d unloading the bobstore\n", bserrno); + hello_context->rc = bserrno; + } + + spdk_app_stop(hello_context->rc); +} + +/* + * Unload the blobstore, cleaning up as needed. 
+ */ +static void +unload_bs(struct hello_context_t *hello_context, char *msg, int bserrno) +{ + if (bserrno) { + SPDK_ERRLOG("%s (err %d)\n", msg, bserrno); + hello_context->rc = bserrno; + } + if (hello_context->bs) { + if (hello_context->channel) { + spdk_bs_free_io_channel(hello_context->channel); + } + spdk_bs_unload(hello_context->bs, unload_complete, hello_context); + } else { + spdk_app_stop(bserrno); + } +} + +/* + * Callback routine for the deletion of a blob. + */ +static void +delete_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in delete completion", + bserrno); + return; + } + + /* We're all done, we can unload the blobstore. */ + unload_bs(hello_context, "", 0); +} + +/* + * Function for deleting a blob. + */ +static void +delete_blob(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in close completion", + bserrno); + return; + } + + spdk_bs_delete_blob(hello_context->bs, hello_context->blobid, + delete_complete, hello_context); +} + +/* + * Callback function for reading a blob. + */ +static void +read_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + int match_res = -1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in read completion", + bserrno); + return; + } + + /* Now let's make sure things match. */ + match_res = memcmp(hello_context->write_buff, hello_context->read_buff, + hello_context->page_size); + if (match_res) { + unload_bs(hello_context, "Error in data compare", -1); + return; + } else { + SPDK_NOTICELOG("read SUCCESS and data matches!\n"); + } + + /* Now let's close it and delete the blob in the callback. */ + spdk_blob_close(hello_context->blob, delete_blob, hello_context); +} + +/* + * Function for reading a blob. 
+ */ +static void +read_blob(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + + hello_context->read_buff = spdk_dma_malloc(hello_context->page_size, + 0x1000, NULL); + if (hello_context->read_buff == NULL) { + unload_bs(hello_context, "Error in memory allocation", + -ENOMEM); + return; + } + + /* Issue the read and compare the results in the callback. */ + spdk_blob_io_read(hello_context->blob, hello_context->channel, + hello_context->read_buff, 0, 1, read_complete, + hello_context); +} + +/* + * Callback function for writing a blob. + */ +static void +write_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in write completion", + bserrno); + return; + } + + /* Now let's read back what we wrote and make sure it matches. */ + read_blob(hello_context); +} + +/* + * Function for writing to a blob. + */ +static void +blob_write(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + + /* + * Buffers for data transfer need to be allocated via SPDK. We will + * tranfer 1 page of 4K aligned data at offset 0 in the blob. + */ + hello_context->write_buff = spdk_dma_malloc(hello_context->page_size, + 0x1000, NULL); + if (hello_context->write_buff == NULL) { + unload_bs(hello_context, "Error in allocating memory", + -ENOMEM); + return; + } + memset(hello_context->write_buff, 0x5a, hello_context->page_size); + + /* Now we have to allocate a channel. */ + hello_context->channel = spdk_bs_alloc_io_channel(hello_context->bs); + if (hello_context->channel == NULL) { + unload_bs(hello_context, "Error in allocating channel", + -ENOMEM); + return; + } + + /* Let's perform the write, 1 page at offset 0. */ + spdk_blob_io_write(hello_context->blob, hello_context->channel, + hello_context->write_buff, + 0, 1, write_complete, hello_context); +} + +/* + * Callback function for sync'ing metadata. 
+ */ +static void +sync_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in sync callback", + bserrno); + return; + } + + /* Blob has been created & sized & MD sync'd, let's write to it. */ + blob_write(hello_context); +} + +static void +resize_complete(void *cb_arg, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + uint64_t total = 0; + + if (bserrno) { + unload_bs(hello_context, "Error in blob resize", bserrno); + return; + } + + total = spdk_blob_get_num_clusters(hello_context->blob); + SPDK_NOTICELOG("resized blob now has USED clusters of %" PRIu64 "\n", + total); + + /* + * Metadata is stored in volatile memory for performance + * reasons and therefore needs to be synchronized with + * non-volatile storage to make it persistent. This can be + * done manually, as shown here, or if not it will be done + * automatically when the blob is closed. It is always a + * good idea to sync after making metadata changes unless + * it has an unacceptable impact on application performance. + */ + spdk_blob_sync_md(hello_context->blob, sync_complete, hello_context); +} + +/* + * Callback function for opening a blob. + */ +static void +open_complete(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + uint64_t free = 0; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in open completion", + bserrno); + return; + } + + + hello_context->blob = blob; + free = spdk_bs_free_cluster_count(hello_context->bs); + SPDK_NOTICELOG("blobstore has FREE clusters of %" PRIu64 "\n", + free); + + /* + * Before we can use our new blob, we have to resize it + * as the initial size is 0. For this example we'll use the + * full size of the blobstore but it would be expected that + * there'd usually be many blobs of various sizes. The resize + * unit is a cluster. 
+ */ + spdk_blob_resize(hello_context->blob, free, resize_complete, hello_context); +} + +/* + * Callback function for creating a blob. + */ +static void +blob_create_complete(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in blob create callback", + bserrno); + return; + } + + hello_context->blobid = blobid; + SPDK_NOTICELOG("new blob id %" PRIu64 "\n", hello_context->blobid); + + /* We have to open the blob before we can do things like resize. */ + spdk_bs_open_blob(hello_context->bs, hello_context->blobid, + open_complete, hello_context); +} + +/* + * Function for creating a blob. + */ +static void +create_blob(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + spdk_bs_create_blob(hello_context->bs, blob_create_complete, hello_context); +} + +/* + * Callback function for initializing the blobstore. + */ +static void +bs_init_complete(void *cb_arg, struct spdk_blob_store *bs, + int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error init'ing the blobstore", + bserrno); + return; + } + + hello_context->bs = bs; + SPDK_NOTICELOG("blobstore: %p\n", hello_context->bs); + /* + * We will use the page size in allocating buffers, etc., later + * so we'll just save it in out context buffer here. + */ + hello_context->page_size = spdk_bs_get_page_size(hello_context->bs); + + /* + * The blostore has been initialized, let's create a blob. + * Note that we could pass a message back to ourselves using + * spdk_thread_send_msg() if we wanted to keep our processing + * time limited. + */ + create_blob(hello_context); +} + +/* + * Our initial event that kicks off everything from main(). 
+ */ +static void +hello_start(void *arg1, void *arg2) +{ + struct hello_context_t *hello_context = arg1; + struct spdk_bdev *bdev = NULL; + struct spdk_bs_dev *bs_dev = NULL; + + SPDK_NOTICELOG("entry\n"); + /* + * Get the bdev. For this example it is our malloc (RAM) + * disk configured via hello_blob.conf that was passed + * in when we started the SPDK app framework so we can + * get it via its name. + */ + bdev = spdk_bdev_get_by_name("Malloc0"); + if (bdev == NULL) { + SPDK_ERRLOG("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + + /* + * spdk_bs_init() requires us to fill out the structure + * spdk_bs_dev with a set of callbacks. These callbacks + * implement read, write, and other operations on the + * underlying disks. As a convenience, a utility function + * is provided that creates an spdk_bs_dev that implements + * all of the callbacks by forwarding the I/O to the + * SPDK bdev layer. Other helper functions are also + * available in the blob lib in blob_bdev.c that simply + * make it easier to layer blobstore on top of a bdev. + * However blobstore can be more tightly integrated into + * any lower layer, such as NVMe for example. + */ + bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (bs_dev == NULL) { + SPDK_ERRLOG("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_init(bs_dev, NULL, bs_init_complete, hello_context); +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t *hello_context = NULL; + + SPDK_NOTICELOG("entry\n"); + + /* Set default values in opts structure. */ + spdk_app_opts_init(&opts); + + /* + * Setup a few specifics before we init, for most SPDK cmd line + * apps, the config file will be passed in as an arg but to make + * this example super simple we just hardcode it. We also need to + * specify a name for the app. 
+ */ + opts.name = "hello_blob"; + opts.config_file = "hello_blob.conf"; + + + /* + * Now we'll allocate and intialize the blobstore itself. We + * can pass in an spdk_bs_opts if we want something other than + * the defaults (cluster size, etc), but here we'll just take the + * defaults. We'll also pass in a struct that we'll use for + * callbacks so we've got efficient bookeeping of what we're + * creating. This is an async operation and bs_init_complete() + * will be called when it is complete. + */ + hello_context = calloc(1, sizeof(struct hello_context_t)); + if (hello_context != NULL) { + /* + * spdk_app_start() will block running hello_start() until + * spdk_app_stop() is called by someone (not simply when + * hello_start() returns), or if an error occurs during + * spdk_app_start() before hello_start() runs. + */ + rc = spdk_app_start(&opts, hello_start, hello_context, NULL); + if (rc) { + SPDK_NOTICELOG("ERROR!\n"); + } else { + SPDK_NOTICELOG("SUCCCESS!\n"); + } + /* Free up memory that we allocated */ + hello_cleanup(hello_context); + } else { + SPDK_ERRLOG("Could not alloc hello_context struct!!\n"); + rc = -ENOMEM; + } + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/hello_world/hello_blob.conf b/src/spdk/examples/blob/hello_world/hello_blob.conf new file mode 100644 index 00000000..3fa7e9d9 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/hello_blob.conf @@ -0,0 +1,3 @@ +[Malloc] + NumberOfLuns 1 + LunSizeInMB 16 diff --git a/src/spdk/examples/ioat/Makefile b/src/spdk/examples/ioat/Makefile new file mode 100644 index 00000000..d4d62b91 --- /dev/null +++ b/src/spdk/examples/ioat/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += perf verify + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/ioat/perf/.gitignore b/src/spdk/examples/ioat/perf/.gitignore new file mode 100644 index 00000000..bd14107d --- /dev/null +++ b/src/spdk/examples/ioat/perf/.gitignore @@ -0,0 +1 @@ +perf diff --git a/src/spdk/examples/ioat/perf/Makefile b/src/spdk/examples/ioat/perf/Makefile new file mode 100644 index 00000000..518a3507 --- /dev/null +++ b/src/spdk/examples/ioat/perf/Makefile @@ -0,0 +1,55 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk + +APP = perf + +C_SRCS := perf.c + +SPDK_LIB_LIST = ioat thread util log + +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all: $(APP) + @: + +$(APP): $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) + $(LINK_C) + +clean: + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/ioat/perf/perf.c b/src/spdk/examples/ioat/perf/perf.c new file mode 100644 index 00000000..fdc22209 --- /dev/null +++ b/src/spdk/examples/ioat/perf/perf.c @@ -0,0 +1,575 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/ioat.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/string.h" + +struct user_config { + int xfer_size_bytes; + int queue_depth; + int time_in_sec; + bool verify; + char *core_mask; + int ioat_chan_num; +}; + +struct ioat_device { + struct spdk_ioat_chan *ioat; + TAILQ_ENTRY(ioat_device) tailq; +}; + +static TAILQ_HEAD(, ioat_device) g_devices; +static struct ioat_device *g_next_device; + +static struct user_config g_user_config; + +struct ioat_chan_entry { + struct spdk_ioat_chan *chan; + int ioat_chan_id; + uint64_t xfer_completed; + uint64_t xfer_failed; + uint64_t current_queue_depth; + bool is_draining; + struct spdk_mempool *data_pool; + struct spdk_mempool *task_pool; + struct ioat_chan_entry *next; +}; + +struct worker_thread { + struct ioat_chan_entry *ctx; + struct worker_thread *next; + unsigned core; +}; + +struct ioat_task { + struct ioat_chan_entry *ioat_chan_entry; + void *src; + void *dst; +}; + +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; +static int g_ioat_chan_num = 0; + +static void submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct 
ioat_task *ioat_task, + void *dst, void *src); + +static void +construct_user_config(struct user_config *self) +{ + self->xfer_size_bytes = 4096; + self->ioat_chan_num = 1; + self->queue_depth = 256; + self->time_in_sec = 10; + self->verify = false; + self->core_mask = "0x1"; +} + +static void +dump_user_config(struct user_config *self) +{ + printf("User configuration:\n"); + printf("Number of channels: %u\n", self->ioat_chan_num); + printf("Transfer size: %u bytes\n", self->xfer_size_bytes); + printf("Queue depth: %u\n", self->queue_depth); + printf("Run time: %u seconds\n", self->time_in_sec); + printf("Core mask: %s\n", self->core_mask); + printf("Verify: %s\n\n", self->verify ? "Yes" : "No"); +} + +static void +ioat_exit(void) +{ + struct ioat_device *dev; + + while (!TAILQ_EMPTY(&g_devices)) { + dev = TAILQ_FIRST(&g_devices); + TAILQ_REMOVE(&g_devices, dev, tailq); + if (dev->ioat) { + spdk_ioat_detach(dev->ioat); + } + spdk_dma_free(dev); + } +} + +static void +ioat_done(void *cb_arg) +{ + struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct ioat_chan_entry *ioat_chan_entry = ioat_task->ioat_chan_entry; + + if (g_user_config.verify && memcmp(ioat_task->src, ioat_task->dst, g_user_config.xfer_size_bytes)) { + ioat_chan_entry->xfer_failed++; + } else { + ioat_chan_entry->xfer_completed++; + } + + ioat_chan_entry->current_queue_depth--; + + if (ioat_chan_entry->is_draining) { + spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->src); + spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->dst); + spdk_mempool_put(ioat_chan_entry->task_pool, ioat_task); + } else { + submit_single_xfer(ioat_chan_entry, ioat_task, ioat_task->dst, ioat_task->src); + } +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + 
worker->core = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + struct ioat_chan_entry *entry, *entry1; + + /* Free ioat_chan_entry and worker thread */ + while (worker) { + struct worker_thread *next_worker = worker->next; + entry = worker->ctx; + while (entry) { + entry1 = entry->next; + spdk_mempool_free(entry->data_pool); + spdk_mempool_free(entry->task_pool); + free(entry); + entry = entry1; + } + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev) +{ + printf(" Found matching device at %04x:%02x:%02x.%x " + "vendor:0x%04x device:0x%04x\n", + spdk_pci_device_get_domain(pci_dev), + spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev), + spdk_pci_device_get_func(pci_dev), + spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev)); + + return true; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat) +{ + struct ioat_device *dev; + + if (g_ioat_chan_num >= g_user_config.ioat_chan_num) { + return; + } + + dev = spdk_dma_zmalloc(sizeof(*dev), 0, NULL); + if (dev == NULL) { + printf("Failed to allocate device struct\n"); + return; + } + + dev->ioat = ioat; + g_ioat_chan_num++; + TAILQ_INSERT_TAIL(&g_devices, dev, tailq); +} + +static int +ioat_init(void) +{ + TAILQ_INIT(&g_devices); + + if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) { + fprintf(stderr, "ioat_probe() failed\n"); + return 1; + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options\n", program_name); + printf("\t[-h help message]\n"); + printf("\t[-c core mask for distributing I/O submission/completion work]\n"); + printf("\t[-q queue depth]\n"); + printf("\t[-n number of channels]\n"); + printf("\t[-o transfer size in bytes]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-v 
verify copy result if this switch is on]\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + construct_user_config(&g_user_config); + while ((op = getopt(argc, argv, "c:hn:o:q:t:v")) != -1) { + switch (op) { + case 'o': + g_user_config.xfer_size_bytes = atoi(optarg); + break; + case 'n': + g_user_config.ioat_chan_num = atoi(optarg); + break; + case 'q': + g_user_config.queue_depth = atoi(optarg); + break; + case 't': + g_user_config.time_in_sec = atoi(optarg); + break; + case 'c': + g_user_config.core_mask = optarg; + break; + case 'v': + g_user_config.verify = true; + break; + case 'h': + usage(argv[0]); + exit(0); + default: + usage(argv[0]); + return 1; + } + } + if (!g_user_config.xfer_size_bytes || !g_user_config.queue_depth || + !g_user_config.time_in_sec || !g_user_config.core_mask || + !g_user_config.ioat_chan_num) { + usage(argv[0]); + return 1; + } + + return 0; +} + +static void +drain_io(struct ioat_chan_entry *ioat_chan_entry) +{ + while (ioat_chan_entry->current_queue_depth > 0) { + spdk_ioat_process_events(ioat_chan_entry->chan); + } +} + +static void +submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct ioat_task *ioat_task, void *dst, + void *src) +{ + ioat_task->ioat_chan_entry = ioat_chan_entry; + ioat_task->src = src; + ioat_task->dst = dst; + + spdk_ioat_submit_copy(ioat_chan_entry->chan, ioat_task, ioat_done, dst, src, + g_user_config.xfer_size_bytes); + + ioat_chan_entry->current_queue_depth++; +} + +static void +submit_xfers(struct ioat_chan_entry *ioat_chan_entry, uint64_t queue_depth) +{ + while (queue_depth-- > 0) { + void *src = NULL, *dst = NULL; + struct ioat_task *ioat_task = NULL; + + src = spdk_mempool_get(ioat_chan_entry->data_pool); + dst = spdk_mempool_get(ioat_chan_entry->data_pool); + ioat_task = spdk_mempool_get(ioat_chan_entry->task_pool); + + submit_single_xfer(ioat_chan_entry, ioat_task, dst, src); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread 
*worker = (struct worker_thread *)arg; + struct ioat_chan_entry *t = NULL; + + printf("Starting thread on core %u\n", worker->core); + + tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz(); + + t = worker->ctx; + while (t != NULL) { + // begin to submit transfers + submit_xfers(t, g_user_config.queue_depth); + t = t->next; + } + + while (1) { + t = worker->ctx; + while (t != NULL) { + spdk_ioat_process_events(t->chan); + t = t->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + t = worker->ctx; + while (t != NULL) { + // begin to drain io + t->is_draining = true; + drain_io(t); + t = t->next; + } + + return 0; +} + +static int +init(void) +{ + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "perf"; + opts.core_mask = g_user_config.core_mask; + if (spdk_env_init(&opts) < 0) { + return -1; + } + + return 0; +} + +static int +dump_result(void) +{ + uint64_t total_completed = 0; + uint64_t total_failed = 0; + uint64_t total_xfer_per_sec, total_bw_in_MiBps; + struct worker_thread *worker = g_workers; + + printf("Channel_ID Core Transfers Bandwidth Failed\n"); + printf("-----------------------------------------------------------\n"); + while (worker != NULL) { + struct ioat_chan_entry *t = worker->ctx; + while (t) { + uint64_t xfer_per_sec = t->xfer_completed / g_user_config.time_in_sec; + uint64_t bw_in_MiBps = (t->xfer_completed * g_user_config.xfer_size_bytes) / + (g_user_config.time_in_sec * 1024 * 1024); + + total_completed += t->xfer_completed; + total_failed += t->xfer_failed; + + if (xfer_per_sec) { + printf("%10d%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n", + t->ioat_chan_id, worker->core, xfer_per_sec, + bw_in_MiBps, t->xfer_failed); + } + t = t->next; + } + worker = worker->next; + } + + total_xfer_per_sec = total_completed / g_user_config.time_in_sec; + total_bw_in_MiBps = (total_completed * g_user_config.xfer_size_bytes) / + (g_user_config.time_in_sec * 1024 * 1024); + + 
printf("===========================================================\n"); + printf("Total:%26" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n", + total_xfer_per_sec, total_bw_in_MiBps, total_failed); + + return total_failed ? 1 : 0; +} + +static struct spdk_ioat_chan * +get_next_chan(void) +{ + struct spdk_ioat_chan *chan; + + if (g_next_device == NULL) { + return NULL; + } + + chan = g_next_device->ioat; + + g_next_device = TAILQ_NEXT(g_next_device, tailq); + + return chan; +} + +static int +associate_workers_with_chan(void) +{ + struct spdk_ioat_chan *chan = get_next_chan(); + struct worker_thread *worker = g_workers; + struct ioat_chan_entry *t; + char buf_pool_name[30], task_pool_name[30]; + int i = 0; + + while (chan != NULL) { + t = calloc(1, sizeof(struct ioat_chan_entry)); + if (!t) { + return -1; + } + + t->ioat_chan_id = i; + snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%d", i); + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", i); + t->data_pool = spdk_mempool_create(buf_pool_name, 512, g_user_config.xfer_size_bytes, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + t->task_pool = spdk_mempool_create(task_pool_name, 512, sizeof(struct ioat_task), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!t->data_pool || !t->task_pool) { + fprintf(stderr, "Could not allocate buffer pool.\n"); + spdk_mempool_free(t->data_pool); + spdk_mempool_free(t->task_pool); + free(t); + return 1; + } + printf("Associating ioat_channel %d with core %d\n", i, worker->core); + t->chan = chan; + t->next = worker->ctx; + worker->ctx = t; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + chan = get_next_chan(); + i++; + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + + if (parse_args(argc, argv) != 0) { + return 1; + } + + if (init() != 0) { + return 1; + } + + if (register_workers() != 0) { + rc = -1; 
+ goto cleanup; + } + + if (ioat_init() != 0) { + rc = -1; + goto cleanup; + } + + if (g_ioat_chan_num == 0) { + printf("No channels found\n"); + rc = 0; + goto cleanup; + } + + if (g_user_config.ioat_chan_num > g_ioat_chan_num) { + printf("%d channels are requested, but only %d are found," + "so only test %d channels\n", g_user_config.ioat_chan_num, + g_ioat_chan_num, g_ioat_chan_num); + g_user_config.ioat_chan_num = g_ioat_chan_num; + } + + g_next_device = TAILQ_FIRST(&g_devices); + dump_user_config(&g_user_config); + + if (associate_workers_with_chan() != 0) { + rc = -1; + goto cleanup; + } + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->core != master_core) { + spdk_env_thread_launch_pinned(worker->core, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + if (rc < 0) { + goto cleanup; + } + + spdk_env_thread_wait_all(); + + rc = dump_result(); + +cleanup: + unregister_workers(); + ioat_exit(); + + return rc; +} diff --git a/src/spdk/examples/ioat/verify/.gitignore b/src/spdk/examples/ioat/verify/.gitignore new file mode 100644 index 00000000..0b598736 --- /dev/null +++ b/src/spdk/examples/ioat/verify/.gitignore @@ -0,0 +1 @@ +verify diff --git a/src/spdk/examples/ioat/verify/Makefile b/src/spdk/examples/ioat/verify/Makefile new file mode 100644 index 00000000..d7a7b0b2 --- /dev/null +++ b/src/spdk/examples/ioat/verify/Makefile @@ -0,0 +1,55 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk + +APP = verify + +C_SRCS := verify.c + +SPDK_LIB_LIST = ioat thread util log + +LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all: $(APP) + @: + +$(APP): $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) + $(LINK_C) + +clean: + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/ioat/verify/verify.c b/src/spdk/examples/ioat/verify/verify.c new file mode 100644 index 00000000..c344ba82 --- /dev/null +++ b/src/spdk/examples/ioat/verify/verify.c @@ -0,0 +1,517 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/ioat.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#define SRC_BUFFER_SIZE (512*1024) + +enum ioat_task_type { + IOAT_COPY_TYPE, + IOAT_FILL_TYPE, +}; + +struct user_config { + int queue_depth; + int time_in_sec; + char *core_mask; +}; + +struct ioat_device { + struct spdk_ioat_chan *ioat; + TAILQ_ENTRY(ioat_device) tailq; +}; + +static TAILQ_HEAD(, ioat_device) g_devices; +static struct ioat_device *g_next_device; + +static struct user_config g_user_config; + +struct thread_entry { + struct spdk_ioat_chan *chan; + uint64_t xfer_completed; + uint64_t xfer_failed; + uint64_t fill_completed; + uint64_t fill_failed; + uint64_t current_queue_depth; + unsigned lcore_id; + bool is_draining; + bool init_failed; + struct spdk_mempool *data_pool; + struct spdk_mempool *task_pool; +}; + +struct ioat_task { + enum ioat_task_type type; + struct thread_entry *thread_entry; + void *buffer; + int len; + uint64_t fill_pattern; + void *src; + void *dst; +}; + +static __thread unsigned int seed = 0; + +static unsigned char *g_src; + +static void submit_single_xfer(struct ioat_task *ioat_task); + +static void +construct_user_config(struct user_config *self) +{ + self->queue_depth = 32; + self->time_in_sec = 10; + self->core_mask = "0x1"; +} + +static void +dump_user_config(struct user_config *self) +{ + printf("User configuration:\n"); + 
printf("Run time: %d seconds\n", self->time_in_sec);
+	/* Both numeric settings are plain int, so print them with %d. */
+	printf("Core mask: %s\n", self->core_mask);
+	printf("Queue depth: %d\n", self->queue_depth);
+}
+
+/*
+ * Detach every channel recorded in g_devices and free the list entries.
+ */
+static void
+ioat_exit(void)
+{
+	struct ioat_device *dev;
+
+	while (!TAILQ_EMPTY(&g_devices)) {
+		dev = TAILQ_FIRST(&g_devices);
+		TAILQ_REMOVE(&g_devices, dev, tailq);
+		if (dev->ioat) {
+			spdk_ioat_detach(dev->ioat);
+		}
+		free(dev);
+	}
+}
+
+/*
+ * Choose a random length and random offsets for the next operation of
+ * ioat_task->type and store them in the task, together with its owner.
+ *
+ * Fill tasks get a random 64-bit pattern and an 8-byte aligned destination
+ * inside the task buffer.  Copy tasks zero the whole task buffer and then
+ * pick a source range in g_src and a destination range in the buffer.
+ */
+static void
+prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
+{
+	int len;
+	uintptr_t src_offset;
+	uintptr_t dst_offset;
+	uint64_t fill_pattern;
+
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		fill_pattern = rand_r(&seed);
+		fill_pattern = fill_pattern << 32 | rand_r(&seed);
+
+		/* Ensure that the length of memset block is 8 Bytes aligned.
+		 * In case the buffer crosses hugepage boundary and must be split,
+		 * we also need to ensure 8 byte address alignment. We do it
+		 * unconditionally to keep things simple.
+		 */
+		len = 8 + ((rand_r(&seed) % (SRC_BUFFER_SIZE - 16)) & ~0x7);
+		dst_offset = 8 + rand_r(&seed) % (SRC_BUFFER_SIZE - 8 - len);
+		ioat_task->fill_pattern = fill_pattern;
+		ioat_task->dst = (void *)(((uintptr_t)ioat_task->buffer + dst_offset) & ~0x7);
+	} else {
+		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
+		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
+		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
+
+		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
+		ioat_task->src = (void *)((uintptr_t)g_src + src_offset);
+		ioat_task->dst = (void *)((uintptr_t)ioat_task->buffer + dst_offset);
+	}
+	ioat_task->len = len;
+	ioat_task->thread_entry = thread_entry;
+}
+
+/*
+ * Completion callback for both operation types.
+ *
+ * Verifies the destination (against the fill pattern, 8 bytes at a time, or
+ * against the copy source), updates the owner's counters and then either
+ * returns the task and its buffer to the pools (while draining) or prepares
+ * and submits a new transfer with the same task.
+ */
+static void
+ioat_done(void *cb_arg)
+{
+	char *value;
+	int i, failed = 0;
+	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
+	struct thread_entry *thread_entry = ioat_task->thread_entry;
+
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		value = ioat_task->dst;
+		for (i = 0; i < ioat_task->len / 8; i++) {
+			if (memcmp(value, &ioat_task->fill_pattern, 8) != 0) {
+				thread_entry->fill_failed++;
+				failed = 1;
+				break;
+			}
+			value += 8;
+		}
+		if (!failed) {
+			thread_entry->fill_completed++;
+		}
+	} else {
+		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
+			thread_entry->xfer_failed++;
+		} else {
+			thread_entry->xfer_completed++;
+		}
+	}
+
+	thread_entry->current_queue_depth--;
+	if (thread_entry->is_draining) {
+		spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer);
+		spdk_mempool_put(thread_entry->task_pool, ioat_task);
+	} else {
+		prepare_ioat_task(thread_entry, ioat_task);
+		submit_single_xfer(ioat_task);
+	}
+}
+
+/*
+ * Probe callback: report the PCI device and accept it unconditionally.
+ */
+static bool
+probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
+{
+	printf(" Found matching device at %04x:%02x:%02x.%x "
+	       "vendor:0x%04x device:0x%04x\n",
+	       spdk_pci_device_get_domain(pci_dev),
+	       spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev),
+	       spdk_pci_device_get_func(pci_dev),
+	       spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev));
+
+	return true;
+}
+
+/*
+ * Attach callback: remember the new channel at the tail of g_devices.
+ * If the list entry cannot be allocated the channel is simply not recorded.
+ */
+static void
+attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat)
+{
+	struct ioat_device *dev;
+
+	/* calloc() replaces the former malloc() + memset() pair. */
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL) {
+		printf("Failed to allocate device struct\n");
+		return;
+	}
+
+	dev->ioat = ioat;
+	TAILQ_INSERT_TAIL(&g_devices, dev, tailq);
+}
+
+/*
+ * Initialise g_devices and probe for channels.
+ * Returns 0 on success, 1 if spdk_ioat_probe() reports an error.
+ */
+static int
+ioat_init(void)
+{
+	TAILQ_INIT(&g_devices);
+
+	if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) {
+		fprintf(stderr, "ioat_probe() failed\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the command-line synopsis to stdout.
+ */
+static void
+usage(char *program_name)
+{
+	printf("%s options\n", program_name);
+	printf("\t[-h help message]\n");
+	printf("\t[-c core mask for distributing I/O submission/completion work]\n");
+	printf("\t[-t time in seconds]\n");
+	printf("\t[-q queue depth]\n");
+}
+
+/*
+ * Parse the command line into g_user_config, starting from the defaults.
+ *
+ * -h prints the usage text and exits with status 0.
+ * Returns 0 on success and 1 (after printing the usage text) for an unknown
+ * option or when the run time or queue depth is not a positive number.
+ */
+static int
+parse_args(int argc, char **argv)
+{
+	int op;
+
+	construct_user_config(&g_user_config);
+	while ((op = getopt(argc, argv, "c:ht:q:")) != -1) {
+		switch (op) {
+		case 't':
+			g_user_config.time_in_sec = atoi(optarg);
+			break;
+		case 'c':
+			g_user_config.core_mask = optarg;
+			break;
+		case 'q':
+			g_user_config.queue_depth = atoi(optarg);
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+	/*
+	 * atoi() yields 0 for garbage and happily returns negative numbers;
+	 * a negative queue depth would later be converted to a huge unsigned
+	 * count, so reject everything that is not strictly positive.
+	 */
+	if (g_user_config.time_in_sec <= 0 || !g_user_config.core_mask ||
+	    g_user_config.queue_depth <= 0) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Poll the channel until every submitted transfer has completed.
+ */
+static void
+drain_xfers(struct thread_entry *thread_entry)
+{
+	while (thread_entry->current_queue_depth > 0) {
+		spdk_ioat_process_events(thread_entry->chan);
+	}
+}
+
+/*
+ * Submit the prepared task on its owner's channel.
+ *
+ * The queue depth is only increased when the submit call accepts the
+ * request.  A rejected request never reaches ioat_done(), so counting it
+ * would keep drain_xfers() polling forever; instead it is recorded as a
+ * failed operation and the task and its buffer go back to their pools.
+ * The caller must not touch ioat_task after this function returns.
+ */
+static void
+submit_single_xfer(struct ioat_task *ioat_task)
+{
+	struct thread_entry *thread_entry = ioat_task->thread_entry;
+	int rc;
+
+	if (ioat_task->type == IOAT_FILL_TYPE) {
+		rc = spdk_ioat_submit_fill(thread_entry->chan, ioat_task, ioat_done,
+					   ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
+	} else {
+		rc = spdk_ioat_submit_copy(thread_entry->chan, ioat_task, ioat_done,
+					   ioat_task->dst, ioat_task->src, ioat_task->len);
+	}
+
+	if (rc != 0) {
+		fprintf(stderr, "ioat submit failed, rc = %d\n", rc);
+		if (ioat_task->type == IOAT_FILL_TYPE) {
+			thread_entry->fill_failed++;
+		} else {
+			thread_entry->xfer_failed++;
+		}
+		spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer);
+		spdk_mempool_put(thread_entry->task_pool, ioat_task);
+		return;
+	}
+
+	thread_entry->current_queue_depth++;
+}
+
+/*
+ * Take queue_depth tasks and buffers from the pools and submit them.
+ * Every second task is a fill when the channel reports fill support, the
+ * rest are copies.  Submission stops early, and is counted as a failure,
+ * if a pool cannot supply an element.
+ */
+static void
+submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
+{
+	/* Depends only on the channel, so query it once instead of per task. */
+	bool fill_supported = (spdk_ioat_get_dma_capabilities(thread_entry->chan) &
+			       SPDK_IOAT_ENGINE_FILL_SUPPORTED) != 0;
+
+	while (queue_depth-- > 0) {
+		struct ioat_task *ioat_task;
+
+		ioat_task = spdk_mempool_get(thread_entry->task_pool);
+		if (ioat_task == NULL) {
+			fprintf(stderr, "Could not get a task from the task pool\n");
+			thread_entry->xfer_failed++;
+			return;
+		}
+
+		ioat_task->buffer = spdk_mempool_get(thread_entry->data_pool);
+		if (ioat_task->buffer == NULL) {
+			fprintf(stderr, "Could not get a buffer from the data pool\n");
+			spdk_mempool_put(thread_entry->task_pool, ioat_task);
+			thread_entry->xfer_failed++;
+			return;
+		}
+
+		ioat_task->type = IOAT_COPY_TYPE;
+		if (fill_supported && (queue_depth % 2)) {
+			ioat_task->type = IOAT_FILL_TYPE;
+		}
+		prepare_ioat_task(thread_entry, ioat_task);
+		submit_single_xfer(ioat_task);
+	}
+}
+
+static int
+work_fn(void *arg)
+{
+	uint64_t tsc_end;
+	char buf_pool_name[20], task_pool_name[20];
+	struct thread_entry *t = (struct thread_entry *)arg;
+
+	if (!t->chan) {
+		return 0;
+	}
+
+	t->lcore_id = spdk_env_get_current_core();
+
+	snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%u", t->lcore_id);
+	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%u", t->lcore_id);
+
t->data_pool = spdk_mempool_create(buf_pool_name, g_user_config.queue_depth, SRC_BUFFER_SIZE, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + t->task_pool = spdk_mempool_create(task_pool_name, g_user_config.queue_depth, + sizeof(struct ioat_task), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!t->data_pool || !t->task_pool) { + fprintf(stderr, "Could not allocate buffer pool.\n"); + t->init_failed = true; + return 1; + } + + tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz(); + + submit_xfers(t, g_user_config.queue_depth); + while (spdk_get_ticks() < tsc_end) { + spdk_ioat_process_events(t->chan); + } + + t->is_draining = true; + drain_xfers(t); + + return 0; +} + +static int +init_src_buffer(void) +{ + int i; + + g_src = spdk_dma_zmalloc(SRC_BUFFER_SIZE, 512, NULL); + if (g_src == NULL) { + fprintf(stderr, "Allocate src buffer failed\n"); + return -1; + } + + for (i = 0; i < SRC_BUFFER_SIZE / 4; i++) { + memset((g_src + (4 * i)), i, 4); + } + + return 0; +} + +static int +init(void) +{ + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "verify"; + opts.core_mask = g_user_config.core_mask; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (init_src_buffer() != 0) { + fprintf(stderr, "Could not init src buffer\n"); + return 1; + } + if (ioat_init() != 0) { + fprintf(stderr, "Could not init ioat\n"); + return 1; + } + + return 0; +} + +static int +dump_result(struct thread_entry *threads, uint32_t num_threads) +{ + uint32_t i; + uint64_t total_completed = 0; + uint64_t total_failed = 0; + + for (i = 0; i < num_threads; i++) { + struct thread_entry *t = &threads[i]; + + if (!t->chan) { + continue; + } + + if (t->init_failed) { + total_failed++; + continue; + } + + total_completed += t->xfer_completed; + total_completed += t->fill_completed; + total_failed += t->xfer_failed; + total_failed += t->fill_failed; + if 
(total_completed || total_failed) + printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld\n", + t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed); + } + return total_failed ? 1 : 0; +} + +static struct spdk_ioat_chan * +get_next_chan(void) +{ + struct spdk_ioat_chan *chan; + + if (g_next_device == NULL) { + fprintf(stderr, "Not enough ioat channels found. Check that ioat channels are bound\n"); + fprintf(stderr, "to uio_pci_generic or vfio-pci. scripts/setup.sh can help with this.\n"); + return NULL; + } + + chan = g_next_device->ioat; + + g_next_device = TAILQ_NEXT(g_next_device, tailq); + + return chan; +} + +static uint32_t +get_max_core(void) +{ + uint32_t i; + uint32_t max_core = 0; + + SPDK_ENV_FOREACH_CORE(i) { + if (i > max_core) { + max_core = i; + } + } + + return max_core; +} + +int +main(int argc, char **argv) +{ + uint32_t i, current_core; + struct thread_entry *threads; + uint32_t num_threads; + int rc; + + if (parse_args(argc, argv) != 0) { + return 1; + } + + if (init() != 0) { + return 1; + } + + dump_user_config(&g_user_config); + + g_next_device = TAILQ_FIRST(&g_devices); + + num_threads = get_max_core() + 1; + threads = calloc(num_threads, sizeof(*threads)); + if (!threads) { + fprintf(stderr, "Thread memory allocation failed\n"); + rc = 1; + goto cleanup; + } + + current_core = spdk_env_get_current_core(); + SPDK_ENV_FOREACH_CORE(i) { + if (i != current_core) { + threads[i].chan = get_next_chan(); + spdk_env_thread_launch_pinned(i, work_fn, &threads[i]); + } + } + + threads[current_core].chan = get_next_chan(); + work_fn(&threads[current_core]); + + spdk_env_thread_wait_all(); + rc = dump_result(threads, num_threads); + +cleanup: + spdk_dma_free(g_src); + ioat_exit(); + free(threads); + + return rc; +} diff --git a/src/spdk/examples/nvme/Makefile b/src/spdk/examples/nvme/Makefile new file mode 100644 index 00000000..f7066626 --- /dev/null +++ 
b/src/spdk/examples/nvme/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world identify perf reserve nvme_manage arbitration \ + hotplug cmb_copy + +DIRS-$(CONFIG_FIO_PLUGIN) += fio_plugin + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/nvme/arbitration/.gitignore b/src/spdk/examples/nvme/arbitration/.gitignore new file mode 100644 index 00000000..f1d6e38d --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/.gitignore @@ -0,0 +1 @@ +arbitration diff --git a/src/spdk/examples/nvme/arbitration/Makefile b/src/spdk/examples/nvme/arbitration/Makefile new file mode 100644 index 00000000..3affeb80 --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(CURDIR)/../../.. +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = arbitration + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/arbitration/arbitration.c b/src/spdk/examples/nvme/arbitration/arbitration.c new file mode 100644 index 00000000..8065b1ba --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/arbitration.c @@ -0,0 +1,1167 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/nvme.h"
+#include "spdk/env.h"
+#include "spdk/string.h"
+#include "spdk/nvme_intel.h"
+
+struct ctrlr_entry {	/* one registered controller, pushed onto g_controllers */
+	struct spdk_nvme_ctrlr *ctrlr;
+	struct spdk_nvme_intel_rw_latency_page latency_page;	/* target of the get-log-page request */
+	struct ctrlr_entry *next;
+	char name[1024];	/* "model (serial)", formatted in register_ctrlr() */
+};
+
+struct ns_entry {	/* one usable namespace, pushed onto g_namespaces */
+	struct {
+		struct spdk_nvme_ctrlr *ctrlr;
+		struct spdk_nvme_ns *ns;
+	} nvme;
+
+	struct ns_entry *next;
+	uint32_t io_size_blocks;	/* I/O size divided by the sector size */
+	uint64_t size_in_ios;	/* namespace size divided by the I/O size */
+	char name[1024];
+};
+
+struct ns_worker_ctx {	/* state of one worker on one namespace */
+	struct ns_entry *entry;
+	uint64_t io_completed;
+	uint64_t current_queue_depth;	/* submitted and not yet completed */
+	uint64_t offset_in_ios;	/* next offset for sequential workloads */
+	bool is_draining;	/* set by drain_io(); stops resubmission */
+	struct spdk_nvme_qpair *qpair;
+	struct ns_worker_ctx *next;
+};
+
+struct arb_task {	/* one I/O and its DMA buffer */
+	struct ns_worker_ctx *ns_ctx;
+	void *buf;
+};
+
+struct worker_thread {	/* one worker, its namespace contexts and its queue priority */
+	struct ns_worker_ctx *ns_ctx;
+	struct worker_thread *next;
+	unsigned lcore;
+	enum spdk_nvme_qprio qprio;
+};
+
+struct arb_context {	/* run-wide settings and counters; see g_arbitration for defaults */
+	int shm_id;
+	int outstanding_commands;	/* admin commands submitted and not yet completed */
+	int num_namespaces;
+	int num_workers;
+	int rw_percentage;	/* 100 = reads only, 0 = writes only */
+	int is_random;
+	int queue_depth;
+	int time_in_sec;
+	int io_count;
+	uint8_t latency_tracking_enable;
+	uint8_t arbitration_mechanism;
+	uint8_t arbitration_config;
+	uint32_t io_size_bytes;
+	uint32_t max_completions;	/* passed to spdk_nvme_qpair_process_completions() */
+	uint64_t tsc_rate;
+	const char *core_mask;
+	const char *workload_type;
+};
+
+struct feature {
+	uint32_t result;
+	bool valid;
+};
+
+static struct spdk_mempool *task_pool = NULL;	/* pool of struct arb_task */
+
+static struct ctrlr_entry *g_controllers = NULL;
+static struct ns_entry *g_namespaces = NULL;
+static struct worker_thread *g_workers = NULL;
+
+static struct feature features[256];
+
+static struct arb_context g_arbitration = {
+	.shm_id = -1,
+	.outstanding_commands = 0,
+	.num_workers = 0,
+	.num_namespaces = 0,
+	.rw_percentage = 50,
+	.queue_depth = 64,
+	.time_in_sec = 60,
+	.io_count = 100000,
+	.latency_tracking_enable = 0,
+	.arbitration_mechanism = SPDK_NVME_CC_AMS_RR,
+	.arbitration_config = 0,
+	.io_size_bytes = 131072,
+	.max_completions = 0,
+	/* Default 4 cores for urgent/high/medium/low */
+	.core_mask = "0xf",
+	.workload_type = "randrw",
+};
+
+/*
+ * For weighted round robin arbitration mechanism, the smaller value between
+ * weight and burst will be picked to execute the commands in one queue.
+ */
+#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32
+#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16
+#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8
+#define USER_SPECIFIED_ARBITRATION_BURST 7 /* No limit */
+
+/*
+ * Description of dword for priority weight and arbitration burst
+ * ------------------------------------------------------------------------------
+ * 31 : 24          | 23 : 16            | 15 : 08         | 07 : 03  | 02 : 00
+ * ------------------------------------------------------------------------------
+ * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst
+ * ------------------------------------------------------------------------------
+ *
+ * The priority weights are zero based value.
+ */ +#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT 24 +#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT 16 +#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT 8 +#define SPDK_NVME_PRIO_WEIGHT_MASK 0xFF +#define SPDK_NVME_ARB_BURST_MASK 0x7 + +#define SPDK_NVME_QPRIO_MAX (SPDK_NVME_QPRIO_LOW + 1) + +static void task_complete(struct arb_task *task); + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static const char *print_qprio(enum spdk_nvme_qprio); + + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || + spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), + g_arbitration.io_size_bytes); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->nvme.ctrlr = ctrlr; + entry->nvme.ns = ns; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; + entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_arbitration.num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if 
(spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_arbitration.outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + int nsid, num_ns; + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + if ((g_arbitration.latency_tracking_enable != 0) && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { + get_arb_feature(ctrlr); + + if (g_arbitration.arbitration_config != 0) { + set_arb_feature(ctrlr); + get_arb_feature(ctrlr); + } + } +} + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct 
ns_worker_ctx *ns_ctx) +{ + struct arb_task *task = NULL; + uint64_t offset_in_ios; + int rc; + struct ns_entry *entry = ns_ctx->entry; + + task = spdk_mempool_get(task_pool); + if (!task) { + fprintf(stderr, "Failed to get task from task_pool\n"); + exit(1); + } + + task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); + if (!task->buf) { + spdk_mempool_put(task_pool, task); + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + + task->ns_ctx = ns_ctx; + + if (g_arbitration.is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + if ((g_arbitration.rw_percentage == 100) || + (g_arbitration.rw_percentage != 0 && + ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { + rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } else { + rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } + + ns_ctx->current_queue_depth++; +} + +static void +task_complete(struct arb_task *task) +{ + struct ns_worker_ctx *ns_ctx; + + ns_ctx = task->ns_ctx; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + + spdk_dma_free(task->buf); + spdk_mempool_put(task_pool, task); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+	 */
+	if (!ns_ctx->is_draining) {
+		submit_single_io(ns_ctx);
+	}
+}
+
+/*
+ * NVMe completion callback; the completion status itself is not inspected.
+ */
+static void
+io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
+{
+	task_complete((struct arb_task *)ctx);
+}
+
+/*
+ * Process up to max_completions completions on the context's queue pair.
+ */
+static void
+check_io(struct ns_worker_ctx *ns_ctx)
+{
+	spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions);
+}
+
+/*
+ * Submit queue_depth I/Os on the context.
+ */
+static void
+submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
+{
+	while (queue_depth-- > 0) {
+		submit_single_io(ns_ctx);
+	}
+}
+
+/*
+ * Stop resubmission and poll until every outstanding I/O has completed.
+ */
+static void
+drain_io(struct ns_worker_ctx *ns_ctx)
+{
+	ns_ctx->is_draining = true;
+	while (ns_ctx->current_queue_depth > 0) {
+		check_io(ns_ctx);
+	}
+}
+
+/*
+ * Allocate an I/O queue pair with the given priority for the context.
+ * Returns 0 on success, 1 if the allocation fails.
+ */
+static int
+init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio)
+{
+	struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr;
+	struct spdk_nvme_io_qpair_opts opts;
+
+	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
+	opts.qprio = qprio;
+
+	ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
+	if (!ns_ctx->qpair) {
+		printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the context's I/O queue pair.
+ */
+static void
+cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx)
+{
+	spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
+}
+
+/*
+ * Free the namespace and worker lists and the task pool.  A warning is
+ * printed if the pool does not hold task_count elements at that point.
+ */
+static void
+cleanup(uint32_t task_count)
+{
+	struct ns_entry *entry = g_namespaces;
+	struct ns_entry *next_entry = NULL;
+	struct worker_thread *worker = g_workers;
+	struct worker_thread *next_worker = NULL;
+
+	while (entry) {
+		next_entry = entry->next;
+		free(entry);
+		entry = next_entry;
+	}
+
+	while (worker) {
+		next_worker = worker->next;
+		free(worker->ns_ctx);
+		free(worker);
+		worker = next_worker;
+	}
+
+	if (spdk_mempool_count(task_pool) != (size_t)task_count) {
+		fprintf(stderr, "task_pool count is %zu but should be %u\n",
+			spdk_mempool_count(task_pool), task_count);
+	}
+	spdk_mempool_free(task_pool);
+}
+
+/*
+ * Worker entry point: allocate one queue pair per namespace context, keep
+ * queue_depth I/Os in flight on each until time_in_sec has elapsed, then
+ * drain and release the queue pairs.
+ * Returns 0 on success, 1 if a queue pair cannot be allocated.
+ */
+static int
+work_fn(void *arg)
+{
+	uint64_t tsc_end;
+	struct worker_thread *worker = (struct worker_thread *)arg;
+	struct ns_worker_ctx *ns_ctx = NULL;
+
+	printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio));
+
+	/* Allocate a queue pair for each namespace. */
+	ns_ctx = worker->ns_ctx;
+	while (ns_ctx != NULL) {
+		if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) {
+			struct ns_worker_ctx *done;
+
+			printf("ERROR: init_ns_worker_ctx() failed\n");
+			/* Release the queue pairs allocated before the failure. */
+			for (done = worker->ns_ctx; done != ns_ctx; done = done->next) {
+				cleanup_ns_worker_ctx(done);
+			}
+			return 1;
+		}
+		ns_ctx = ns_ctx->next;
+	}
+
+	tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate;
+
+	/* Submit initial I/O for each namespace. */
+	ns_ctx = worker->ns_ctx;
+
+	while (ns_ctx != NULL) {
+		submit_io(ns_ctx, g_arbitration.queue_depth);
+		ns_ctx = ns_ctx->next;
+	}
+
+	while (1) {
+		/*
+		 * Check for completed I/O for each controller. A new
+		 * I/O will be submitted in the io_complete callback
+		 * to replace each I/O that is completed.
+		 */
+		ns_ctx = worker->ns_ctx;
+		while (ns_ctx != NULL) {
+			check_io(ns_ctx);
+			ns_ctx = ns_ctx->next;
+		}
+
+		if (spdk_get_ticks() > tsc_end) {
+			break;
+		}
+	}
+
+	ns_ctx = worker->ns_ctx;
+	while (ns_ctx != NULL) {
+		drain_io(ns_ctx);
+		cleanup_ns_worker_ctx(ns_ctx);
+		ns_ctx = ns_ctx->next;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the command-line synopsis to stdout.
+ */
+static void
+usage(char *program_name)
+{
+	printf("%s options", program_name);
+	printf("\n");
+	printf("\t[-q io depth]\n");
+	printf("\t[-s io size in bytes]\n");
+	printf("\t[-w io pattern type, must be one of\n");
+	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
+	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
+	printf("\t[-l enable latency tracking, default: disabled]\n");
+	printf("\t\t(0 - disabled; 1 - enabled)\n");
+	printf("\t[-t time in seconds]\n");
+	printf("\t[-c core mask for I/O submission/completion.]\n");
+	printf("\t\t(default: 0xf - 4 cores)]\n");
+	printf("\t[-m max completions per poll]\n");
+	printf("\t\t(default: 0 - unlimited)\n");
+	printf("\t[-a arbitration mechanism, must be one of below]\n");
+	/*
+	 * parse_args() rejects everything except 0, 1 and 7 ("-a must be
+	 * specified to value 0, 1, or 7"), so the vendor specific mechanism
+	 * is documented as 7 rather than 2.
+	 */
+	printf("\t\t(0, 1, 7)]\n");
+	printf("\t\t(0: default round robin mechanism)]\n");
+	printf("\t\t(1: weighted round robin mechanism)]\n");
+	printf("\t\t(7: vendor specific mechanism)]\n");
+	printf("\t[-b enable arbitration user configuration, default: disabled]\n");
+	printf("\t\t(0 - disabled; 1 - enabled)\n");
+	printf("\t[-n subjected IOs for performance comparison]\n");
+	printf("\t[-i shared memory group ID]\n");
+}
+
+/*
+ * Map a queue priority to a human-readable label.
+ */
+static const char *
+print_qprio(enum spdk_nvme_qprio qprio)
+{
+	switch (qprio) {
+	case SPDK_NVME_QPRIO_URGENT:
+		return "urgent priority queue";
+	case SPDK_NVME_QPRIO_HIGH:
+		return "high priority queue";
+	case SPDK_NVME_QPRIO_MEDIUM:
+		return "medium priority queue";
+	case SPDK_NVME_QPRIO_LOW:
+		return "low priority queue";
+	default:
+		return "invalid priority queue";
+	}
+}
+
+
+/*
+ * Print the effective settings as an equivalent command line.
+ */
+static void
+print_configuration(char *program_name)
+{
+	printf("%s run with configuration:\n", program_name);
+	/* io_size_bytes and max_completions are uint32_t, hence %u. */
+	printf("%s -q %d -s %u -w %s -M %d -l %d -t %d -c %s -m %u -a %d -b %d -n %d -i %d\n",
+	       program_name,
+	       g_arbitration.queue_depth,
+	       g_arbitration.io_size_bytes,
+	       g_arbitration.workload_type,
+	       g_arbitration.rw_percentage,
+	       g_arbitration.latency_tracking_enable,
+	       g_arbitration.time_in_sec,
+	       g_arbitration.core_mask,
+	       g_arbitration.max_completions,
+	       g_arbitration.arbitration_mechanism,
+	       g_arbitration.arbitration_config,
+	       g_arbitration.io_count,
+	       g_arbitration.shm_id);
+}
+
+
+static void
+print_performance(void)
+{
+	float io_per_second, sent_all_io_in_secs;
+	struct worker_thread *worker;
+	struct ns_worker_ctx *ns_ctx;
+
+	worker = g_workers;
+	while (worker) {
+		ns_ctx = worker->ns_ctx;
+		while (ns_ctx) {
+			io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec;
+			sent_all_io_in_secs = g_arbitration.io_count / io_per_second;
+			printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n",
+			       ns_ctx->entry->name, worker->lcore,
+			       io_per_second, sent_all_io_in_secs, g_arbitration.io_count);
+			ns_ctx = ns_ctx->next;
+		}
+		worker = worker->next;
+	}
+
printf("========================================================\n");
+
+	printf("\n");
+}
+/*
+ * Print the controller name followed by every non-zero bucket of its
+ * latency_page.
+ */
+static void
+print_latency_page(struct ctrlr_entry *entry)
+{
+	int i;
+
+	printf("\n");
+	printf("%s\n", entry->name);
+	printf("--------------------------------------------------------\n");
+	/* 32 buckets labelled in 32us steps, then 31 in 1ms steps, then 31 in 32ms steps. */
+	for (i = 0; i < 32; i++) {
+		if (entry->latency_page.buckets_32us[i])
+			printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32,
+			       entry->latency_page.buckets_32us[i]);
+	}
+	for (i = 0; i < 31; i++) {
+		if (entry->latency_page.buckets_1ms[i])
+			printf("Bucket %dms - %dms: %d\n", i + 1, i + 2,
+			       entry->latency_page.buckets_1ms[i]);
+	}
+	for (i = 0; i < 31; i++) {
+		if (entry->latency_page.buckets_32ms[i])
+			printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32,
+			       entry->latency_page.buckets_32ms[i]);
+	}
+}
+/*
+ * Request log_page from every controller that supports it, wait for all
+ * requests to complete, then print each supporting controller's page.
+ * op_name only labels the output.  Exits the process if a request cannot
+ * be submitted.
+ */
+static void
+print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page)
+{
+	struct ctrlr_entry *ctrlr;
+
+	printf("%s Latency Statistics:\n", op_name);
+	printf("========================================================\n");
+	ctrlr = g_controllers;
+	while (ctrlr) {
+		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
+			if (spdk_nvme_ctrlr_cmd_get_log_page(
+				    ctrlr->ctrlr, log_page,
+				    SPDK_NVME_GLOBAL_NS_TAG,
+				    &ctrlr->latency_page,
+				    sizeof(struct spdk_nvme_intel_rw_latency_page),
+				    0,
+				    enable_latency_tracking_complete,
+				    NULL)) {
+				printf("nvme_ctrlr_cmd_get_log_page() failed\n");
+				exit(1);
+			}
+			/* The shared completion callback decrements this counter again. */
+			g_arbitration.outstanding_commands++;
+		} else {
+			printf("Controller %s: %s latency statistics not supported\n",
+			       ctrlr->name, op_name);
+		}
+		ctrlr = ctrlr->next;
+	}
+	/* Poll every admin queue until all log-page requests have completed. */
+	while (g_arbitration.outstanding_commands) {
+		ctrlr = g_controllers;
+		while (ctrlr) {
+			spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr);
+			ctrlr = ctrlr->next;
+		}
+	}
+	/* The pages are filled in now; print them. */
+	ctrlr = g_controllers;
+	while (ctrlr) {
+		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
+			print_latency_page(ctrlr);
+		}
+		ctrlr = ctrlr->next;
+	}
+ printf("\n"); +} + +static void +print_stats(void) +{ + print_performance(); + if (g_arbitration.latency_tracking_enable) { + if (g_arbitration.rw_percentage != 0) { + print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); + } + if (g_arbitration.rw_percentage != 100) { + print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); + } + } +} + +static int +parse_args(int argc, char **argv) +{ + const char *workload_type = NULL; + int op = 0; + bool mix_specified = false; + + while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) { + switch (op) { + case 'c': + g_arbitration.core_mask = optarg; + break; + case 'i': + g_arbitration.shm_id = atoi(optarg); + break; + case 'l': + g_arbitration.latency_tracking_enable = atoi(optarg); + break; + case 'm': + g_arbitration.max_completions = atoi(optarg); + break; + case 'q': + g_arbitration.queue_depth = atoi(optarg); + break; + case 's': + g_arbitration.io_size_bytes = atoi(optarg); + break; + case 't': + g_arbitration.time_in_sec = atoi(optarg); + break; + case 'w': + g_arbitration.workload_type = optarg; + break; + case 'M': + g_arbitration.rw_percentage = atoi(optarg); + mix_specified = true; + break; + case 'a': + g_arbitration.arbitration_mechanism = atoi(optarg); + break; + case 'b': + g_arbitration.arbitration_config = atoi(optarg); + break; + case 'n': + g_arbitration.io_count = atoi(optarg); + break; + case 'h': + default: + usage(argv[0]); + return 1; + } + } + + workload_type = g_arbitration.workload_type; + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_arbitration.rw_percentage = 
100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_arbitration.rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_arbitration.is_random = 0; + } else { + g_arbitration.is_random = 1; + } + + if (g_arbitration.latency_tracking_enable != 0 && + g_arbitration.latency_tracking_enable != 1) { + fprintf(stderr, + "-l must be specified to value 0 or 1.\n"); + return 1; + } + + switch (g_arbitration.arbitration_mechanism) { + case SPDK_NVME_CC_AMS_RR: + case SPDK_NVME_CC_AMS_WRR: + case SPDK_NVME_CC_AMS_VS: + break; + default: + fprintf(stderr, + "-a must be specified to value 0, 1, or 7.\n"); + return 1; + } + + if (g_arbitration.arbitration_config != 0 && + g_arbitration.arbitration_config != 1) { + fprintf(stderr, + "-b must be specified to value 0 or 1.\n"); + return 1; + } else if (g_arbitration.arbitration_config == 1 && + g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { + fprintf(stderr, + "-a must be specified to 1 (WRR) together.\n"); + return 1; + } + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; + + g_workers = NULL; + g_arbitration.num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + 
fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_arbitration.num_workers++; + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { + qprio++; + } + + worker->qprio = qprio % SPDK_NVME_QPRIO_MAX; + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* Update with user specified arbitration configuration */ + opts->arb_mechanism = g_arbitration.arbitration_mechanism; + + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attached to %s\n", trid->traddr); + + /* Update with actual arbitration configuration in use */ + g_arbitration.arbitration_mechanism = opts->arb_mechanism; + + register_ctrlr(ctrlr); +} + +static int +register_controllers(void) +{ + printf("Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_arbitration.num_namespaces == 0) { + fprintf(stderr, "No valid namespaces to continue IO testing\n"); + return 1; + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + if (g_arbitration.latency_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(entry->ctrlr, false); + } + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = 
g_arbitration.num_namespaces > g_arbitration.num_workers ? + g_arbitration.num_namespaces : g_arbitration.num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = malloc(sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return 1; + } + memset(ns_ctx, 0, sizeof(*ns_ctx)); + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + + } + + return 0; +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + + g_arbitration.outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10 = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]); +} + +/* + * Issue Get Features (Arbitration), poll the admin queue until the command + * completes and, if it succeeded, print the decoded arbitration settings. + */ +static void +get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + /* + * A non-zero return means the command was not submitted, so the completion + * callback that decrements outstanding_commands would never run; return + * instead of polling forever (set_arb_feature() handles this the same way). + */ + if (get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION) != 0) { + printf("get_feature(0x%02X) submission failed\n", (unsigned)SPDK_NVME_FEAT_ARBITRATION); + return; + } + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; + unsigned ab, lpw, mpw, hpw; + + ab = arb & SPDK_NVME_ARB_BURST_MASK; + lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + + 
printf("Current Arbitration Configuration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (ab == SPDK_NVME_ARB_BURST_MASK) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << ab); + } + + printf("Low Priority Weight: %u\n", lpw); + printf("Medium Priority Weight: %u\n", mpw); + printf("High Priority Weight: %u\n", hpw); + printf("\n"); + } +} + +static void +set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("set_feature(0x%02X) failed\n", fid); + feature->valid = false; + } else { + printf("Set Arbitration Feature Successfully\n"); + } + + g_arbitration.outstanding_commands--; +} + +static int +set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + struct spdk_nvme_cmd cmd = {}; + uint32_t arb = 0; + unsigned ab, lpw, mpw, hpw; + + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION; + + g_arbitration.outstanding_commands = 0; + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK; + hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT; + mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT; + lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT; + arb = hpw | mpw | lpw | ab; + cmd.cdw11 = arb; + } + + ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, + set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]); + if (ret) { + printf("Set Arbitration Feature: Failed 0x%x\n", ret); + return 1; + } + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { + printf("Set Arbitration Feature failed and use default configuration\n"); + } + + return 0; +} + +int +main(int argc, 
char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + char task_pool_name[30]; + uint32_t task_count; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "arb"; + opts.core_mask = g_arbitration.core_mask; + opts.shm_id = g_arbitration.shm_id; + if (spdk_env_init(&opts) < 0) { + return 1; + } + + g_arbitration.tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + return 1; + } + + if (register_controllers() != 0) { + return 1; + } + + if (associate_workers_with_ns() != 0) { + return 1; + } + + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); + + /* + * The task_count will be dynamically calculated based on the + * number of attached active namespaces, queue depth and number + * of cores (workers) involved in the IO operations. + */ + task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? + g_arbitration.num_namespaces : g_arbitration.num_workers; + task_count *= g_arbitration.queue_depth; + + task_pool = spdk_mempool_create(task_pool_name, task_count, + sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY); + if (task_pool == NULL) { + fprintf(stderr, "could not initialize task pool\n"); + return 1; + } + + print_configuration(argv[0]); + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + + print_stats(); + + unregister_controllers(); + + cleanup(task_count); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + } + + return rc; +} diff --git a/src/spdk/examples/nvme/cmb_copy/.gitignore b/src/spdk/examples/nvme/cmb_copy/.gitignore new file mode 100644 index 00000000..fce73803 --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/.gitignore @@ -0,0 +1 @@ +cmb_copy diff --git a/src/spdk/examples/nvme/cmb_copy/Makefile b/src/spdk/examples/nvme/cmb_copy/Makefile new file mode 100644 index 00000000..86c18143 --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Eideticom Inc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Eideticom Inc nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = cmb_copy + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/cmb_copy/cmb_copy.c b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c new file mode 100644 index 00000000..223133ca --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Eideticom Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Eideticom Inc, nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/nvme.h" + +#define CMB_COPY_DELIM "-" +#define CMB_COPY_READ 0 +#define CMB_COPY_WRITE 1 + +struct nvme_io { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id trid; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + unsigned nsid; + unsigned slba; + unsigned nlbas; + uint32_t lba_size; + unsigned done; +}; + +struct cmb_t { + struct spdk_nvme_transport_id trid; + struct spdk_nvme_ctrlr *ctrlr; +}; + +struct config { + struct nvme_io read; + struct nvme_io write; + struct cmb_t cmb; + size_t copy_size; +}; + +static struct config g_config; + +/* Namespaces index from 1. 
Return 0 to invoke an error */ +static unsigned get_nsid(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return g_config.read.nsid; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return g_config.write.nsid; + } + return 0; +} + +static int get_rw(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return CMB_COPY_READ; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return CMB_COPY_WRITE; + } + return -1; +} + +static void +check_io(void *arg, const struct spdk_nvme_cpl *completion) +{ + int *rw = (unsigned *)arg; + + if (*rw == CMB_COPY_READ) { + g_config.read.done = 1; + } else { + g_config.write.done = 1; + } +} + +static int +cmb_copy(void) +{ + int rc = 0, rw; + void *buf; + + /* Allocate QPs for the read and write controllers */ + g_config.read.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.read.ctrlr, NULL, 0); + g_config.write.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.write.ctrlr, NULL, 0); + if (g_config.read.qpair == NULL || g_config.read.qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return -ENOMEM; + } + + /* Allocate a buffer from our CMB */ + buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(g_config.cmb.ctrlr, g_config.copy_size); + if (buf == NULL) { + printf("ERROR: buffer allocation failed\n"); + printf("Are you sure %s has a valid CMB?\n", + g_config.cmb.trid.traddr); + return -ENOMEM; + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + rw = CMB_COPY_READ; + /* Do the read to the CMB IO buffer */ + rc = spdk_nvme_ns_cmd_read(g_config.read.ns, g_config.read.qpair, buf, + g_config.read.slba, g_config.read.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + return -EIO; + } + while (!g_config.read.done) { + spdk_nvme_qpair_process_completions(g_config.read.qpair, 0); + } + + /* Do the write 
from the CMB IO buffer */ + rw = CMB_COPY_WRITE; + rc = spdk_nvme_ns_cmd_write(g_config.write.ns, g_config.write.qpair, buf, + g_config.write.slba, g_config.write.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + return -EIO; + } + while (!g_config.write.done) { + spdk_nvme_qpair_process_completions(g_config.write.qpair, 0); + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + /* Free CMB buffer */ + spdk_nvme_ctrlr_free_cmb_io_buffer(g_config.cmb.ctrlr, buf, + g_config.copy_size); + + /* Free the queues */ + spdk_nvme_ctrlr_free_io_qpair(g_config.read.qpair); + spdk_nvme_ctrlr_free_io_qpair(g_config.write.qpair); + + return rc; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* We will only attach to the read or write controller */ + if (strcmp(trid->traddr, g_config.read.trid.traddr) && + strcmp(trid->traddr, g_config.write.trid.traddr)) { + printf("%s - not probed %s!\n", __func__, trid->traddr); + return 0; + } + + printf("%s - probed %s!\n", __func__, trid->traddr); + return 1; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, get_nsid(trid)); + if (ns == NULL) { + fprintf(stderr, "Could not locate namespace %d on controller %s.\n", + get_nsid(trid), trid->traddr); + exit(-1); + } + if (get_rw(trid) == CMB_COPY_READ) { + g_config.read.ctrlr = ctrlr; + g_config.read.ns = ns; + g_config.read.lba_size = spdk_nvme_ns_get_sector_size(ns); + } else { + g_config.write.ctrlr = ctrlr; + g_config.write.ns = ns; + g_config.write.lba_size = spdk_nvme_ns_get_sector_size(ns); + } + printf("%s - attached %s!\n", __func__, trid->traddr); + + return; +} + +static void +usage(char *program_name) +{ + printf("%s options (all mandatory)", 
program_name); + printf("\n"); + printf("\t[-r NVMe read parameters]\n"); + printf("\t[-w NVMe write parameters]\n"); + printf("\t[-c CMB to use for data buffers]\n"); + printf("\n"); + printf("Read/Write params:\n"); + printf(" <pci id>-<namespace>-<start LBA>-<number of LBAs>\n"); +} + +static void +parse(char *in, struct nvme_io *io) +{ + char *tok = NULL; + + tok = strtok(in, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + snprintf(&io->trid.traddr[0], SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->nsid = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->slba = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->nlbas = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok != NULL) { + goto err; + } + return; + +err: + fprintf(stderr, "%s: error parsing %s\n", __func__, in); + exit(-1); + +} + +static int +parse_args(int argc, char **argv) +{ + int op; + unsigned read = 0, write = 0, cmb = 0; + + while ((op = getopt(argc, argv, "r:w:c:")) != -1) { + switch (op) { + case 'r': + parse(optarg, &g_config.read); + read = 1; + break; + case 'w': + parse(optarg, &g_config.write); + write = 1; + break; + case 'c': + snprintf(g_config.cmb.trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", optarg); + cmb = 1; + break; + default: + usage(argv[0]); + return 1; + } + } + + if ((!read || !write || !cmb)) { + usage(argv[0]); + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc = 0; + struct spdk_env_opts opts; + + /* + * Parse the input arguments. 
For now we use the following + * format list: + * + * <pci id>-<namespace>-<start LBA>-<number of LBAs> + * + */ + rc = parse_args(argc, argv); + if (rc) { + fprintf(stderr, "Error in parse_args(): %d\n", + rc); + return -1; + } + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "cmb_copy"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + /* + * CMBs only apply to PCIe attached NVMe controllers so we + * only probe the PCIe bus. This is the default when we pass + * in NULL for the first argument. + */ + + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc) { + fprintf(stderr, "Error in spdk_nvme_probe(): %d\n", + rc); + return -1; + } + + /* + * For now enforce that the read and write controller are not + * the same. This avoids an internal only DMA. + */ + if (!strcmp(g_config.write.trid.traddr, g_config.read.trid.traddr)) { + fprintf(stderr, "Read and Write controllers must differ!\n"); + return -1; + } + + /* + * Perform a few sanity checks and set the buffer size for the + * CMB. + */ + if (g_config.read.nlbas * g_config.read.lba_size != + g_config.write.nlbas * g_config.write.lba_size) { + fprintf(stderr, "Read and write sizes do not match!\n"); + return -1; + } + g_config.copy_size = g_config.read.nlbas * g_config.read.lba_size; + + /* + * Get the ctrlr pointer for the CMB. For now we assume this + * is either the read or write NVMe controller though in + * theory that is not a necessary condition. 
+ */ + + if (!strcmp(g_config.cmb.trid.traddr, g_config.read.trid.traddr)) { + g_config.cmb.ctrlr = g_config.read.ctrlr; + } + if (!strcmp(g_config.cmb.trid.traddr, g_config.write.trid.traddr)) { + g_config.cmb.ctrlr = g_config.write.ctrlr; + } + + /* + * Call the cmb_copy() function which performs the CMB + * based copy or returns an error code if it fails. + */ + rc = cmb_copy(); + if (rc) { + fprintf(stderr, "Error in spdk_cmb_copy(): %d\n", + rc); + return -1; + } + + return rc; +} diff --git a/src/spdk/examples/nvme/fio_plugin/.gitignore b/src/spdk/examples/nvme/fio_plugin/.gitignore new file mode 100644 index 00000000..1b0b36ac --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/src/spdk/examples/nvme/fio_plugin/Makefile b/src/spdk/examples/nvme/fio_plugin/Makefile new file mode 100644 index 00000000..dfd7917d --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(CURDIR)/../../.. + +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP := fio_plugin + +CFLAGS += -I$(CONFIG_FIO_SOURCE_DIR) +LDFLAGS += -shared -rdynamic + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/fio_plugin/README.md b/src/spdk/examples/nvme/fio_plugin/README.md new file mode 100644 index 00000000..2c533282 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/README.md @@ -0,0 +1,97 @@ +# Compiling fio + +First, clone the fio source repository from https://github.com/axboe/fio + + git clone https://github.com/axboe/fio + +Then check out the fio 3.3: + + cd fio && git checkout fio-3.3 + +Finally, compile the code: + + make + +# Compiling SPDK + +First, clone the SPDK source repository from https://github.com/spdk/spdk + + git clone https://github.com/spdk/spdk + git submodule update --init + +Then, run the SPDK configure script to enable fio (point it to the root of the fio repository): + + cd spdk + ./configure --with-fio=/path/to/fio/repo <other configuration options> + +Finally, build SPDK: + + make + +**Note to advanced users**: These steps assume you're using the DPDK submodule. 
If you are using your +own version of DPDK, the fio plugin requires that DPDK be compiled with -fPIC. You can compile DPDK +with -fPIC by modifying your DPDK configuration file and adding the line: + + EXTRA_CFLAGS=-fPIC + +# Usage + +To use the SPDK fio plugin with fio, specify the plugin binary using LD_PRELOAD when running +fio and set ioengine=spdk in the fio configuration file (see example_config.fio in the same +directory as this README). + + LD_PRELOAD=<path to spdk repo>/examples/nvme/fio_plugin/fio_plugin fio + +To select NVMe devices, you pass an SPDK Transport Identifier string as the filename. These are in the +form: + + filename=key=value [key=value] ... ns=value + +Specifically, for local PCIe NVMe devices it will look like this: + + filename=trtype=PCIe traddr=0000.04.00.0 ns=1 + +And remote devices accessed via NVMe over Fabrics will look like this: + + filename=trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1 + + +**Note**: The specification of the PCIe address should not use the normal ':' +and instead only use '.'. This is a limitation in fio - it splits filenames on +':'. Also, the NVMe namespaces start at 1, not 0, and the namespace must be +specified at the end of the string. + +Currently the SPDK fio plugin is limited to the thread usage model, so fio jobs must also specify thread=1 +when using the SPDK fio plugin. + +fio also currently has a race condition on shutdown if dynamically loading the ioengine by specifying the +engine's full path via the ioengine parameter - LD_PRELOAD is recommended to avoid this race condition. + +When testing random workloads, it is recommended to set norandommap=1. fio's random map +processing consumes extra CPU cycles which will degrade performance over time with +the fio_plugin since all I/O are submitted and completed on a single CPU core. + +When testing FIO on multiple NVMe SSDs with the SPDK plugin, it is recommended to use multiple jobs in the FIO configuration. 
+It has been observed that there is a performance gap between FIO (with the SPDK plugin enabled) and SPDK perf +(examples/nvme/perf/perf) when testing multiple NVMe SSDs. If you use one job (i.e., one CPU core) configured for +the FIO test, the performance is worse than SPDK perf (also using one CPU core) against many NVMe SSDs. But if you use +multiple jobs for the FIO test, the performance of FIO is similar to SPDK perf. After analyzing this phenomenon, we +think it is caused by the FIO architecture. Mainly, FIO can scale with multiple threads (i.e., using multiple CPU cores), +but it is not good at using one thread against many I/O devices. + +# End-to-end Data Protection (Optional) + +To run with PI enabled, the following setup steps are required. +First, format the device namespace with the proper PI setting. For example: + + nvme format /dev/nvme0n1 -l 1 -i 1 -p 0 -m 1 + +In the fio configuration file, add PRACT and set PRCHK by flags (GUARD|REFTAG|APPTAG) properly. For example: + + pi_act=0 + pi_chk=GUARD + +Blocksize should be set to the sum of the data and metadata sizes. For example, if the data blocksize is 512 bytes and the host-generated +PI metadata is 8 bytes, then the blocksize in the fio configuration file should be 520 bytes: + + bs=520 diff --git a/src/spdk/examples/nvme/fio_plugin/example_config.fio b/src/spdk/examples/nvme/fio_plugin/example_config.fio new file mode 100644 index 00000000..a8e62ccb --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/example_config.fio @@ -0,0 +1,15 @@ +[global] +ioengine=spdk +thread=1 +group_reporting=1 +direct=1 +verify=0 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=4k + +[test] +numjobs=1 diff --git a/src/spdk/examples/nvme/fio_plugin/fio_plugin.c b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c new file mode 100644 index 00000000..7785c399 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c @@ -0,0 +1,943 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/crc16.h" + +#include "config-host.h" +#include "fio.h" +#include "optgroup.h" + +#define NVME_IO_ALIGN 4096 +#define FIO_NVME_PI_APPTAG 0x1234 + +static bool spdk_env_initialized; +static int spdk_enable_sgl = 0; +static uint32_t spdk_pract_flag; +static uint32_t spdk_prchk_flags; + +struct spdk_fio_options { + void *pad; /* off1 used in option descriptions may not be 0 */ + int mem_size; + int shm_id; + int enable_sgl; + char *hostnqn; + int pi_act; + char *pi_chk; +}; + +struct spdk_fio_request { + struct io_u *io; + /** Offset in current iovec, fio only uses 1 vector */ + uint32_t iov_offset; + + /** Application tag and its mask for NVMe PI */ + uint16_t appmask; + uint16_t apptag; + + struct spdk_fio_thread *fio_thread; +}; + +struct spdk_fio_ctrlr { + struct spdk_nvme_transport_id tr_id; + struct spdk_nvme_ctrlr_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_fio_ctrlr *next; +}; + +static struct spdk_fio_ctrlr *ctrlr_g; +static int td_count; +static pthread_t g_ctrlr_thread_id = 0; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static bool g_error; + +struct spdk_fio_qpair { + struct fio_file *f; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + uint32_t io_flags; + bool do_nvme_pi; + struct spdk_fio_qpair *next; + struct spdk_fio_ctrlr *fio_ctrlr; +}; + +struct spdk_fio_thread { + struct thread_data *td; + + struct spdk_fio_qpair *fio_qpair; + struct spdk_fio_qpair *fio_qpair_current; // the current fio_qpair to be handled. 
+ + struct io_u **iocq; // io completion queue + unsigned int iocq_count; // number of iocq entries filled by last getevents + unsigned int iocq_size; // number of iocq entries allocated + struct fio_file *current_f; // fio_file given by user + +}; + +static void * +spdk_fio_poll_ctrlrs(void *arg) +{ + struct spdk_fio_ctrlr *fio_ctrlr; + int oldstate; + int rc; + + /* Loop until the thread is cancelled */ + while (true) { + rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + pthread_mutex_lock(&mutex); + fio_ctrlr = ctrlr_g; + + while (fio_ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr); + fio_ctrlr = fio_ctrlr->next; + } + + pthread_mutex_unlock(&mutex); + + rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + /* This is a pthread cancellation point and cannot be removed. 
*/ + sleep(1); + } + + return NULL; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_options *fio_options = td->eo; + + if (fio_options->hostnqn) { + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn); + } + + return true; +} + +static struct spdk_fio_ctrlr * +get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_fio_ctrlr *fio_ctrlr = ctrlr_g; + while (fio_ctrlr) { + if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) { + return fio_ctrlr; + } + + fio_ctrlr = fio_ctrlr->next; + } + + return NULL; +} + +static bool +fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair) +{ + struct spdk_nvme_ns *ns = NULL; + const struct spdk_nvme_ns_data *nsdata; + + ns = fio_qpair->ns; + nsdata = spdk_nvme_ns_get_data(ns); + + if (!spdk_nvme_ns_supports_extended_lba(ns)) { + return false; + } + + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return false; + } + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (nsdata->dps.md_start) { + return false; + } + + /* Controller performs PI setup and check */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return false; + } + + /* Type3 don't support REFTAG */ + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { + return false; + } + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_ctrlr *fio_ctrlr; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns; + struct fio_file *f = fio_thread->current_f; + uint32_t ns_id; + char *p; + + p = strstr(f->file_name, "ns="); + assert(p != NULL); + ns_id = atoi(p + 3); + if 
(!ns_id) { + SPDK_ERRLOG("namespace id should be >=1, but current value=0\n"); + g_error = true; + return; + } + + fio_ctrlr = get_fio_ctrlr(trid); + /* it is a new ctrlr and needs to be added */ + if (!fio_ctrlr) { + /* Create an fio_ctrlr and add it to the list */ + fio_ctrlr = calloc(1, sizeof(*fio_ctrlr)); + if (!fio_ctrlr) { + SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n"); + g_error = true; + return; + } + fio_ctrlr->opts = *opts; + fio_ctrlr->ctrlr = ctrlr; + fio_ctrlr->tr_id = *trid; + fio_ctrlr->next = ctrlr_g; + ctrlr_g = fio_ctrlr; + } + + ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id); + if (ns == NULL) { + SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + + if (!spdk_nvme_ns_is_active(ns)) { + SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if ((fio_qpair->f == f) || + ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) && + (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) { + /* Not the error case. 
Avoid duplicated connection */ + return; + } + fio_qpair = fio_qpair->next; + } + + /* create a new qpair */ + fio_qpair = calloc(1, sizeof(*fio_qpair)); + if (!fio_qpair) { + g_error = true; + SPDK_ERRLOG("Cannot allocate space for fio_qpair\n"); + return; + } + + fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, NULL, 0); + if (!fio_qpair->qpair) { + SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n"); + g_error = true; + free(fio_qpair); + return; + } + + fio_qpair->ns = ns; + fio_qpair->f = f; + fio_qpair->fio_ctrlr = fio_ctrlr; + fio_qpair->next = fio_thread->fio_qpair; + fio_thread->fio_qpair = fio_qpair; + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + fio_qpair->io_flags = spdk_pract_flag | spdk_prchk_flags; + } + + fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair); + + f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); + if (f->real_file_size <= 0) { + g_error = true; + SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns); + return; + } + + f->filetype = FIO_TYPE_BLOCK; + fio_file_set_size_known(f); +} + +static void parse_prchk_flags(const char *prchk_str) +{ + if (!prchk_str) { + return; + } + + if (strstr(prchk_str, "GUARD") != NULL) { + spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(prchk_str, "REFTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(prchk_str, "APPTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } +} + +/* Called once at initialization. This is responsible for gathering the size of + * each "file", which in our case are in the form + * 'key=value [key=value] ... 
ns=value' + * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1' + * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */ +static int spdk_fio_setup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + struct spdk_fio_options *fio_options = td->eo; + struct spdk_env_opts opts; + struct fio_file *f; + char *p; + int rc = 0; + struct spdk_nvme_transport_id trid; + struct spdk_fio_ctrlr *fio_ctrlr; + char *trid_info; + unsigned int i; + + if (!td->o.use_thread) { + log_err("spdk: must set thread=1 when using spdk plugin\n"); + return 1; + } + + /* The controller list, td_count and the one-time env init below are shared by all fio threads */ + pthread_mutex_lock(&mutex); + + fio_thread = calloc(1, sizeof(*fio_thread)); + assert(fio_thread != NULL); + + td->io_ops_data = fio_thread; + fio_thread->td = td; + + /* One completion slot per possible in-flight I/O */ + fio_thread->iocq_size = td->o.iodepth; + fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); + assert(fio_thread->iocq != NULL); + + if (!spdk_env_initialized) { + spdk_env_opts_init(&opts); + opts.name = "fio"; + opts.mem_size = fio_options->mem_size; + opts.shm_id = fio_options->shm_id; + spdk_enable_sgl = fio_options->enable_sgl; + /* pi_act is a 0/1 option; io_flags needs the PRACT flag bit itself, not the raw option value */ + spdk_pract_flag = fio_options->pi_act ? SPDK_NVME_IO_FLAGS_PRACT : 0; + parse_prchk_flags(fio_options->pi_chk); + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + free(fio_thread->iocq); + free(fio_thread); + fio_thread = NULL; + pthread_mutex_unlock(&mutex); + return 1; + } + spdk_env_initialized = true; + spdk_unaffinitize_thread(); + + /* Spawn a thread to continue polling the controllers */ + rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL); + if (rc != 0) { + SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. 
They won't be polled.\n"); + } + } + + for_each_file(td, f, i) { + memset(&trid, 0, sizeof(trid)); + + trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + + p = strstr(f->file_name, " ns="); + if (p == NULL) { + SPDK_ERRLOG("Failed to find namespace 'ns=X'\n"); + continue; + } + + /* Everything before " ns=" is the transport ID string */ + trid_info = strndup(f->file_name, p - f->file_name); + if (!trid_info) { + SPDK_ERRLOG("Failed to allocate space for trid_info\n"); + continue; + } + + rc = spdk_nvme_transport_id_parse(&trid, trid_info); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info); + free(trid_info); + continue; + } + free(trid_info); + + if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_addr pci_addr; + if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) { + SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr); + continue; + } + spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); + } else { + if (trid.subnqn[0] == '\0') { + snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", + SPDK_NVMF_DISCOVERY_NQN); + } + } + + /* attach_cb() reads current_f to learn which file it is attaching */ + fio_thread->current_f = f; + + fio_ctrlr = get_fio_ctrlr(&trid); + if (fio_ctrlr) { + attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts); + } else { + /* Enumerate all of the controllers */ + if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) { + SPDK_ERRLOG("spdk_nvme_probe() failed\n"); + continue; + } + } + + if (g_error) { + log_err("Failed to initialize spdk fio plugin\n"); + rc = 1; + break; + } + } + + td_count++; + + pthread_mutex_unlock(&mutex); + + return rc; +} + +static int spdk_fio_open(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_close(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) +{ + td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL); + return td->orig_buffer == NULL; +} + +static void spdk_fio_iomem_free(struct thread_data *td) +{ + spdk_dma_free(td->orig_buffer); +} + +static int 
spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req; + + fio_req = calloc(1, sizeof(*fio_req)); + if (fio_req == NULL) { + return 1; + } + fio_req->io = io_u; + fio_req->fio_thread = fio_thread; + + io_u->engine_data = fio_req; + + return 0; +} + +static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req = io_u->engine_data; + + if (fio_req) { + assert(fio_req->io == io_u); + free(fio_req); + io_u->engine_data = NULL; + } +} + +static void +fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_nvme_protection_info *pi; + uint16_t crc16; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + fio_req->appmask = 0xffff; + fio_req->apptag = FIO_NVME_PI_APPTAG; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + memset(pi, 0, sizeof(*pi)); + + if (io_u->ddir == DDIR_WRITE) { + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&pi->guard, crc16); + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + to_be16(&pi->app_tag, FIO_NVME_PI_APPTAG); + } + if 
(fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&pi->ref_tag, (uint32_t)lba + i); + } + } + } +} + +static void +fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_nvme_protection_info *pi; + uint16_t crc16, guard, app_tag; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, ref_tag, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include last 8 bytes of PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&guard, crc16); + if (pi->guard != guard) { + fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, guard, pi->guard); + } + + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Previously we used the number of lbas as + * application tag for writes + */ + to_be16(&app_tag, FIO_NVME_PI_APPTAG); + if (pi->app_tag != app_tag) { + fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, app_tag, pi->app_tag); + } + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&ref_tag, (uint32_t)lba + i); + if (pi->ref_tag != ref_tag) { + fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%08x but returned with 0x%08x," + " may read the LBA without write 
it first\n", + lba + i, ref_tag, pi->ref_tag); + } + } + } +} + +static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_fio_request *fio_req = ctx; + struct spdk_fio_thread *fio_thread = fio_req->fio_thread; + + if (fio_thread->fio_qpair->do_nvme_pi) { + fio_extended_lba_verify_pi(fio_thread->fio_qpair, fio_req->io); + } + + assert(fio_thread->iocq_count < fio_thread->iocq_size); + fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; +} + +static void +spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + + fio_req->iov_offset = sgl_offset; +} + +static int +spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + struct io_u *io_u = fio_req->io; + + *address = io_u->buf; + *length = io_u->xfer_buflen; + + if (fio_req->iov_offset) { + assert(fio_req->iov_offset <= io_u->xfer_buflen); + *address += fio_req->iov_offset; + *length -= fio_req->iov_offset; + } + + return 0; +} + +#if FIO_IOOPS_VERSION >= 24 +typedef enum fio_q_status fio_q_status_t; +#else +typedef int fio_q_status_t; +#endif + +static fio_q_status_t +spdk_fio_queue(struct thread_data *td, struct io_u *io_u) +{ + int rc = 1; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns = NULL; + uint32_t block_size; + uint64_t lba; + uint32_t lba_count; + + /* Find the namespace that corresponds to the file in the io_u */ + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if (fio_qpair->f == io_u->file) { + ns = fio_qpair->ns; + break; + } + fio_qpair = fio_qpair->next; + } + if (fio_qpair == NULL || ns == NULL) { + return -ENXIO; + } + + block_size = spdk_nvme_ns_get_extended_sector_size(ns); + + lba = io_u->offset / block_size; + lba_count = io_u->xfer_buflen / block_size; 
+ + // TODO: considering situations that fio will randomize and verify io_u + if (fio_qpair->do_nvme_pi) { + fio_extended_lba_setup_pi(fio_qpair, io_u); + } + + switch (io_u->ddir) { + case DDIR_READ: + if (!spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); + } else { + rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); + } + break; + case DDIR_WRITE: + if (!spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); + } else { + rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); + } + break; + default: + assert(false); + break; + } + + /* NVMe read/write functions return -ENOMEM if there are no free requests. 
*/ + if (rc == -ENOMEM) { + return FIO_Q_BUSY; + } + + if (rc != 0) { + return -abs(rc); + } + + return FIO_Q_QUEUED; +} +/* Return the io_u stored at index 'event' of this thread's completion array. */ +static struct io_u *spdk_fio_event(struct thread_data *td, int event) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(event >= 0); + assert((unsigned)event < fio_thread->iocq_count); + return fio_thread->iocq[event]; +} +/* Poll this thread's qpairs until at least 'min' completions have been collected or, when 't' is given, until that timeout has elapsed. Returns the number of completions collected. */ +static int spdk_fio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, const struct timespec *t) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair = NULL; + struct timespec t0, t1; + uint64_t timeout = 0; + + if (t) { + timeout = t->tv_sec * 1000000000L + t->tv_nsec; /* timeout in nanoseconds */ + clock_gettime(CLOCK_MONOTONIC_RAW, &t0); + } + + fio_thread->iocq_count = 0; + + /* resume with the qpair after the one handled by the previous call */ + if (fio_thread->fio_qpair_current) { + fio_qpair = fio_thread->fio_qpair_current->next; + } + + for (;;) { + if (fio_qpair == NULL) { + fio_qpair = fio_thread->fio_qpair; + } + + while (fio_qpair != NULL) { + spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count); + + if (fio_thread->iocq_count >= min) { + /* remember the qpair handled last so the next call continues after it */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; + } + + fio_qpair = fio_qpair->next; + } + + if (t) { + uint64_t elapse; + + clock_gettime(CLOCK_MONOTONIC_RAW, &t1); + elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) + + t1.tv_nsec - t0.tv_nsec; + if (elapse > timeout) { + break; + } + } + } + + /* timed out: record the current qpair position (NULL here, so the next call starts from the list head) */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; +} + +static int spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) +{ + /* TODO: This should probably send a flush to the device, but for now just return successful. 
*/ + return 0; +} + +/* Per-thread teardown: release this thread's I/O qpairs, its completion array and its state. + * When the last thread exits, every attached controller is detached; once the controller + * list is empty the admin-queue polling thread is cancelled and joined. + */ +static void spdk_fio_cleanup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp; + bool stop_poller; + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair); + fio_qpair_tmp = fio_qpair->next; + free(fio_qpair); + fio_qpair = fio_qpair_tmp; + } + + /* iocq is a separate allocation made in spdk_fio_setup(); free it before its owner */ + free(fio_thread->iocq); + free(fio_thread); + + pthread_mutex_lock(&mutex); + td_count--; + if (td_count == 0) { + struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp; + + fio_ctrlr = ctrlr_g; + while (fio_ctrlr != NULL) { + spdk_nvme_detach(fio_ctrlr->ctrlr); + fio_ctrlr_tmp = fio_ctrlr->next; + free(fio_ctrlr); + fio_ctrlr = fio_ctrlr_tmp; + } + ctrlr_g = NULL; + } + /* Sample the list head while still holding the mutex; other threads update ctrlr_g under it */ + stop_poller = (ctrlr_g == NULL); + pthread_mutex_unlock(&mutex); + if (stop_poller) { + if (pthread_cancel(g_ctrlr_thread_id) == 0) { + pthread_join(g_ctrlr_thread_id, NULL); + } + } +} + +/* This function enables addition of SPDK parameters to the fio config + * Adding new parameters by defining them here and defining a callback + * function to read the parameter value. 
*/ +static struct fio_option options[] = { + { + .name = "mem_size_mb", + .lname = "Memory size in MB", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, mem_size), + .def = "512", + .help = "Memory Size for SPDK (MB)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "shm_id", + .lname = "shared memory ID", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, shm_id), + .def = "-1", + .help = "Shared Memory ID", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "enable_sgl", + .lname = "SGL used for I/O commands", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, enable_sgl), + .def = "0", + .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "hostnqn", + .lname = "Host NQN to use when connecting to controllers.", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, hostnqn), + .help = "Host NQN", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_act", + .lname = "Protection Information Action", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, pi_act), + .def = "1", + .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_chk", + .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, pi_chk), + .def = NULL, + .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = NULL, + }, +}; + +/* FIO imports this structure using dlsym */ +struct ioengine_ops ioengine = { + .name = "spdk", + .version = FIO_IOOPS_VERSION, + .queue = spdk_fio_queue, + .getevents = spdk_fio_getevents, + .event = spdk_fio_event, + .cleanup = 
spdk_fio_cleanup, + .open_file = spdk_fio_open, + .close_file = spdk_fio_close, + .invalidate = spdk_fio_invalidate, + .iomem_alloc = spdk_fio_iomem_alloc, + .iomem_free = spdk_fio_iomem_free, + .setup = spdk_fio_setup, + .io_u_init = spdk_fio_io_u_init, + .io_u_free = spdk_fio_io_u_free, + .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN, + .options = options, + .option_struct_size = sizeof(struct spdk_fio_options), +}; + +static void fio_init fio_spdk_register(void) +{ + register_ioengine(&ioengine); +} + +static void fio_exit fio_spdk_unregister(void) +{ + unregister_ioengine(&ioengine); +} diff --git a/src/spdk/examples/nvme/fio_plugin/full_bench.fio b/src/spdk/examples/nvme/fio_plugin/full_bench.fio new file mode 100644 index 00000000..4dea21d1 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/full_bench.fio @@ -0,0 +1,40 @@ +[global] +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +disable_slat=1 +disable_bw=1 +lat_percentiles=1 +clat_percentiles=0 +percentile_list=50:99:99.999 + +[precondition-sequential] +stonewall +description="Sequentially write to the device twice" +rw=write +iodepth=128 +bs=128k +loops=2 + +[4k_randwrite_qd1] +stonewall +description="4KiB Random Write QD=1" +bs=4k +rw=randwrite +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 + +[4k_randread_qd1] +stonewall +description="4KiB Random Read QD=1" +bs=4k +rw=randread +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 diff --git a/src/spdk/examples/nvme/hello_world/.gitignore b/src/spdk/examples/nvme/hello_world/.gitignore new file mode 100644 index 00000000..242c034c --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/.gitignore @@ -0,0 +1 @@ +hello_world diff --git a/src/spdk/examples/nvme/hello_world/Makefile b/src/spdk/examples/nvme/hello_world/Makefile new file mode 100644 index 00000000..890d761a --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = hello_world + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hello_world/hello_world.c b/src/spdk/examples/nvme/hello_world/hello_world.c new file mode 100644 index 00000000..34913073 --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/hello_world.c @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct ns_entry *next; + struct spdk_nvme_qpair *qpair; +}; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + /* + * spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe + * controller. During initialization, the IDENTIFY data for the + * controller is read using an NVMe admin command, and that data + * can be retrieved using spdk_nvme_ctrlr_get_data() to get + * detailed information on the controller. Refer to the NVMe + * specification for more details on IDENTIFY for NVMe controllers. + */ + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->next = g_namespaces; + g_namespaces = entry; + + printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns) / 1000000000); +} + +struct hello_world_sequence { + struct ns_entry *ns_entry; + char *buf; + unsigned using_cmb_io; + int is_completed; +}; + +static void +read_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + + /* + * The read I/O has completed. Print the contents of the + * buffer, free the buffer, then mark the sequence as + * completed. This will trigger the hello_world() function + * to exit its polling loop. 
+ */ + printf("%s", sequence->buf); + spdk_free(sequence->buf); + sequence->is_completed = 1; +} + +static void +write_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + struct ns_entry *ns_entry = sequence->ns_entry; + int rc; + + /* + * The write I/O has completed. Free the buffer associated with + * the write I/O and allocate a new zeroed buffer for reading + * the data back from the NVMe namespace. + */ + if (sequence->using_cmb_io) { + spdk_nvme_ctrlr_free_cmb_io_buffer(ns_entry->ctrlr, sequence->buf, 0x1000); + } else { + spdk_free(sequence->buf); + } + sequence->buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + + rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + read_complete, (void *)sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + exit(1); + } +} + +static void +hello_world(void) +{ + struct ns_entry *ns_entry; + struct hello_world_sequence sequence; + int rc; + + ns_entry = g_namespaces; + while (ns_entry != NULL) { + /* + * Allocate an I/O qpair that we can use to submit read/write requests + * to namespaces on the controller. NVMe controllers typically support + * many qpairs per controller. Any I/O qpair allocated for a controller + * can submit I/O to any namespace on that controller. + * + * The SPDK NVMe driver provides no synchronization for qpair accesses - + * the application must ensure only a single thread submits I/O to a + * qpair, and that same thread must also check for completions on that + * qpair. This enables extremely efficient I/O processing by making all + * I/O operations completely lockless. 
+ */ + ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, NULL, 0); + if (ns_entry->qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return; + } + + /* + * Use spdk_dma_zmalloc to allocate a 4KB zeroed buffer. This memory + * will be pinned, which is required for data buffers used for SPDK NVMe + * I/O operations. + */ + sequence.using_cmb_io = 1; + sequence.buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(ns_entry->ctrlr, 0x1000); + if (sequence.buf == NULL) { + sequence.using_cmb_io = 0; + sequence.buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + } + if (sequence.buf == NULL) { + printf("ERROR: write buffer allocation failed\n"); + return; + } + if (sequence.using_cmb_io) { + printf("INFO: using controller memory buffer for IO\n"); + } else { + printf("INFO: using host memory buffer for IO\n"); + } + sequence.is_completed = 0; + sequence.ns_entry = ns_entry; + + /* + * Print "Hello world!" to sequence.buf. We will write this data to LBA + * 0 on the namespace, and then later read it back into a separate buffer + * to demonstrate the full I/O path. + */ + snprintf(sequence.buf, 0x1000, "%s", "Hello world!\n"); + + /* + * Write the data buffer to LBA 0 of this namespace. "write_complete" and + * "&sequence" are specified as the completion callback function and + * argument respectively. write_complete() will be called with the + * value of &sequence as a parameter when the write I/O is completed. + * This allows users to potentially specify different completion + * callback routines for each I/O, as well as pass a unique handle + * as an argument so the application knows which I/O has completed. + * + * Note that the SPDK NVMe driver will only check for completions + * when the application calls spdk_nvme_qpair_process_completions(). + * It is the responsibility of the application to trigger the polling + * process. 
+ */ + rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + write_complete, &sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + exit(1); + } + + /* + * Poll for completions. 0 here means process all available completions. + * In certain usage models, the caller may specify a positive integer + * instead of 0 to signify the maximum number of completions it should + * process. This function will never block - if there are no + * completions pending on the specified qpair, it will return immediately. + * + * When the write I/O completes, write_complete() will submit a new I/O + * to read LBA 0 into a separate buffer, specifying read_complete() as its + * completion routine. When the read I/O completes, read_complete() will + * print the buffer contents and set sequence.is_completed = 1. That will + * break this loop and then exit the program. + */ + while (!sequence.is_completed) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + /* + * Free the I/O qpair. This typically is done when an application exits. + * But SPDK does support freeing and then reallocating qpairs during + * operation. It is the responsibility of the caller to ensure all + * pending I/O are completed before trying to free the qpair. 
+ */ + spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair); + ns_entry = ns_entry->next; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + int nsid, num_ns; + struct ctrlr_entry *entry; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + entry = malloc(sizeof(struct ctrlr_entry)); + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + printf("Attached to %s\n", trid->traddr); + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + /* + * Each controller has one or more namespaces. An NVMe namespace is basically + * equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how + * many namespaces exist on the controller. For Intel(R) P3X00 controllers, + * it will just be one namespace. + * + * Note that in NVMe, namespace IDs start at 1, not 0. 
+ */ + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + printf("Using controller %s with %d namespaces.\n", entry->name, num_ns); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } +} + +static void +cleanup(void) +{ + struct ns_entry *ns_entry = g_namespaces; + struct ctrlr_entry *ctrlr_entry = g_controllers; + + while (ns_entry) { + struct ns_entry *next = ns_entry->next; + free(ns_entry); + ns_entry = next; + } + + while (ctrlr_entry) { + struct ctrlr_entry *next = ctrlr_entry->next; + + spdk_nvme_detach(ctrlr_entry->ctrlr); + free(ctrlr_entry); + ctrlr_entry = next; + } +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "hello_world"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Initializing NVMe Controllers\n"); + + /* + * Start the SPDK NVMe enumeration process. probe_cb will be called + * for each NVMe controller found, giving our application a choice on + * whether to attach to each controller. attach_cb will then be + * called for each controller after the SPDK NVMe driver has completed + * initializing the controller we chose to attach. 
+ */ + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + cleanup(); + return 1; + } + + if (g_controllers == NULL) { + fprintf(stderr, "no NVMe controllers found\n"); + cleanup(); + return 1; + } + + printf("Initialization complete.\n"); + hello_world(); + cleanup(); + return 0; +} diff --git a/src/spdk/examples/nvme/hotplug/.gitignore b/src/spdk/examples/nvme/hotplug/.gitignore new file mode 100644 index 00000000..e6ff5380 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/.gitignore @@ -0,0 +1 @@ +hotplug diff --git a/src/spdk/examples/nvme/hotplug/Makefile b/src/spdk/examples/nvme/hotplug/Makefile new file mode 100644 index 00000000..0dcdda9a --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = hotplug + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hotplug/hotplug.c b/src/spdk/examples/nvme/hotplug/hotplug.c new file mode 100644 index 00000000..66d93a71 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/hotplug.c @@ -0,0 +1,491 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/queue.h" + +struct dev_ctx { + TAILQ_ENTRY(dev_ctx) tailq; + bool is_new; + bool is_removed; + bool is_draining; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + uint32_t io_size_blocks; + uint64_t size_in_ios; + uint64_t io_completed; + uint64_t prev_io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + char name[1024]; +}; + +struct perf_task { + struct dev_ctx *dev; + void *buf; +}; + +static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs); + +static uint64_t g_tsc_rate; + +static uint32_t g_io_size_bytes = 4096; +static int g_queue_depth = 4; +static int g_time_in_sec; +static int g_expected_insert_times = -1; +static int g_expected_removal_times = -1; +static int g_insert_times; +static int g_removal_times; +static int g_shm_id = -1; + +static void +task_complete(struct perf_task *task); + +static void +register_dev(struct spdk_nvme_ctrlr *ctrlr) +{ + struct dev_ctx *dev; + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) { + perror("dev_ctx malloc"); + exit(1); + } + + snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + dev->ctrlr = ctrlr; + dev->is_new = true; + dev->is_removed = false; + dev->is_draining = false; + + dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 
1); + + if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) { + fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name); + goto skip; + } + + if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes || + spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) { + fprintf(stderr, "Controller %s: Invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + dev->name, + spdk_nvme_ns_get_size(dev->ns), + spdk_nvme_ns_get_sector_size(dev->ns), + g_io_size_bytes); + goto skip; + } + + dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes; + dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns); + + dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (!dev->qpair) { + fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + goto skip; + } + g_insert_times++; + TAILQ_INSERT_TAIL(&g_devs, dev, tailq); + return; + +skip: + free(dev); +} + +static void +unregister_dev(struct dev_ctx *dev) +{ + fprintf(stderr, "unregister_dev: %s\n", dev->name); + + spdk_nvme_ctrlr_free_io_qpair(dev->qpair); + spdk_nvme_detach(dev->ctrlr); + + TAILQ_REMOVE(&g_devs, dev, tailq); + free(dev); +} + +static struct perf_task * +alloc_task(struct dev_ctx *dev) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + return NULL; + } + + task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL); + if (task->buf == NULL) { + free(task); + return NULL; + } + + task->dev = dev; + + return task; +} + +static void +free_task(struct perf_task *task) +{ + spdk_dma_free(task->buf); + free(task); +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void +submit_single_io(struct perf_task *task) +{ + struct dev_ctx *dev = task->dev; + uint64_t offset_in_ios; + int rc; + + offset_in_ios = dev->offset_in_ios++; + if (dev->offset_in_ios == dev->size_in_ios) { + dev->offset_in_ios = 0; + } + + rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf, 
+ offset_in_ios * dev->io_size_blocks, + dev->io_size_blocks, io_complete, task, 0); + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + free_task(task); + } else { + dev->current_queue_depth++; + } +} + +static void +task_complete(struct perf_task *task) +{ + struct dev_ctx *dev; + + dev = task->dev; + dev->current_queue_depth--; + dev->io_completed++; + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (!dev->is_draining && !dev->is_removed) { + submit_single_io(task); + } else { + free_task(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct perf_task *)ctx); +} + +static void +check_io(struct dev_ctx *dev) +{ + spdk_nvme_qpair_process_completions(dev->qpair, 0); +} + +static void +submit_io(struct dev_ctx *dev, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = alloc_task(dev); + if (task == NULL) { + fprintf(stderr, "task allocation failed\n"); + exit(1); + } + + submit_single_io(task); + } +} + +static void +drain_io(struct dev_ctx *dev) +{ + dev->is_draining = true; + while (dev->current_queue_depth > 0) { + check_io(dev); + } +} + +static void +print_stats(void) +{ + struct dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n", + dev->name, + dev->io_completed, + dev->io_completed - dev->prev_io_completed); + dev->prev_io_completed = dev->io_completed; + } + + fprintf(stderr, "\n"); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr 
*ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attached to %s\n", trid->traddr); + + register_dev(ctrlr); +} + +static void +remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) +{ + struct dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->ctrlr == ctrlr) { + /* + * Mark the device as removed, but don't detach yet. + * + * The I/O handling code will detach once it sees that + * is_removed is true and all outstanding I/O have been completed. + */ + dev->is_removed = true; + fprintf(stderr, "Controller removed: %s\n", dev->name); + return; + } + } + + /* + * If we get here, this remove_cb is for a controller that we are not tracking + * in g_devs (for example, because we skipped it during register_dev), + * so immediately detach it. + */ + spdk_nvme_detach(ctrlr); +} + +static void +io_loop(void) +{ + struct dev_ctx *dev, *dev_tmp; + uint64_t tsc_end; + uint64_t next_stats_tsc; + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + next_stats_tsc = spdk_get_ticks(); + + while (1) { + uint64_t now; + + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->is_new) { + /* Submit initial I/O for this controller. */ + submit_io(dev, g_queue_depth); + dev->is_new = false; + } + + check_io(dev); + } + + /* + * Check for hotplug events. + */ + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + break; + } + + /* + * Check for devices which were hot-removed and have finished + * processing outstanding I/Os. + * + * unregister_dev() may remove devs from the list, so use the + * removal-safe iterator. 
+ */ + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + if (dev->is_removed && dev->current_queue_depth == 0) { + g_removal_times++; + unregister_dev(dev); + } + } + + now = spdk_get_ticks(); + if (now > tsc_end) { + break; + } + if (now > next_stats_tsc) { + print_stats(); + next_stats_tsc += g_tsc_rate; + } + + if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) { + break; + } + } + + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + drain_io(dev); + unregister_dev(dev); + } +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-i shm id (optional)]\n"); + printf("\t[-n expected hot insert times]\n"); + printf("\t[-r expected hot removal times]\n"); + printf("\t[-t time in seconds]\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + /* default value */ + g_time_in_sec = 0; + + while ((op = getopt(argc, argv, "i:n:r:t:")) != -1) { + switch (op) { + case 'i': + g_shm_id = atoi(optarg); + break; + case 'n': + g_expected_insert_times = atoi(optarg); + break; + case 'r': + g_expected_removal_times = atoi(optarg); + break; + case 't': + g_time_in_sec = atoi(optarg); + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + return 0; +} + + +static int +register_controllers(void) +{ + fprintf(stderr, "Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + /* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. 
*/ + g_insert_times = 0; + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "hotplug"; + opts.core_mask = "0x1"; + if (g_shm_id > -1) { + opts.shm_id = g_shm_id; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + /* Detect the controllers that are plugged in at startup. */ + if (register_controllers() != 0) { + return 1; + } + + fprintf(stderr, "Initialization complete. Starting I/O...\n"); + io_loop(); + + if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) { + fprintf(stderr, "Expected inserts %d != actual inserts %d\n", + g_expected_insert_times, g_insert_times); + return 1; + } + + if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) { + fprintf(stderr, "Expected removals %d != actual removals %d\n", + g_expected_removal_times, g_removal_times); + return 1; + } + + return 0; +} diff --git a/src/spdk/examples/nvme/identify/.gitignore b/src/spdk/examples/nvme/identify/.gitignore new file mode 100644 index 00000000..5c5444c1 --- /dev/null +++ b/src/spdk/examples/nvme/identify/.gitignore @@ -0,0 +1 @@ +identify diff --git a/src/spdk/examples/nvme/identify/Makefile b/src/spdk/examples/nvme/identify/Makefile new file mode 100644 index 00000000..0aa5e52b --- /dev/null +++ b/src/spdk/examples/nvme/identify/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = identify + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/identify/identify.c b/src/spdk/examples/nvme/identify/identify.c new file mode 100644 index 00000000..3958483b --- /dev/null +++ b/src/spdk/examples/nvme/identify/identify.c @@ -0,0 +1,1723 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/nvme_ocssd.h" +#include "spdk/env.h" +#include "spdk/nvme_intel.h" +#include "spdk/nvmf_spec.h" +#include "spdk/pci_ids.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/uuid.h" + +#define MAX_DISCOVERY_LOG_ENTRIES ((uint64_t)1000) + +#define NUM_CHUNK_INFO_ENTRIES 8 + +static int outstanding_commands; + +struct feature { + uint32_t result; + bool valid; +}; + +static struct feature features[256]; + +static struct spdk_nvme_error_information_entry error_page[256]; + +static struct spdk_nvme_health_information_page health_page; + +static struct spdk_nvme_firmware_page firmware_page; + +static struct spdk_nvme_cmds_and_effect_log_page cmd_effects_log_page; + +static struct spdk_nvme_intel_smart_information_page intel_smart_page; + +static struct spdk_nvme_intel_temperature_page intel_temperature_page; + +static struct spdk_nvme_intel_marketing_description_page intel_md_page; + +static struct spdk_nvmf_discovery_log_page *g_discovery_page; +static size_t g_discovery_page_size; +static uint64_t g_discovery_page_numrec; + +static struct spdk_ocssd_geometry_data geometry_data; + +static struct spdk_ocssd_chunk_information_entry g_ocssd_chunk_info_page[NUM_CHUNK_INFO_ENTRIES ]; + +static bool g_hex_dump = false; + +static int g_shm_id = -1; + +static int g_dpdk_mem = 64; + +static int g_master_core = 0; + +static char g_core_mask[16] = "0x1"; + +static struct spdk_nvme_transport_id g_trid; + +static int g_controllers_found = 0; + +static void +hex_dump(const void *data, size_t size) +{ + size_t offset = 0, i; + const uint8_t *bytes = data; + + while (size) { + printf("%08zX:", offset); + + for (i = 0; i < 16; i++) { + if (i == 8) { + printf("-"); + } else { + printf(" "); + } + + if (i < size) { + printf("%02X", bytes[offset + i]); + } else { + printf(" "); + } + } + + printf(" "); + + for (i = 0; i < 16; i++) { + if (i < size) { + 
if (bytes[offset + i] > 0x20 && bytes[offset + i] < 0x7F) { + printf("%c", bytes[offset + i]); + } else { + printf("."); + } + } + } + + printf("\n"); + + offset += 16; + if (size > 16) { + size -= 16; + } else { + break; + } + } +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + outstanding_commands--; +} + +static void +get_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get log page failed\n"); + } + outstanding_commands--; +} + +static void +get_ocssd_geometry_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get ocssd geometry failed\n"); + } + outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10 = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]); +} + +static void +get_features(struct spdk_nvme_ctrlr *ctrlr) +{ + size_t i; + + uint8_t features_to_get[] = { + SPDK_NVME_FEAT_ARBITRATION, + SPDK_NVME_FEAT_POWER_MANAGEMENT, + SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD, + SPDK_NVME_FEAT_ERROR_RECOVERY, + SPDK_NVME_FEAT_NUMBER_OF_QUEUES, + SPDK_OCSSD_FEAT_MEDIA_FEEDBACK, + }; + + /* Submit several GET FEATURES commands and wait for them to complete */ + outstanding_commands = 0; + for (i = 0; i < SPDK_COUNTOF(features_to_get); i++) { + if (!spdk_nvme_ctrlr_is_ocssd_supported(ctrlr) && + features_to_get[i] == SPDK_OCSSD_FEAT_MEDIA_FEEDBACK) { + continue; + } + if (get_feature(ctrlr, features_to_get[i]) == 0) { + outstanding_commands++; + } else { + printf("get_feature(0x%02X) failed to submit 
command\n", features_to_get[i]); + } + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_error_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ERROR, + SPDK_NVME_GLOBAL_NS_TAG, error_page, + sizeof(*error_page) * (cdata->elpe + 1), + 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_health_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, + SPDK_NVME_GLOBAL_NS_TAG, &health_page, sizeof(health_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_firmware_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_FIRMWARE_SLOT, + SPDK_NVME_GLOBAL_NS_TAG, &firmware_page, sizeof(firmware_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_cmd_effects_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, + SPDK_NVME_GLOBAL_NS_TAG, &cmd_effects_log_page, sizeof(cmd_effects_log_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_intel_smart_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_SMART, SPDK_NVME_GLOBAL_NS_TAG, + &intel_smart_page, sizeof(intel_smart_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int 
+get_intel_temperature_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE, + SPDK_NVME_GLOBAL_NS_TAG, &intel_temperature_page, sizeof(intel_temperature_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +static int +get_intel_md_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION, + SPDK_NVME_GLOBAL_NS_TAG, &intel_md_page, sizeof(intel_md_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +/* + * Completion callback for the first (header) read of the discovery log page. + * Decrements outstanding_commands, checks the record format, resizes + * g_discovery_page to hold the reported number of entries, then submits + * Get Log Page commands for the remainder in chunks of up to 4 KB, + * incrementing outstanding_commands for each command submitted. + * cb_arg is the controller the header read was issued on. + */ +static void +get_discovery_log_page_header_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvmf_discovery_log_page *new_discovery_page; + struct spdk_nvme_ctrlr *ctrlr = cb_arg; + uint16_t recfmt; + uint64_t remaining; + uint64_t offset; + + outstanding_commands--; + if (spdk_nvme_cpl_is_error(cpl)) { + /* Return without printing anything - this may not be a discovery controller */ + free(g_discovery_page); + g_discovery_page = NULL; + return; + } + + /* Got the first 4K of the discovery log page */ + recfmt = from_le16(&g_discovery_page->recfmt); + if (recfmt != 0) { + printf("Unrecognized discovery log record format %" PRIu16 "\n", recfmt); + return; + } + + g_discovery_page_numrec = from_le64(&g_discovery_page->numrec); + + /* Pick an arbitrary limit to avoid ridiculously large buffer size. */ + if (g_discovery_page_numrec > MAX_DISCOVERY_LOG_ENTRIES) { + printf("Discovery log has %" PRIu64 " entries - limiting to %" PRIu64 ".\n", + g_discovery_page_numrec, MAX_DISCOVERY_LOG_ENTRIES); + g_discovery_page_numrec = MAX_DISCOVERY_LOG_ENTRIES; + } + + /* + * Now that we know how many entries should be in the log page, we can allocate + * the full log page buffer. 
+ */ + g_discovery_page_size += g_discovery_page_numrec * sizeof(struct + spdk_nvmf_discovery_log_page_entry); + new_discovery_page = realloc(g_discovery_page, g_discovery_page_size); + if (new_discovery_page == NULL) { + /* Do not leave the global pointing at freed memory (matches the error path above) */ + free(g_discovery_page); + g_discovery_page = NULL; + printf("Discovery page allocation failed!\n"); + return; + } + + g_discovery_page = new_discovery_page; + + /* Retrieve the rest of the discovery log page */ + offset = offsetof(struct spdk_nvmf_discovery_log_page, entries); + remaining = g_discovery_page_size - offset; + while (remaining) { + uint32_t size; + + /* Retrieve up to 4 KB at a time */ + size = spdk_min(remaining, 4096); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, (char *)g_discovery_page + offset, size, offset, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + offset += size; + remaining -= size; + outstanding_commands++; + } +} + +static int +get_discovery_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + /* Allocate the initial discovery log page buffer - this will be resized later. 
*/ + g_discovery_page_size = sizeof(*g_discovery_page); + g_discovery_page = calloc(1, g_discovery_page_size); + if (g_discovery_page == NULL) { + printf("Discovery log page allocation failed!\n"); + exit(1); + } + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, g_discovery_page, g_discovery_page_size, 0, + get_discovery_log_page_header_completion, ctrlr)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static void +get_log_pages(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + outstanding_commands = 0; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (get_error_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Error Log Page failed\n"); + } + + if (get_health_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (SMART/health) failed\n"); + } + + if (get_firmware_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Firmware Slot Information) failed\n"); + } + + if (cdata->lpa.celp) { + if (get_cmd_effects_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Commands Supported and Effects) failed\n"); + } + } + + if (cdata->vid == SPDK_PCI_VID_INTEL) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + if (get_intel_smart_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel SMART/health) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + if (get_intel_temperature_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel temperature) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + if (get_intel_md_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel Marketing Description) 
failed\n"); + } + } + + } + + if (get_discovery_log_page(ctrlr) == 0) { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_ocssd_chunk_info_log_page(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, + nsid, &g_ocssd_chunk_info_page, sizeof(g_ocssd_chunk_info_page), 0, + get_log_page_completion, NULL) == 0) { + outstanding_commands++; + } else { + printf("get_ocssd_chunk_info_log_page() failed\n"); + return -1; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + return 0; +} + +static void +get_ocssd_geometry(struct spdk_nvme_ns *ns, struct spdk_ocssd_geometry_data *geometry_data) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ocssd_ctrlr_cmd_geometry(ctrlr, nsid, geometry_data, + sizeof(*geometry_data), get_ocssd_geometry_completion, NULL)) { + printf("Get OpenChannel SSD geometry failed\n"); + exit(1); + } else { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +print_hex_be(const void *v, size_t size) +{ + const uint8_t *buf = v; + + while (size--) { + printf("%02X", *buf++); + } +} + +static void +print_uint128_hex(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + printf("0x%llX%016llX", hi, lo); + } else { + printf("0x%llX", lo); + } +} + +static void +print_uint128_dec(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + /* can't handle large (>64-bit) decimal values for now, so fall back to hex */ + print_uint128_hex(v); + } else { + printf("%llu", (unsigned long long)lo); + } +} + +/* The len should be <= 8. 
*/ +static void +print_uint_var_dec(uint8_t *array, unsigned int len) +{ + uint64_t result = 0; + int i = len; + + /* Assemble the value with array[0] as the least significant byte */ + while (i > 0) { + result += (uint64_t)array[i - 1] << (8 * (i - 1)); + i--; + } + /* result is uint64_t, so use PRIu64 instead of assuming it is unsigned long */ + printf("%" PRIu64, result); +} + +/* Print ASCII string as defined by the NVMe spec */ +static void +print_ascii_string(const void *buf, size_t size) +{ + const uint8_t *str = buf; + + /* Trim trailing spaces */ + while (size > 0 && str[size - 1] == ' ') { + size--; + } + + while (size--) { + if (*str >= 0x20 && *str <= 0x7E) { + printf("%c", *str); + } else { + printf("."); + } + str++; + } +} + +/* Print state, write mode, wear-level index, starting LBA, block count and write pointer for each of the chk_num entries in chk_info */ +static void +print_ocssd_chunk_info(struct spdk_ocssd_chunk_information_entry *chk_info, int chk_num) +{ + int i; + char *cs_str, *ct_str; + + printf("OCSSD Chunk Info Glance\n"); + printf("======================\n"); + + for (i = 0; i < chk_num; i++) { + cs_str = chk_info[i].cs.free ? "Free" : + chk_info[i].cs.closed ? "Closed" : + chk_info[i].cs.open ? "Open" : + chk_info[i].cs.offline ? "Offline" : "Unknown"; + ct_str = chk_info[i].ct.seq_write ? "Sequential Write" : + chk_info[i].ct.rnd_write ? "Random Write" : "Unknown"; + + printf("------------\n"); + printf("Chunk index: %d\n", i); + printf("Chunk state: %s(0x%x)\n", cs_str, *(uint8_t *) & (chk_info[i].cs)); + printf("Chunk type (write mode): %s\n", ct_str); + printf("Chunk type (size_deviate): %s\n", chk_info[i].ct.size_deviate ? 
"Yes" : "No"); + printf("Wear-level Index: %d\n", chk_info[i].wli); + /* Cast to uint64_t and print with PRIu64 so the format matches the argument on every ABI */ + printf("Starting LBA: %" PRIu64 "\n", (uint64_t)chk_info[i].slba); + printf("Number of blocks in chunk: %" PRIu64 "\n", (uint64_t)chk_info[i].cnlb); + printf("Write Pointer: %" PRIu64 "\n", (uint64_t)chk_info[i].wp); + } +} + +static void +print_ocssd_geometry(struct spdk_ocssd_geometry_data *geometry_data) +{ + printf("Namespace OCSSD Geometry\n"); + printf("=======================\n"); + + if (geometry_data->mjr < 2) { + printf("Open-Channel Spec version is less than 2.0\n"); + printf("OC version: maj:%d\n", geometry_data->mjr); + return; + } + + printf("OC version: maj:%d min:%d\n", geometry_data->mjr, geometry_data->mnr); + printf("LBA format:\n"); + printf(" Group bits: %d\n", geometry_data->lbaf.grp_len); + printf(" PU bits: %d\n", geometry_data->lbaf.pu_len); + printf(" Chunk bits: %d\n", geometry_data->lbaf.chk_len); + printf(" Logical block bits: %d\n", geometry_data->lbaf.lbk_len); + + printf("Media and Controller Capabilities:\n"); + printf(" Namespace supports Vector Chunk Copy: %s\n", + geometry_data->mccap.vec_chk_cpy ? "Supported" : "Not Supported"); + printf(" Namespace supports multiple resets a free chunk: %s\n", + geometry_data->mccap.multi_reset ? 
"Supported" : "Not Supported"); + + printf("Wear-level Index Delta Threshold: %d\n", geometry_data->wit); + printf("Groups (channels): %d\n", geometry_data->num_grp); + printf("PUs (LUNs) per group: %d\n", geometry_data->num_pu); + printf("Chunks per LUN: %d\n", geometry_data->num_chk); + printf("Logical blks per chunk: %d\n", geometry_data->clba); + printf("MIN write size: %d\n", geometry_data->ws_min); + printf("OPT write size: %d\n", geometry_data->ws_opt); + printf("Cache min write size: %d\n", geometry_data->mw_cunits); + printf("Max open chunks: %d\n", geometry_data->maxoc); + printf("Max open chunks per PU: %d\n", geometry_data->maxocpu); + printf("\n"); +} + +static void +print_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + const struct spdk_uuid *uuid; + uint32_t i; + uint32_t flags; + char uuid_str[SPDK_UUID_STRING_LEN]; + + nsdata = spdk_nvme_ns_get_data(ns); + flags = spdk_nvme_ns_get_flags(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + if (g_hex_dump) { + hex_dump(nsdata, sizeof(*nsdata)); + printf("\n"); + } + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Inactive namespace ID\n\n"); + return; + } + + printf("Deallocate: %s\n", + (flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? "Supported" : "Not Supported"); + printf("Deallocated/Unwritten Error: %s\n", + nsdata->nsfeat.dealloc_or_unwritten_error ? "Supported" : "Not Supported"); + printf("Deallocated Read Value: %s\n", + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_00 ? "All 0x00" : + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_FF ? "All 0xFF" : + "Unknown"); + printf("Deallocate in Write Zeroes: %s\n", + nsdata->dlfeat.bits.write_zero_deallocate ? "Supported" : "Not Supported"); + printf("Deallocated Guard Field: %s\n", + nsdata->dlfeat.bits.guard_value ? "CRC for Read Value" : "0xFFFF"); + printf("Flush: %s\n", + (flags & SPDK_NVME_NS_FLUSH_SUPPORTED) ? 
"Supported" : "Not Supported"); + printf("Reservation: %s\n", + (flags & SPDK_NVME_NS_RESERVATION_SUPPORTED) ? "Supported" : "Not Supported"); + if (flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + printf("End-to-End Data Protection: Supported\n"); + printf("Protection Type: Type%d\n", nsdata->dps.pit); + printf("Metadata Transfered as: %s\n", + nsdata->flbas.extended ? "Extended Data LBA" : "Separate Metadata Buffer"); + printf("Metadata Location: %s\n", + nsdata->dps.md_start ? "First 8 Bytes" : "Last 8 Bytes"); + } + printf("Namespace Sharing Capabilities: %s\n", + nsdata->nmic.can_share ? "Multiple Controllers" : "Private"); + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + if (nsdata->noiob) { + printf("Optimal I/O Boundary: %u blocks\n", nsdata->noiob); + } + if (!spdk_mem_all_zero(nsdata->nguid, sizeof(nsdata->nguid))) { + printf("NGUID: "); + print_hex_be(nsdata->nguid, sizeof(nsdata->nguid)); + printf("\n"); + } + if (!spdk_mem_all_zero(&nsdata->eui64, sizeof(nsdata->eui64))) { + printf("EUI64: "); + print_hex_be(&nsdata->eui64, sizeof(nsdata->eui64)); + printf("\n"); + } + uuid = spdk_nvme_ns_get_uuid(ns); + if (uuid) { + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), uuid); + printf("UUID: %s\n", uuid_str); + } + printf("Thin Provisioning: %s\n", + nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + printf("Per-NS Atomic Units: %s\n", + nsdata->nsfeat.ns_atomic_write_unit ? "Yes" : "No"); + if (nsdata->nawun) { + printf("Atomic Write Unit (Normal): %d\n", nsdata->nawun + 1); + } + if (nsdata->nawupf) { + printf("Atomic Write Unit (PFail): %d\n", nsdata->nawupf + 1); + } + + printf("NGUID/EUI64 Never Reused: %s\n", + nsdata->nsfeat.guid_never_reused ? 
"Yes" : "No"); + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("\n"); + + if (spdk_nvme_ctrlr_is_ocssd_supported(spdk_nvme_ns_get_ctrlr(ns))) { + get_ocssd_geometry(ns, &geometry_data); + print_ocssd_geometry(&geometry_data); + get_ocssd_chunk_info_log_page(ns); + print_ocssd_chunk_info(g_ocssd_chunk_info_page, NUM_CHUNK_INFO_ENTRIES); + } +} + +static const char * +admin_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_DELETE_IO_SQ: + return "Delete I/O Submission Queue"; + case SPDK_NVME_OPC_CREATE_IO_SQ: + return "Create I/O Submission Queue"; + case SPDK_NVME_OPC_GET_LOG_PAGE: + return "Get Log Page"; + case SPDK_NVME_OPC_DELETE_IO_CQ: + return "Delete I/O Completion Queue"; + case SPDK_NVME_OPC_CREATE_IO_CQ: + return "Create I/O Completion Queue"; + case SPDK_NVME_OPC_IDENTIFY: + return "Identify"; + case SPDK_NVME_OPC_ABORT: + return "Abort"; + case SPDK_NVME_OPC_SET_FEATURES: + return "Set Features"; + case SPDK_NVME_OPC_GET_FEATURES: + return "Get Features"; + case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST: + return "Asynchronous Event Request"; + case SPDK_NVME_OPC_NS_MANAGEMENT: + return "Namespace Management"; + case SPDK_NVME_OPC_FIRMWARE_COMMIT: + return "Firmware Commit"; + case SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD: + return "Firmware Image Download"; + case SPDK_NVME_OPC_DEVICE_SELF_TEST: + return "Device Self-test"; + case SPDK_NVME_OPC_NS_ATTACHMENT: + return "Namespace Attachment"; + case SPDK_NVME_OPC_KEEP_ALIVE: + return "Keep Alive"; + case SPDK_NVME_OPC_DIRECTIVE_SEND: + return "Directive Send"; + case SPDK_NVME_OPC_DIRECTIVE_RECEIVE: + return "Directive Receive"; + case SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT: + return "Virtualization Management"; + case SPDK_NVME_OPC_NVME_MI_SEND: + 
return "NVMe-MI Send"; + case SPDK_NVME_OPC_NVME_MI_RECEIVE: + return "NVMe-MI Receive"; + case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG: + return "Doorbell Buffer Config"; + case SPDK_NVME_OPC_FORMAT_NVM: + return "Format NVM"; + case SPDK_NVME_OPC_SECURITY_SEND: + return "Security Send"; + case SPDK_NVME_OPC_SECURITY_RECEIVE: + return "Security Receive"; + case SPDK_NVME_OPC_SANITIZE: + return "Sanitize"; + default: + if (opc >= 0xC0) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static const char * +io_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_FLUSH: + return "Flush"; + case SPDK_NVME_OPC_WRITE: + return "Write"; + case SPDK_NVME_OPC_READ: + return "Read"; + case SPDK_NVME_OPC_WRITE_UNCORRECTABLE: + return "Write Uncorrectable"; + case SPDK_NVME_OPC_COMPARE: + return "Compare"; + case SPDK_NVME_OPC_WRITE_ZEROES: + return "Write Zeroes"; + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + return "Dataset Management"; + case SPDK_NVME_OPC_RESERVATION_REGISTER: + return "Reservation Register"; + case SPDK_NVME_OPC_RESERVATION_REPORT: + return "Reservation Report"; + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + return "Reservation Acquire"; + case SPDK_NVME_OPC_RESERVATION_RELEASE: + return "Reservation Release"; + default: + if (opc >= 0x80) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static void +print_controller(struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_transport_id *trid) +{ + const struct spdk_nvme_ctrlr_data *cdata; + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + uint8_t str[512]; + uint32_t i; + struct spdk_nvme_error_information_entry *error_entry; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + uint32_t nsid; + + cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); + vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr); + + get_features(ctrlr); + get_log_pages(ctrlr); + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + 
printf("=====================================================\n"); + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr) != 0) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("NVMe Controller at %04x:%02x:%02x.%x [%04x:%04x]\n", + pci_addr.domain, pci_addr.bus, + pci_addr.dev, pci_addr.func, + pci_id.vendor_id, pci_id.device_id); + } + printf("=====================================================\n"); + + if (g_hex_dump) { + hex_dump(cdata, sizeof(*cdata)); + printf("\n"); + } + + printf("Controller Capabilities/Features\n"); + printf("================================\n"); + printf("Vendor ID: %04x\n", cdata->vid); + printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); + printf("Serial Number: "); + print_ascii_string(cdata->sn, sizeof(cdata->sn)); + printf("\n"); + printf("Model Number: "); + print_ascii_string(cdata->mn, sizeof(cdata->mn)); + printf("\n"); + printf("Firmware Version: "); + print_ascii_string(cdata->fr, sizeof(cdata->fr)); + printf("\n"); + printf("Recommended Arb Burst: %d\n", cdata->rab); + printf("IEEE OUI Identifier: %02x %02x %02x\n", + cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); + printf("Multi-path I/O\n"); + printf(" May have multiple subsystem ports: %s\n", cdata->cmic.multi_port ? "Yes" : "No"); + printf(" May be connected to multiple hosts: %s\n", cdata->cmic.multi_host ? "Yes" : "No"); + printf(" Associated with SR-IOV VF: %s\n", cdata->cmic.sr_iov ? 
"Yes" : "No"); + printf("Max Data Transfer Size: "); + if (cdata->mdts == 0) { + printf("Unlimited\n"); + } else { + printf("%" PRIu64 "\n", (uint64_t)1 << (12 + cap.bits.mpsmin + cdata->mdts)); + } + if (features[SPDK_NVME_FEAT_ERROR_RECOVERY].valid) { + unsigned tler = features[SPDK_NVME_FEAT_ERROR_RECOVERY].result & 0xFFFF; + printf("Error Recovery Timeout: "); + if (tler == 0) { + printf("Unlimited\n"); + } else { + printf("%u milliseconds\n", tler * 100); + } + } + printf("NVMe Specification Version (VS): %u.%u", vs.bits.mjr, vs.bits.mnr); + if (vs.bits.ter) { + printf(".%u", vs.bits.ter); + } + printf("\n"); + if (cdata->ver.raw != 0) { + printf("NVMe Specification Version (Identify): %u.%u", cdata->ver.bits.mjr, cdata->ver.bits.mnr); + if (cdata->ver.bits.ter) { + printf(".%u", cdata->ver.bits.ter); + } + printf("\n"); + } + + printf("Maximum Queue Entries: %u\n", cap.bits.mqes + 1); + printf("Contiguous Queues Required: %s\n", cap.bits.cqr ? "Yes" : "No"); + printf("Arbitration Mechanisms Supported\n"); + printf(" Weighted Round Robin: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_WRR ? "Supported" : "Not Supported"); + printf(" Vendor Specific: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_VS ? "Supported" : "Not Supported"); + printf("Reset Timeout: %" PRIu64 " ms\n", (uint64_t)500 * cap.bits.to); + printf("Doorbell Stride: %" PRIu64 " bytes\n", + (uint64_t)1 << (2 + cap.bits.dstrd)); + printf("NVM Subsystem Reset: %s\n", + cap.bits.nssrs ? "Supported" : "Not Supported"); + printf("Command Sets Supported\n"); + printf(" NVM Command Set: %s\n", + cap.bits.css & SPDK_NVME_CAP_CSS_NVM ? "Supported" : "Not Supported"); + printf("Boot Partition: %s\n", + cap.bits.bps ? 
"Supported" : "Not Supported"); + printf("Memory Page Size Minimum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmin)); + printf("Memory Page Size Maximum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmax)); + printf("Optional Asynchronous Events Supported\n"); + printf(" Namespace Attribute Notices: %s\n", + cdata->oaes.ns_attribute_notices ? "Supported" : "Not Supported"); + printf(" Firmware Activation Notices: %s\n", + cdata->oaes.fw_activation_notices ? "Supported" : "Not Supported"); + + printf("128-bit Host Identifier: %s\n", + cdata->ctratt.host_id_exhid_supported ? "Supported" : "Not Supported"); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Security Send/Receive: %s\n", + cdata->oacs.security ? "Supported" : "Not Supported"); + printf("Format NVM: %s\n", + cdata->oacs.format ? "Supported" : "Not Supported"); + printf("Firmware Activate/Download: %s\n", + cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Namespace Management: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Device Self-Test: %s\n", + cdata->oacs.device_self_test ? "Supported" : "Not Supported"); + printf("Directives: %s\n", + cdata->oacs.directives ? "Supported" : "Not Supported"); + printf("NVMe-MI: %s\n", + cdata->oacs.nvme_mi ? "Supported" : "Not Supported"); + printf("Virtualization Management: %s\n", + cdata->oacs.virtualization_management ? "Supported" : "Not Supported"); + printf("Doorbell Buffer Config: %s\n", + cdata->oacs.doorbell_buffer_config ? 
"Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl + 1); + printf("Async Event Request Limit: %d\n", cdata->aerl + 1); + printf("Number of Firmware Slots: "); + if (cdata->oacs.firmware != 0) { + printf("%d\n", cdata->frmw.num_slots); + } else { + printf("N/A\n"); + } + printf("Firmware Slot 1 Read-Only: "); + if (cdata->oacs.firmware != 0) { + printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + } else { + printf("N/A\n"); + } + if (cdata->fwug == 0x00) { + printf("Firmware Update Granularity: No Information Provided\n"); + } else if (cdata->fwug == 0xFF) { + printf("Firmware Update Granularity: No Restriction\n"); + } else { + printf("Firmware Update Granularity: %u KiB\n", + cdata->fwug * 4); + } + printf("Per-Namespace SMART Log: %s\n", + cdata->lpa.ns_smart ? "Yes" : "No"); + printf("Command Effects Log Page: %s\n", + cdata->lpa.celp ? "Supported" : "Not Supported"); + printf("Get Log Page Extended Data: %s\n", + cdata->lpa.edlp ? "Supported" : "Not Supported"); + printf("Telemetry Log Pages: %s\n", + cdata->lpa.telemetry ? "Supported" : "Not Supported"); + printf("Error Log Page Entries Supported: %d\n", cdata->elpe + 1); + if (cdata->kas == 0) { + printf("Keep Alive: Not Supported\n"); + } else { + printf("Keep Alive: Supported\n"); + printf("Keep Alive Granularity: %u ms\n", + cdata->kas * 100); + } + printf("\n"); + + printf("NVM Command Set Attributes\n"); + printf("==========================\n"); + printf("Submission Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->sqes.max); + printf(" Min: %d\n", 1 << cdata->sqes.min); + printf("Completion Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->cqes.max); + printf(" Min: %d\n", 1 << cdata->cqes.min); + printf("Number of Namespaces: %d\n", cdata->nn); + printf("Compare Command: %s\n", + cdata->oncs.compare ? "Supported" : "Not Supported"); + printf("Write Uncorrectable Command: %s\n", + cdata->oncs.write_unc ? 
"Supported" : "Not Supported"); + printf("Dataset Management Command: %s\n", + cdata->oncs.dsm ? "Supported" : "Not Supported"); + printf("Write Zeroes Command: %s\n", + cdata->oncs.write_zeroes ? "Supported" : "Not Supported"); + printf("Set Features Save Field: %s\n", + cdata->oncs.set_features_save ? "Supported" : "Not Supported"); + printf("Reservations: %s\n", + cdata->oncs.reservations ? "Supported" : "Not Supported"); + printf("Timestamp: %s\n", + cdata->oncs.timestamp ? "Supported" : "Not Supported"); + printf("Volatile Write Cache: %s\n", + cdata->vwc.present ? "Present" : "Not Present"); + printf("Atomic Write Unit (Normal): %d\n", cdata->awun + 1); + printf("Atomic Write Unit (PFail): %d\n", cdata->awupf + 1); + printf("Scatter-Gather List\n"); + printf(" SGL Command Set: %s\n", + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED ? "Supported" : + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED ? "Supported (Dword aligned)" : + "Not Supported"); + printf(" SGL Keyed: %s\n", + cdata->sgls.keyed_sgl ? "Supported" : "Not Supported"); + printf(" SGL Bit Bucket Descriptor: %s\n", + cdata->sgls.bit_bucket_descriptor ? "Supported" : "Not Supported"); + printf(" SGL Metadata Pointer: %s\n", + cdata->sgls.metadata_pointer ? "Supported" : "Not Supported"); + printf(" Oversized SGL: %s\n", + cdata->sgls.oversized_sgl ? "Supported" : "Not Supported"); + printf(" SGL Metadata Address: %s\n", + cdata->sgls.metadata_address ? "Supported" : "Not Supported"); + printf(" SGL Offset: %s\n", + cdata->sgls.sgl_offset ? "Supported" : "Not Supported"); + printf(" Transport SGL Data Block: %s\n", + cdata->sgls.transport_sgl ? 
"Supported" : "Not Supported"); + printf("\n"); + + printf("Firmware Slot Information\n"); + printf("=========================\n"); + if (g_hex_dump) { + hex_dump(&firmware_page, sizeof(firmware_page)); + printf("\n"); + } + printf("Active slot: %u\n", firmware_page.afi.active_slot); + if (firmware_page.afi.next_reset_slot) { + printf("Next controller reset slot: %u\n", firmware_page.afi.next_reset_slot); + } + for (i = 0; i < 7; i++) { + if (!spdk_mem_all_zero(firmware_page.revision[i], sizeof(firmware_page.revision[i]))) { + printf("Slot %u Firmware Revision: ", i + 1); + print_ascii_string(firmware_page.revision[i], sizeof(firmware_page.revision[i])); + printf("\n"); + } + } + printf("\n"); + + if (cdata->lpa.celp) { + printf("Commands Supported and Effects\n"); + printf("==============================\n"); + + if (g_hex_dump) { + hex_dump(&cmd_effects_log_page, sizeof(cmd_effects_log_page)); + printf("\n"); + } + + printf("Admin Commands\n"); + printf("--------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.admin_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.admin_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + admin_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? "Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? "All-NS-Exclusive" : ""); + } + } + + printf("I/O Commands\n"); + printf("------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.io_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.io_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + io_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? 
"Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? "All-NS-Exclusive" : ""); + } + } + printf("\n"); + } + + printf("Error Log\n"); + printf("=========\n"); + for (i = 0; i <= cdata->elpe; i++) { + error_entry = &error_page[i]; + if (error_entry->error_count == 0) { + continue; + } + if (i != 0) { + printf("-----------\n"); + } + + printf("Entry: %u\n", i); + printf("Error Count: 0x%"PRIx64"\n", error_entry->error_count); + printf("Submission Queue Id: 0x%x\n", error_entry->sqid); + printf("Command Id: 0x%x\n", error_entry->cid); + printf("Phase Bit: %x\n", error_entry->status.p); + printf("Status Code: 0x%x\n", error_entry->status.sc); + printf("Status Code Type: 0x%x\n", error_entry->status.sct); + printf("Do Not Retry: %x\n", error_entry->status.dnr); + printf("Error Location: 0x%x\n", error_entry->error_location); + printf("LBA: 0x%"PRIx64"\n", error_entry->lba); + printf("Namespace: 0x%x\n", error_entry->nsid); + printf("Vendor Log Page: 0x%x\n", error_entry->vendor_specific); + + } + printf("\n"); + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; + unsigned ab, lpw, mpw, hpw; + + ab = arb & 0x7; + lpw = ((arb >> 8) & 0xFF) + 1; + mpw = ((arb >> 16) & 0xFF) + 1; + hpw = ((arb >> 24) & 0xFF) + 1; + + printf("Arbitration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (ab == 0x7) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << ab); + } + printf("Low Priority Weight: %u\n", lpw); + printf("Medium Priority Weight: %u\n", mpw); + printf("High Priority Weight: %u\n", hpw); + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_POWER_MANAGEMENT].valid) { + unsigned ps = features[SPDK_NVME_FEAT_POWER_MANAGEMENT].result & 0x1F; + printf("Power Management\n"); + printf("================\n"); + printf("Number of Power States: %u\n", cdata->npss + 1); + printf("Current Power State: Power State #%u\n", ps); + for (i = 0; i <= 
cdata->npss; i++) { + const struct spdk_nvme_power_state *psd = &cdata->psd[i]; + printf("Power State #%u: ", i); + if (psd->mps) { + /* MP scale is 0.0001 W */ + printf("Max Power: %u.%04u W\n", + psd->mp / 10000, + psd->mp % 10000); + } else { + /* MP scale is 0.01 W */ + printf("Max Power: %3u.%02u W\n", + psd->mp / 100, + psd->mp % 100); + } + /* TODO: print other power state descriptor fields */ + } + printf("Non-Operational Permissive Mode: %s\n", + cdata->ctratt.non_operational_power_state_permissive_mode ? "Supported" : "Not Supported"); + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].valid) { + printf("Health Information\n"); + printf("==================\n"); + + if (g_hex_dump) { + hex_dump(&health_page, sizeof(health_page)); + printf("\n"); + } + + printf("Critical Warnings:\n"); + printf(" Available Spare Space: %s\n", + health_page.critical_warning.bits.available_spare ? "WARNING" : "OK"); + printf(" Temperature: %s\n", + health_page.critical_warning.bits.temperature ? "WARNING" : "OK"); + printf(" Device Reliability: %s\n", + health_page.critical_warning.bits.device_reliability ? "WARNING" : "OK"); + printf(" Read Only: %s\n", + health_page.critical_warning.bits.read_only ? "Yes" : "No"); + printf(" Volatile Memory Backup: %s\n", + health_page.critical_warning.bits.volatile_memory_backup ? 
"WARNING" : "OK"); + printf("Current Temperature: %u Kelvin (%d Celsius)\n", + health_page.temperature, + (int)health_page.temperature - 273); + printf("Temperature Threshold: %u Kelvin (%d Celsius)\n", + features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result, + (int)features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result - 273); + printf("Available Spare: %u%%\n", health_page.available_spare); + printf("Available Spare Threshold: %u%%\n", health_page.available_spare_threshold); + printf("Life Percentage Used: %u%%\n", health_page.percentage_used); + printf("Data Units Read: "); + print_uint128_dec(health_page.data_units_read); + printf("\n"); + printf("Data Units Written: "); + print_uint128_dec(health_page.data_units_written); + printf("\n"); + printf("Host Read Commands: "); + print_uint128_dec(health_page.host_read_commands); + printf("\n"); + printf("Host Write Commands: "); + print_uint128_dec(health_page.host_write_commands); + printf("\n"); + printf("Controller Busy Time: "); + print_uint128_dec(health_page.controller_busy_time); + printf(" minutes\n"); + printf("Power Cycles: "); + print_uint128_dec(health_page.power_cycles); + printf("\n"); + printf("Power On Hours: "); + print_uint128_dec(health_page.power_on_hours); + printf(" hours\n"); + printf("Unsafe Shutdowns: "); + print_uint128_dec(health_page.unsafe_shutdowns); + printf("\n"); + printf("Unrecoverable Media Errors: "); + print_uint128_dec(health_page.media_errors); + printf("\n"); + printf("Lifetime Error Log Entries: "); + print_uint128_dec(health_page.num_error_info_log_entries); + printf("\n"); + printf("Warning Temperature Time: %u minutes\n", health_page.warning_temp_time); + printf("Critical Temperature Time: %u minutes\n", health_page.critical_temp_time); + for (i = 0; i < 8; i++) { + if (health_page.temp_sensor[i] != 0) { + printf("Temperature Sensor %d: %u Kelvin (%d Celsius)\n", + i + 1, health_page.temp_sensor[i], + (int)health_page.temp_sensor[i] - 273); + } + } + printf("\n"); + } + + 
if (features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].valid) { + uint32_t result = features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].result; + + printf("Number of Queues\n"); + printf("================\n"); + printf("Number of I/O Submission Queues: %u\n", (result & 0xFFFF) + 1); + /* Parenthesize the mask: >> binds tighter than &, so the unparenthesized form re-read the low 16 bits */ + printf("Number of I/O Completion Queues: %u\n", ((result & 0xFFFF0000) >> 16) + 1); + printf("\n"); + } + + if (features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].valid) { + uint32_t result = features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].result; + + printf("OCSSD Media Feedback\n"); + printf("=======================\n"); + printf("High ECC status: %u\n", (result & 0x1)); + /* Mask bit 1 first, then shift; "0x2 >> 1" evaluated to 1 and reported bit 0 again */ + printf("Vector High ECC status: %u\n", ((result & 0x2) >> 1)); + printf("\n"); + } + + if (cdata->hctma.bits.supported) { + printf("Host Controlled Thermal Management\n"); + printf("==================================\n"); + printf("Minimum Thermal Management Temperature: "); + if (cdata->mntmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mntmt, (int)cdata->mntmt - 273); + } else { + printf("Not Reported\n"); + } + printf("Maximum Thermal Managment Temperature: "); + if (cdata->mxtmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mxtmt, (int)cdata->mxtmt - 273); + } else { + printf("Not Reported\n"); + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + size_t i = 0; + + printf("Intel Health Information\n"); + printf("==================\n"); + for (i = 0; + i < SPDK_COUNTOF(intel_smart_page.attributes); i++) { + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PROGRAM_FAIL_COUNT) { + printf("Program Fail Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_ERASE_FAIL_COUNT) { + printf("Erase Fail Count:\n"); + printf(" Normalized Value : %d\n", + 
intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_WEAR_LEVELING_COUNT) { + printf("Wear Leveling Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Min: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[0], 2); + printf("\n"); + printf(" Max: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[2], 2); + printf("\n"); + printf(" Avg: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[4], 2); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_E2E_ERROR_COUNT) { + printf("End to End Error Detection Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_CRC_ERROR_COUNT) { + printf("CRC Error Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_MEDIA_WEAR) { + printf("Timed Workload, Media Wear:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_HOST_READ_PERCENTAGE) { + printf("Timed Workload, Host Read/Write Ratio:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" 
Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("%%"); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_TIMER) { + printf("Timed Workload, Timer:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS) { + printf("Thermal Throttle Status:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Percentage: %d%%\n", intel_smart_page.attributes[i].raw_value[0]); + printf(" Throttling Event Count: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[1], 4); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER) { + printf("Retry Buffer Overflow Counter:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT) { + printf("PLL Lock Loss Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_NAND_BYTES_WRITTEN) { + printf("NAND Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == 
SPDK_NVME_INTEL_SMART_HOST_BYTES_WRITTEN) { + printf("Host Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + printf("Intel Temperature Information\n"); + printf("==================\n"); + printf("Current Temperature: %lu\n", intel_temperature_page.current_temperature); + printf("Overtemp shutdown Flag for last critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_last); + printf("Overtemp shutdown Flag for life critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_life); + printf("Highest temperature: %lu\n", intel_temperature_page.highest_temperature); + printf("Lowest temperature: %lu\n", intel_temperature_page.lowest_temperature); + printf("Specified Maximum Operating Temperature: %lu\n", + intel_temperature_page.specified_max_op_temperature); + printf("Specified Minimum Operating Temperature: %lu\n", + intel_temperature_page.specified_min_op_temperature); + printf("Estimated offset: %ld\n", intel_temperature_page.estimated_offset); + printf("\n"); + printf("\n"); + + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + printf("Intel Marketing Information\n"); + printf("==================\n"); + snprintf(str, sizeof(intel_md_page.marketing_product), "%s", intel_md_page.marketing_product); + printf("Marketing Product Information: %s\n", str); + printf("\n"); + printf("\n"); + } + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + print_namespace(spdk_nvme_ctrlr_get_ns(ctrlr, nsid)); + } + + if (g_discovery_page) { + printf("Discovery Log Page\n"); + printf("==================\n"); + + if (g_hex_dump) 
{ + hex_dump(g_discovery_page, g_discovery_page_size); + printf("\n"); + } + + printf("Generation Counter: %" PRIu64 "\n", + from_le64(&g_discovery_page->genctr)); + printf("Number of Records: %" PRIu64 "\n", + from_le64(&g_discovery_page->numrec)); + printf("Record Format: %" PRIu16 "\n", + from_le16(&g_discovery_page->recfmt)); + printf("\n"); + + for (i = 0; i < g_discovery_page_numrec; i++) { + struct spdk_nvmf_discovery_log_page_entry *entry = &g_discovery_page->entries[i]; + + printf("Discovery Log Entry %u\n", i); + printf("----------------------\n"); + printf("Transport Type: %u (%s)\n", + entry->trtype, spdk_nvme_transport_id_trtype_str(entry->trtype)); + printf("Address Family: %u (%s)\n", + entry->adrfam, spdk_nvme_transport_id_adrfam_str(entry->adrfam)); + printf("Subsystem Type: %u (%s)\n", + entry->subtype, + entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY ? "Discovery Service" : + entry->subtype == SPDK_NVMF_SUBTYPE_NVME ? "NVM Subsystem" : + "Unknown"); + printf("Transport Requirements:\n"); + printf(" Secure Channel: %s\n", + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED ? "Not Specified" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED ? "Required" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED ? 
"Not Required" : + "Reserved"); + printf("Port ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->portid), from_le16(&entry->portid)); + printf("Controller ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->cntlid), from_le16(&entry->cntlid)); + printf("Admin Max SQ Size: %" PRIu16 "\n", + from_le16(&entry->asqsz)); + snprintf(str, sizeof(entry->trsvcid) + 1, "%s", entry->trsvcid); + printf("Transport Service Identifier: %s\n", str); + snprintf(str, sizeof(entry->subnqn) + 1, "%s", entry->subnqn); + printf("NVM Subsystem Qualified Name: %s\n", str); + snprintf(str, sizeof(entry->traddr) + 1, "%s", entry->traddr); + printf("Transport Address: %s\n", str); + + if (entry->trtype == SPDK_NVMF_TRTYPE_RDMA) { + printf("Transport Specific Address Subtype - RDMA\n"); + printf(" RDMA QP Service Type: %u (%s)\n", + entry->tsas.rdma.rdma_qptype, + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED ? "Reliable Connected" : + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM ? "Reliable Datagram" : + "Unknown"); + printf(" RDMA Provider Type: %u (%s)\n", + entry->tsas.rdma.rdma_prtype, + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_NONE ? "No provider specified" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IB ? "InfiniBand" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE ? "InfiniBand RoCE" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE2 ? "InfiniBand RoCE v2" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IWARP ? "iWARP" : + "Unknown"); + printf(" RDMA CM Service: %u (%s)\n", + entry->tsas.rdma.rdma_cms, + entry->tsas.rdma.rdma_cms == SPDK_NVMF_RDMA_CMS_RDMA_CM ? 
"RDMA_CM" : + "Unknown"); + if (entry->adrfam == SPDK_NVMF_ADRFAM_IB) { + printf(" RDMA Partition Key: %" PRIu32 "\n", + from_le32(&entry->tsas.rdma.rdma_pkey)); + } + } + } + free(g_discovery_page); + g_discovery_page = NULL; + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -r trid remote NVMe over Fabrics target address\n"); + printf(" Format: 'key:value [key:value] ...'\n"); + printf(" Keys:\n"); + printf(" trtype Transport type (e.g. RDMA)\n"); + printf(" adrfam Address family (e.g. IPv4, IPv6)\n"); + printf(" traddr Transport address (e.g. 192.168.100.8)\n"); + printf(" trsvcid Transport service identifier (e.g. 4420)\n"); + printf(" subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf(" Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n"); + + spdk_tracelog_usage(stdout, "-L"); + + printf(" -i shared memory group ID\n"); + printf(" -p core number in decimal to run this application which started from 0\n"); + printf(" -d DPDK huge memory size in MB\n"); + printf(" -x print hex dump of raw data\n"); + printf(" -v verbose (enable warnings)\n"); + printf(" -H show this usage\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op, rc; + + g_trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + while ((op = getopt(argc, argv, "d:i:p:r:xHL:")) != -1) { + switch (op) { + case 'd': + g_dpdk_mem = atoi(optarg); + break; + case 'i': + g_shm_id = atoi(optarg); + break; + case 'p': + g_master_core = atoi(optarg); + if (g_master_core < 0) { + fprintf(stderr, "Invalid core number\n"); + return 1; + } + snprintf(g_core_mask, sizeof(g_core_mask), "0x%llx", 1ULL << g_master_core); + break; + case 'r': + if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { + fprintf(stderr, "Error parsing transport address\n"); + return 1; + } + break; + case 'x': 
+ g_hex_dump = true; + break; + case 'L': + rc = spdk_log_set_trace_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -L flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + + case 'H': + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + g_controllers_found++; + print_controller(ctrlr, trid); + spdk_nvme_detach(ctrlr); +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "identify"; + opts.shm_id = g_shm_id; + opts.mem_size = g_dpdk_mem; + opts.mem_channel = 1; + opts.master_core = g_master_core; + opts.core_mask = g_core_mask; + if (g_trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + opts.no_pci = true; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + /* A specific trid is required. 
*/ + if (strlen(g_trid.traddr) != 0) { + ctrlr = spdk_nvme_connect(&g_trid, NULL, 0); + if (!ctrlr) { + fprintf(stderr, "spdk_nvme_connect() failed\n"); + return 1; + } + + g_controllers_found++; + print_controller(ctrlr, &g_trid); + spdk_nvme_detach(ctrlr); + } else if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_controllers_found == 0) { + fprintf(stderr, "No NVMe controllers found.\n"); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/nvme_manage/.gitignore b/src/spdk/examples/nvme/nvme_manage/.gitignore new file mode 100644 index 00000000..cdc78a1a --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/.gitignore @@ -0,0 +1 @@ +nvme_manage diff --git a/src/spdk/examples/nvme/nvme_manage/Makefile b/src/spdk/examples/nvme/nvme_manage/Makefile new file mode 100644 index 00000000..4f49872e --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = nvme_manage + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/nvme_manage/nvme_manage.c b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c new file mode 100644 index 00000000..360cbaac --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c @@ -0,0 +1,970 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns_data *common_ns_data; + int outstanding_admin_cmds; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; +static int g_shm_id = -1; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +enum controller_display_model { + CONTROLLER_DISPLAY_ALL = 0x0, + CONTROLLER_DISPLAY_SIMPLISTIC = 0x1, +}; + +static int +cmp_devs(const void *ap, const void *bp) +{ + const struct dev *a = ap, *b = bp; + + return spdk_pci_addr_compare(&a->pci_addr, &b->pci_addr); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +identify_common_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + if (cpl->status.sc != SPDK_NVME_SC_SUCCESS) { + /* Identify Namespace for NSID = FFFFFFFFh is optional, so failure is not fatal. */ + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + dev->outstanding_admin_cmds--; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + struct spdk_nvme_cmd cmd; + + /* add to dev list */ + dev = &devs[num_devs++]; + spdk_pci_addr_parse(&dev->pci_addr, trid->traddr); + dev->ctrlr = ctrlr; + + /* Retrieve controller data */ + dev->cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + dev->common_ns_data = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (dev->common_ns_data == NULL) { + fprintf(stderr, "common_ns_data allocation failure\n"); + return; + } + + /* Identify Namespace with NSID set to FFFFFFFFh to get common namespace capabilities. 
*/ + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10 = 0; /* CNS = 0 (Identify Namespace) */ + cmd.nsid = SPDK_NVME_GLOBAL_NS_TAG; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, dev->common_ns_data, + sizeof(struct spdk_nvme_ns_data), identify_common_ns_cb, dev) != 0) { + dev->outstanding_admin_cmds--; + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void usage(void) +{ + printf("NVMe Management Options"); + printf("\n"); + printf("\t[1: list controllers]\n"); + printf("\t[2: create namespace]\n"); + printf("\t[3: delete namespace]\n"); + printf("\t[4: attach namespace to controller]\n"); + printf("\t[5: detach namespace from controller]\n"); + printf("\t[6: format namespace or controller]\n"); + printf("\t[7: firmware update]\n"); + printf("\t[8: quit]\n"); +} + +static void +display_namespace_dpc(const struct spdk_nvme_ns_data *nsdata) +{ + if (nsdata->dpc.pit1 || nsdata->dpc.pit2 || nsdata->dpc.pit3) { + if (nsdata->dpc.pit1) { + printf("PIT1 "); + } + + if (nsdata->dpc.pit2) { + printf("PIT2 "); + } + + if (nsdata->dpc.pit3) { + printf("PIT3 "); + } + } else { + printf("Not Supported\n"); + return; + } + + if (nsdata->dpc.md_start && nsdata->dpc.md_end) { + printf("Location: Head or Tail\n"); + } else if (nsdata->dpc.md_start) { + printf("Location: Head\n"); + } else if (nsdata->dpc.md_end) { + printf("Location: Tail\n"); + } else { + printf("Not Supported\n"); + } +} + +static void +display_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + uint32_t i; + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, 
+ (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + printf("Format Progress Indicator: %s\n", + nsdata->fpi.fpi_supported ? "Supported" : "Not Supported"); + if (nsdata->fpi.fpi_supported && nsdata->fpi.percentage_remaining) { + printf("Formatted Percentage: %d%%\n", 100 - nsdata->fpi.percentage_remaining); + } + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("Data Protection Capabilities:"); + display_namespace_dpc(nsdata); + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE == nsdata->dps.pit) { + printf("Data Protection Setting: N/A\n"); + } else { + printf("Data Protection Setting: PIT%d Location: %s\n", + nsdata->dps.pit, nsdata->dps.md_start ? "Head" : "Tail"); + } + printf("Multipath IO and Sharing: %s\n", + nsdata->nmic.can_share ? 
"Supported" : "Not Supported"); + printf("\n"); +} + +static void +display_controller(struct dev *dev, int model) +{ + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata; + uint8_t str[128]; + uint32_t i; + + cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + if (model == CONTROLLER_DISPLAY_SIMPLISTIC) { + printf("%04x:%02x:%02x.%02x ", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("%-40.40s %-20.20s ", + cdata->mn, cdata->sn); + printf("%5d ", cdata->cntlid); + printf("\n"); + return; + } + + printf("=====================================================\n"); + printf("NVMe Controller: %04x:%02x:%02x.%02x\n", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("============================\n"); + printf("Controller Capabilities/Features\n"); + printf("Controller ID: %d\n", cdata->cntlid); + snprintf(str, sizeof(cdata->sn) + 1, "%s", cdata->sn); + printf("Serial Number: %s\n", str); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Namespace Manage And Attach: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Namespace Format: %s\n", + cdata->oacs.format ? 
"Supported" : "Not Supported"); + printf("\n"); + printf("NVM Command Set Attributes\n"); + printf("============================\n"); + if (cdata->fna.format_all_ns) { + printf("Namespace format operation applies to all namespaces\n"); + } else { + printf("Namespace format operation applies to per namespace\n"); + } + printf("\n"); + printf("Namespace Attributes\n"); + printf("============================\n"); + for (i = 1; i <= spdk_nvme_ctrlr_get_num_ns(dev->ctrlr); i++) { + ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, i); + if (ns == NULL) { + continue; + } + display_namespace(ns); + } +} + +static void +display_controller_list(void) +{ + struct dev *iter; + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_ALL); + } +} + +static char * +get_line(char *buf, int buf_size, FILE *f) +{ + char *ret; + size_t len; + + ret = fgets(buf, buf_size, f); + if (ret == NULL) { + return NULL; + } + + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') { + buf[len - 1] = '\0'; + } + return buf; +} + +static struct dev * +get_controller(void) +{ + struct spdk_pci_addr pci_addr; + char address[64]; + char *p; + int ch; + struct dev *iter; + + memset(address, 0, sizeof(address)); + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_SIMPLISTIC); + } + + printf("Please Input PCI Address(domain:bus:dev.func):\n"); + + while ((ch = getchar()) != '\n' && ch != EOF); + p = get_line(address, 64, stdin); + if (p == NULL) { + return NULL; + } + + while (isspace(*p)) { + p++; + } + + if (spdk_pci_addr_parse(&pci_addr, p) < 0) { + return NULL; + } + + foreach_dev(iter) { + if (spdk_pci_addr_compare(&pci_addr, &iter->pci_addr) == 0) { + return iter; + } + } + return NULL; +} + +static int +get_lba_format(const struct spdk_nvme_ns_data *ns_data) +{ + int lbaf, i; + + printf("\nSupported LBA formats:\n"); + for (i = 0; i <= ns_data->nlbaf; i++) { + printf("%2d: %d data bytes", i, 1 << ns_data->lbaf[i].lbads); + if (ns_data->lbaf[i].ms) { + printf(" + %d 
metadata bytes", ns_data->lbaf[i].ms); + } + printf("\n"); + } + + printf("Please input LBA format index (0 - %d):\n", ns_data->nlbaf); + if (scanf("%d", &lbaf) != 1 || lbaf > ns_data->nlbaf) { + return -1; + } + + return lbaf; +} + +static void +identify_allocated_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + dev->outstanding_admin_cmds--; +} + +static uint32_t +get_allocated_nsid(struct dev *dev) +{ + uint32_t nsid; + size_t i; + struct spdk_nvme_ns_list *ns_list; + struct spdk_nvme_cmd cmd = {0}; + + ns_list = spdk_dma_zmalloc(sizeof(*ns_list), 4096, NULL); + if (ns_list == NULL) { + printf("Allocation error\n"); + return 0; + } + + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10 = SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST; + cmd.nsid = 0; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, ns_list, sizeof(*ns_list), + identify_allocated_ns_cb, dev)) { + printf("Identify command failed\n"); + spdk_dma_free(ns_list); + return 0; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + + printf("Allocated Namespace IDs:\n"); + for (i = 0; i < SPDK_COUNTOF(ns_list->ns_list); i++) { + if (ns_list->ns_list[i] == 0) { + break; + } + printf("%u\n", ns_list->ns_list[i]); + } + + spdk_dma_free(ns_list); + + printf("Please Input Namespace ID:\n"); + if (!scanf("%u", &nsid)) { + printf("Invalid Namespace ID\n"); + nsid = 0; + } + + return nsid; +} + +static void +ns_attach(struct dev *device, int attachment_op, int ctrlr_id, int ns_id) +{ + int ret = 0; + struct spdk_nvme_ctrlr_list *ctrlr_list; + + ctrlr_list = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_list), + 4096, NULL); + if (ctrlr_list == NULL) { + printf("Allocation error (controller list)\n"); + exit(1); + } + + ctrlr_list->ctrlr_count = 1; + ctrlr_list->ctrlr_list[0] = ctrlr_id; + + if (attachment_op == SPDK_NVME_NS_CTRLR_ATTACH) { + ret = spdk_nvme_ctrlr_attach_ns(device->ctrlr, ns_id, 
ctrlr_list); + } else if (attachment_op == SPDK_NVME_NS_CTRLR_DETACH) { + ret = spdk_nvme_ctrlr_detach_ns(device->ctrlr, ns_id, ctrlr_list); + } + + if (ret) { + fprintf(stdout, "ns attach: Failed\n"); + } + + spdk_dma_free(ctrlr_list); +} + +static void +ns_manage_add(struct dev *device, uint64_t ns_size, uint64_t ns_capacity, int ns_lbasize, + uint8_t ns_dps_type, uint8_t ns_dps_location, uint8_t ns_nmic) +{ + uint32_t nsid; + struct spdk_nvme_ns_data *ndata; + + ndata = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (ndata == NULL) { + printf("Allocation error (namespace data)\n"); + exit(1); + } + + ndata->nsze = ns_size; + ndata->ncap = ns_capacity; + ndata->flbas.format = ns_lbasize; + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + ndata->dps.pit = ns_dps_type; + ndata->dps.md_start = ns_dps_location; + } + ndata->nmic.can_share = ns_nmic; + nsid = spdk_nvme_ctrlr_create_ns(device->ctrlr, ndata); + if (nsid == 0) { + fprintf(stdout, "ns manage: Failed\n"); + } else { + printf("Created namespace ID %u\n", nsid); + } + + spdk_dma_free(ndata); +} + +static void +ns_manage_delete(struct dev *device, int ns_id) +{ + int ret = 0; + + ret = spdk_nvme_ctrlr_delete_ns(device->ctrlr, ns_id); + if (ret) { + fprintf(stdout, "ns manage: Failed\n"); + return; + } +} + +static void +nvme_manage_format(struct dev *device, int ns_id, int ses, int pi, int pil, int ms, int lbaf) +{ + int ret = 0; + struct spdk_nvme_format format = {}; + + format.lbaf = lbaf; + format.ms = ms; + format.pi = pi; + format.pil = pil; + format.ses = ses; + ret = spdk_nvme_ctrlr_format(device->ctrlr, ns_id, &format); + if (ret) { + fprintf(stdout, "nvme format: Failed\n"); + return; + } +} + +static void +attach_and_detach_ns(int attachment_op) +{ + uint32_t nsid; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does 
not support ns management\n"); + return; + } + + nsid = get_allocated_nsid(ctrlr); + if (nsid == 0) { + printf("Invalid Namespace ID\n"); + return; + } + + ns_attach(ctrlr, attachment_op, ctrlr->cdata->cntlid, nsid); +} + +static void +add_ns(void) +{ + uint64_t ns_size = 0; + uint64_t ns_capacity = 0; + int ns_lbasize; + int ns_dps_type = 0; + int ns_dps_location = 0; + int ns_nmic = 0; + struct dev *ctrlr = NULL; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + if (!ctrlr->common_ns_data) { + printf("Controller did not return common namespace capabilities\n"); + return; + } + + ns_lbasize = get_lba_format(ctrlr->common_ns_data); + if (ns_lbasize < 0) { + printf("Invalid LBA format number\n"); + return; + } + + printf("Please Input Namespace Size (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_size)) { + printf("Invalid Namespace Size\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Namespace Capacity (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_capacity)) { + printf("Invalid Namespace Capacity\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Data Protection Type (0 - 3):\n"); + if (!scanf("%d", &ns_dps_type)) { + printf("Invalid Data Protection Type\n"); + while (getchar() != '\n'); + return; + } + + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + printf("Please Input Data Protection Location (1: Head; 0: Tail):\n"); + if (!scanf("%d", &ns_dps_location)) { + printf("Invalid Data Protection Location\n"); + while (getchar() != '\n'); + return; + } + } + + printf("Please Input Multi-path IO and Sharing Capabilities (1: Share; 0: Private):\n"); + if (!scanf("%d", &ns_nmic)) { + printf("Invalid Multi-path IO and Sharing Capabilities\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_add(ctrlr, ns_size, ns_capacity, 
ns_lbasize, + ns_dps_type, ns_dps_location, ns_nmic); +} + +static void +delete_ns(void) +{ + int ns_id; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + printf("Please Input Namespace ID:\n"); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_delete(ctrlr, ns_id); +} + +static void +format_nvm(void) +{ + int ns_id; + int ses; + int pil; + int pi; + int ms; + int lbaf; + char option; + struct dev *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ns_data *nsdata; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.format) { + printf("Controller does not support Format NVM command\n"); + return; + } + + if (cdata->fna.format_all_ns) { + ns_id = SPDK_NVME_GLOBAL_NS_TAG; + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, 1); + } else { + printf("Please Input Namespace ID (1 - %d):\n", cdata->nn); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, ns_id); + } + + if (ns == NULL) { + printf("Namespace ID %d not found\n", ns_id); + while (getchar() != '\n'); + return; + } + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Please Input Secure Erase Setting:\n"); + printf(" 0: No secure erase operation requested\n"); + printf(" 1: User data erase\n"); + if (cdata->fna.crypto_erase_supported) { + printf(" 2: Cryptographic erase\n"); + } + if (!scanf("%d", &ses)) { + printf("Invalid Secure Erase Setting\n"); + while (getchar() != '\n'); + return; + } + + lbaf = get_lba_format(nsdata); + if (lbaf < 0) { + printf("Invalid LBA format number\n"); + return; + } 
+ + if (nsdata->lbaf[lbaf].ms) { + printf("Please Input Protection Information:\n"); + printf(" 0: Protection information is not enabled\n"); + printf(" 1: Protection information is enabled, Type 1\n"); + printf(" 2: Protection information is enabled, Type 2\n"); + printf(" 3: Protection information is enabled, Type 3\n"); + if (!scanf("%d", &pi)) { + printf("Invalid protection information\n"); + while (getchar() != '\n'); + return; + } + + if (pi) { + printf("Please Input Protection Information Location:\n"); + printf(" 0: Protection information transferred as the last eight bytes of metadata\n"); + printf(" 1: Protection information transferred as the first eight bytes of metadata\n"); + if (!scanf("%d", &pil)) { + printf("Invalid protection information location\n"); + while (getchar() != '\n'); + return; + } + } else { + pil = 0; + } + + printf("Please Input Metadata Setting:\n"); + printf(" 0: Metadata is transferred as part of a separate buffer\n"); + printf(" 1: Metadata is transferred as part of an extended data LBA\n"); + if (!scanf("%d", &ms)) { + printf("Invalid metadata setting\n"); + while (getchar() != '\n'); + return; + } + } else { + ms = 0; + pi = 0; + pil = 0; + } + + printf("Warning: use this utility at your own risk.\n" + "This command will format your namespace and all data will be lost.\n" + "This command may take several minutes to complete,\n" + "so do not interrupt the utility until it completes.\n" + "Press 'Y' to continue with the format operation.\n"); + + while (getchar() != '\n'); + if (!scanf("%c", &option)) { + printf("Invalid option\n"); + while (getchar() != '\n'); + return; + } + + if (option == 'y' || option == 'Y') { + nvme_manage_format(ctrlr, ns_id, ses, pi, pil, ms, lbaf); + } else { + printf("NVMe format abort\n"); + } +} + +static void +update_firmware_image(void) +{ + int rc; + int fd = -1; + int slot; + unsigned int size; + struct stat fw_stat; + char path[256]; + void *fw_image; + struct dev *ctrlr; + const struct 
spdk_nvme_ctrlr_data *cdata; + enum spdk_nvme_fw_commit_action commit_action; + struct spdk_nvme_status status; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.firmware) { + printf("Controller does not support firmware download and commit command\n"); + return; + } + + printf("Please Input The Path Of Firmware Image\n"); + + if (get_line(path, sizeof(path), stdin) == NULL) { + printf("Invalid path setting\n"); + while (getchar() != '\n'); + return; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("Open file failed"); + return; + } + rc = fstat(fd, &fw_stat); + if (rc < 0) { + printf("Fstat failed\n"); + close(fd); + return; + } + + if (fw_stat.st_size % 4) { + printf("Firmware image size is not multiple of 4\n"); + close(fd); + return; + } + + size = fw_stat.st_size; + + fw_image = spdk_dma_zmalloc(size, 4096, NULL); + if (fw_image == NULL) { + printf("Allocation error\n"); + close(fd); + return; + } + + if (read(fd, fw_image, size) != ((ssize_t)(size))) { + printf("Read firmware image failed\n"); + close(fd); + spdk_dma_free(fw_image); + return; + } + close(fd); + + printf("Please Input Slot(0 - 7):\n"); + if (!scanf("%d", &slot)) { + printf("Invalid Slot\n"); + spdk_dma_free(fw_image); + while (getchar() != '\n'); + return; + } + + commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG; + rc = spdk_nvme_ctrlr_update_firmware(ctrlr->ctrlr, fw_image, size, slot, commit_action, &status); + if (rc == -ENXIO && status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && + status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { + printf("conventional reset is needed to enable firmware !\n"); + } else if (rc) { + printf("spdk_nvme_ctrlr_update_firmware failed\n"); + } else { + printf("spdk_nvme_ctrlr_update_firmware success\n"); + } + spdk_dma_free(fw_image); +} + +static void +args_usage(const char *program_name) +{ + printf("%s [options]", 
program_name); + printf("\n"); + printf("options:\n"); + printf(" -i shared memory group ID\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "i:")) != -1) { + switch (op) { + case 'i': + g_shm_id = atoi(optarg); + break; + default: + args_usage(argv[0]); + return 1; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int i, rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "nvme_manage"; + opts.core_mask = "0x1"; + opts.shm_id = g_shm_id; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + qsort(devs, num_devs, sizeof(devs[0]), cmp_devs); + + usage(); + + while (1) { + int cmd; + bool exit_flag = false; + + if (!scanf("%d", &cmd)) { + printf("Invalid Command: command must be number 1-8\n"); + while (getchar() != '\n'); + usage(); + continue; + } + switch (cmd) { + case 1: + display_controller_list(); + break; + case 2: + add_ns(); + break; + case 3: + delete_ns(); + break; + case 4: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_ATTACH); + break; + case 5: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_DETACH); + break; + case 6: + format_nvm(); + break; + case 7: + update_firmware_image(); + break; + case 8: + exit_flag = true; + break; + default: + printf("Invalid Command\n"); + break; + } + + if (exit_flag) { + break; + } + + while (getchar() != '\n'); + printf("press Enter to display cmd menu ...\n"); + while (getchar() != '\n'); + usage(); + } + + printf("Cleaning up...\n"); + + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/perf/.gitignore b/src/spdk/examples/nvme/perf/.gitignore new file mode 100644 index 
00000000..bd14107d --- /dev/null +++ b/src/spdk/examples/nvme/perf/.gitignore @@ -0,0 +1 @@ +perf diff --git a/src/spdk/examples/nvme/perf/Makefile b/src/spdk/examples/nvme/perf/Makefile new file mode 100644 index 00000000..573f56a0 --- /dev/null +++ b/src/spdk/examples/nvme/perf/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = perf + +ifeq ($(OS),Linux) +SYS_LIBS += -laio +CFLAGS += -DHAVE_LIBAIO +endif + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/perf/README.md b/src/spdk/examples/nvme/perf/README.md new file mode 100644 index 00000000..e5ec38d1 --- /dev/null +++ b/src/spdk/examples/nvme/perf/README.md @@ -0,0 +1,5 @@ +# Compiling perf on FreeBSD + +To use perf test on FreeBSD over NVMe-oF, explicitly link userspace library of HBA. For example, on a setup with Mellanox HBA, + + LIBS += -lmlx5 diff --git a/src/spdk/examples/nvme/perf/perf.c b/src/spdk/examples/nvme/perf/perf.c new file mode 100644 index 00000000..f8f4d75e --- /dev/null +++ b/src/spdk/examples/nvme/perf/perf.c @@ -0,0 +1,1726 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/fd.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/nvme_intel.h" +#include "spdk/histogram_data.h" +#include "spdk/endian.h" +#include "spdk/crc16.h" + +#if HAVE_LIBAIO +#include <libaio.h> +#endif + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_intel_rw_latency_page *latency_page; + struct ctrlr_entry *next; + char name[1024]; +}; + +enum entry_type { + ENTRY_TYPE_NVME_NS, + ENTRY_TYPE_AIO_FILE, +}; + +struct ns_entry { + enum entry_type type; + + union { + struct { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + } nvme; +#if HAVE_LIBAIO + struct { + int fd; + } aio; +#endif + } u; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint32_t num_io_requests; + uint64_t size_in_ios; + uint32_t io_flags; + uint16_t apptag_mask; + uint16_t apptag; + char name[1024]; + const struct spdk_nvme_ns_data *nsdata; +}; + +static const double g_latency_cutoffs[] = { + 0.01, + 0.10, + 0.25, + 0.50, + 0.75, + 0.90, + 0.95, + 0.98, + 0.99, + 0.995, + 0.999, + 0.9999, + 0.99999, + 0.999999, + 0.9999999, + -1, +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_completed; + uint64_t total_tsc; + uint64_t min_tsc; + uint64_t max_tsc; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + + union { + struct { + struct 
spdk_nvme_qpair *qpair; + } nvme; + +#if HAVE_LIBAIO + struct { + struct io_event *events; + io_context_t ctx; + } aio; +#endif + } u; + + struct ns_worker_ctx *next; + + struct spdk_histogram_data *histogram; +}; + +struct perf_task { + struct ns_worker_ctx *ns_ctx; + void *buf; + uint64_t submit_tsc; + uint16_t appmask; + uint16_t apptag; + uint64_t lba; + bool is_read; +#if HAVE_LIBAIO + struct iocb iocb; +#endif +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; +}; + +static int g_outstanding_commands; + +static bool g_latency_ssd_tracking_enable = false; +static int g_latency_sw_tracking_level = 0; + +static struct ctrlr_entry *g_controllers = NULL; +static int g_controllers_found = 0; +static struct ns_entry *g_namespaces = NULL; +static int g_num_namespaces = 0; +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_align = 0x200; +static uint32_t g_io_size_bytes; +static uint32_t g_max_io_md_size; +static uint32_t g_max_io_size_blocks; +static uint32_t g_metacfg_pract_flag; +static uint32_t g_metacfg_prchk_flags; +static int g_rw_percentage; +static int g_is_random; +static int g_queue_depth; +static int g_time_in_sec; +static uint32_t g_max_completions; +static int g_dpdk_mem; +static int g_shm_id = -1; +static uint32_t g_disable_sq_cmb; +static bool g_no_pci; +static bool g_warn; + +static const char *g_core_mask; + +struct trid_entry { + struct spdk_nvme_transport_id trid; + uint16_t nsid; + TAILQ_ENTRY(trid_entry) tailq; +}; + +static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list); + +static int g_aio_optind; /* Index of first AIO filename in argv */ + +static void +task_complete(struct perf_task *task); + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + uint32_t max_xfer_size, 
entries; + struct spdk_nvme_io_qpair_opts opts; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + g_warn = true; + return; + } + + if (spdk_nvme_ns_get_size(ns) < g_io_size_bytes || + spdk_nvme_ns_get_sector_size(ns) > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + g_warn = true; + return; + } + + max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + /* NVMe driver may add additional entries based on + * stripe size and maximum transfer size, we assume + * 1 more entry be used for stripe. + */ + entries = (g_io_size_bytes - 1) / max_xfer_size + 2; + if ((g_queue_depth * entries) > opts.io_queue_size) { + printf("controller IO queue size %u less than required\n", + opts.io_queue_size); + printf("Consider using lower queue depth or small IO size because " + "IO requests may be queued at the NVMe driver.\n"); + g_warn = true; + } + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->type = ENTRY_TYPE_NVME_NS; + entry->u.nvme.ctrlr = ctrlr; + entry->u.nvme.ns = ns; + entry->num_io_requests = entries; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / + g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + entry->io_flags = g_metacfg_pract_flag | g_metacfg_prchk_flags; + } + + if (g_max_io_md_size < spdk_nvme_ns_get_md_size(ns)) { + g_max_io_md_size = spdk_nvme_ns_get_md_size(ns); + } + + if (g_max_io_size_blocks < 
entry->io_size_blocks) { + g_max_io_size_blocks = entry->io_size_blocks; + } + + entry->nsdata = spdk_nvme_ns_get_data(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +unregister_namespaces(void) +{ + struct ns_entry *entry = g_namespaces; + + while (entry) { + struct ns_entry *next = entry->next; + free(entry); + entry = next; + } +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_outstanding_commands++; + + while (g_outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry) +{ + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + uint32_t nsid; + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + entry->latency_page = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_rw_latency_page), + 4096, NULL); + if (entry->latency_page == NULL) { + printf("Allocation error (latency page)\n"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + 
entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + if (g_latency_ssd_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + if (trid_entry->nsid == 0) { + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + } else { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid); + if (!ns) { + perror("Namespace does not exist."); + exit(1); + } + + register_ns(ctrlr, ns); + } + +} + +#if HAVE_LIBAIO +static int +register_aio_file(const char *path) +{ + struct ns_entry *entry; + + int flags, fd; + uint64_t size; + uint32_t blklen; + + if (g_rw_percentage == 100) { + flags = O_RDONLY; + } else if (g_rw_percentage == 0) { + flags = O_WRONLY; + } else { + flags = O_RDWR; + } + + flags |= O_DIRECT; + + fd = open(path, flags); + if (fd < 0) { + fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno)); + return -1; + } + + size = spdk_fd_get_size(fd); + if (size == 0) { + fprintf(stderr, "Could not determine size of AIO device %s\n", path); + close(fd); + return -1; + } + + blklen = spdk_fd_get_blocklen(fd); + if (blklen == 0) { + fprintf(stderr, "Could not determine block size of AIO device %s\n", path); + close(fd); + return -1; + } + + /* + * TODO: This should really calculate the LCM of the current g_io_align and blklen. + * For now, it's fairly safe to just assume all block sizes are powers of 2. 
+ */ + if (g_io_align < blklen) { + g_io_align = blklen; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + close(fd); + perror("aio ns_entry malloc"); + return -1; + } + + entry->type = ENTRY_TYPE_AIO_FILE; + entry->u.aio.fd = fd; + entry->size_in_ios = size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / blklen; + + snprintf(entry->name, sizeof(entry->name), "%s", path); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; + + return 0; +} + +static int +aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf, + unsigned long nbytes, uint64_t offset, void *cb_ctx) +{ + iocb->aio_fildes = fd; + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = cmd; + iocb->u.c.buf = buf; + iocb->u.c.nbytes = nbytes; + iocb->u.c.offset = offset; + iocb->data = cb_ctx; + + if (io_submit(aio_ctx, 1, &iocb) < 0) { + printf("io_submit"); + return -1; + } + + return 0; +} + +static void +aio_check_io(struct ns_worker_ctx *ns_ctx) +{ + int count, i; + struct timespec timeout; + + timeout.tv_sec = 0; + timeout.tv_nsec = 0; + + count = io_getevents(ns_ctx->u.aio.ctx, 1, g_queue_depth, ns_ctx->u.aio.events, &timeout); + if (count < 0) { + fprintf(stderr, "io_getevents error\n"); + exit(1); + } + + for (i = 0; i < count; i++) { + task_complete(ns_ctx->u.aio.events[i].data); + } +} +#endif /* HAVE_LIBAIO */ + +static void +task_extended_lba_setup_pi(struct ns_entry *entry, struct perf_task *task, uint64_t lba, + uint32_t lba_count, bool is_write) +{ + struct spdk_nvme_protection_info *pi; + uint32_t i, md_size, sector_size, pi_offset; + uint16_t crc16; + + task->appmask = 0; + task->apptag = 0; + + if (!spdk_nvme_ns_supports_extended_lba(entry->u.nvme.ns)) { + return; + } + + if (spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return; + } + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return; + } + + /* Type3 don't support REFTAG */ + if 
(spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { + return; + } + + sector_size = spdk_nvme_ns_get_sector_size(entry->u.nvme.ns); + md_size = spdk_nvme_ns_get_md_size(entry->u.nvme.ns); + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (entry->nsdata->dps.md_start) { + return; + } + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + task->appmask = 0xffff; + task->apptag = lba_count; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = ((sector_size + md_size) * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(task->buf + pi_offset); + memset(pi, 0, sizeof(*pi)); + + if (is_write) { + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include PI */ + crc16 = spdk_crc16_t10dif(task->buf + (sector_size + md_size) * i, + sector_size + md_size - 8); + to_be16(&pi->guard, crc16); + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + to_be16(&pi->app_tag, lba_count); + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&pi->ref_tag, (uint32_t)lba + i); + } + } + } +} + +static void +task_extended_lba_pi_verify(struct ns_entry *entry, struct perf_task *task, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_nvme_protection_info *pi; + uint32_t i, md_size, sector_size, pi_offset, ref_tag; + uint16_t crc16, guard, app_tag; + + if (spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return; + } + + sector_size = spdk_nvme_ns_get_sector_size(entry->u.nvme.ns); + md_size = spdk_nvme_ns_get_md_size(entry->u.nvme.ns); + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (entry->nsdata->dps.md_start) { + return; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = ((sector_size + md_size) * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info 
*)(task->buf + pi_offset); + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include last 8 bytes of PI */ + crc16 = spdk_crc16_t10dif(task->buf + (sector_size + md_size) * i, + sector_size + md_size - 8); + to_be16(&guard, crc16); + if (pi->guard != guard) { + fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, guard, pi->guard); + } + + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Previously we used the number of lbas as + * application tag for writes + */ + to_be16(&app_tag, lba_count); + if (pi->app_tag != app_tag) { + fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, app_tag, pi->app_tag); + } + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&ref_tag, (uint32_t)lba + i); + if (pi->ref_tag != ref_tag) { + fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%08x but returned with 0x%08x," + " may read the LBA without write it first\n", + lba + i, ref_tag, pi->ref_tag); + } + } + } +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct perf_task *task) +{ + uint64_t offset_in_ios; + int rc; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ns_entry *entry = ns_ctx->entry; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + task->is_read = false; + task->submit_tsc = spdk_get_ticks(); + task->lba = offset_in_ios * entry->io_size_blocks; + + if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { 
+#if HAVE_LIBAIO + if (entry->type == ENTRY_TYPE_AIO_FILE) { + rc = aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD, task->buf, + g_io_size_bytes, offset_in_ios * g_io_size_bytes, task); + } else +#endif + { + task_extended_lba_setup_pi(entry, task, task->lba, + entry->io_size_blocks, false); + task->is_read = true; + + rc = spdk_nvme_ns_cmd_read_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair, + task->buf, NULL, + task->lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags, + task->appmask, task->apptag); + } + } else { +#if HAVE_LIBAIO + if (entry->type == ENTRY_TYPE_AIO_FILE) { + rc = aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE, task->buf, + g_io_size_bytes, offset_in_ios * g_io_size_bytes, task); + } else +#endif + { + task_extended_lba_setup_pi(entry, task, task->lba, + entry->io_size_blocks, true); + + rc = spdk_nvme_ns_cmd_write_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair, + task->buf, NULL, + task->lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags, + task->appmask, task->apptag); + } + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static void +task_complete(struct perf_task *task) +{ + struct ns_worker_ctx *ns_ctx; + uint64_t tsc_diff; + struct ns_entry *entry; + + ns_ctx = task->ns_ctx; + entry = ns_ctx->entry; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + tsc_diff = spdk_get_ticks() - task->submit_tsc; + ns_ctx->total_tsc += tsc_diff; + if (ns_ctx->min_tsc > tsc_diff) { + ns_ctx->min_tsc = tsc_diff; + } + if (ns_ctx->max_tsc < tsc_diff) { + ns_ctx->max_tsc = tsc_diff; + } + if (g_latency_sw_tracking_level > 0) { + spdk_histogram_data_tally(ns_ctx->histogram, tsc_diff); + } + + /* add application level verification for end-to-end data protection */ + if (entry->type == ENTRY_TYPE_NVME_NS) { + if (spdk_nvme_ns_supports_extended_lba(entry->u.nvme.ns) && + task->is_read && 
!g_metacfg_pract_flag) { + task_extended_lba_pi_verify(entry, task, task->lba, + entry->io_size_blocks); + } + } + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (ns_ctx->is_draining) { + spdk_dma_free(task->buf); + free(task); + } else { + submit_single_io(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct perf_task *)ctx); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ +#if HAVE_LIBAIO + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { + aio_check_io(ns_ctx); + } else +#endif + { + spdk_nvme_qpair_process_completions(ns_ctx->u.nvme.qpair, g_max_completions); + } +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + uint32_t max_io_size_bytes; + + while (queue_depth-- > 0) { + task = calloc(1, sizeof(*task)); + if (task == NULL) { + fprintf(stderr, "Out of memory allocating tasks\n"); + exit(1); + } + + /* maximum extended lba format size from all active + * namespace, it's same with g_io_size_bytes for + * namespace without metadata + */ + max_io_size_bytes = g_io_size_bytes + g_max_io_md_size * g_max_io_size_blocks; + task->buf = spdk_dma_zmalloc(max_io_size_bytes, g_io_align, NULL); + if (task->buf == NULL) { + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + memset(task->buf, queue_depth % 8 + 1, max_io_size_bytes); + + task->ns_ctx = ns_ctx; + + submit_single_io(task); + } +} + +static void +drain_io(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->is_draining = true; + while (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + } +} + +static int +init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + ns_ctx->u.aio.events = calloc(g_queue_depth, 
sizeof(struct io_event)); + if (!ns_ctx->u.aio.events) { + return -1; + } + ns_ctx->u.aio.ctx = 0; + if (io_setup(g_queue_depth, &ns_ctx->u.aio.ctx) < 0) { + free(ns_ctx->u.aio.events); + perror("io_setup"); + return -1; + } +#endif + } else { + /* + * TODO: If a controller has multiple namespaces, they could all use the same queue. + * For now, give each namespace/thread combination its own queue. + */ + struct spdk_nvme_io_qpair_opts opts; + + spdk_nvme_ctrlr_get_default_io_qpair_opts(ns_ctx->entry->u.nvme.ctrlr, &opts, sizeof(opts)); + if (opts.io_queue_requests < ns_ctx->entry->num_io_requests) { + opts.io_queue_requests = ns_ctx->entry->num_io_requests; + } + + ns_ctx->u.nvme.qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->u.nvme.ctrlr, &opts, + sizeof(opts)); + if (!ns_ctx->u.nvme.qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return -1; + } + } + + return 0; +} + +static void +cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + io_destroy(ns_ctx->u.aio.ctx); + free(ns_ctx->u.aio.events); +#endif + } else { + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->u.nvme.qpair); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + + printf("Starting thread on core %u\n", worker->lcore); + + /* Allocate a queue pair for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (init_ns_worker_ctx(ns_ctx) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. 
A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + drain_io(ns_ctx); + cleanup_ns_worker_ctx(ns_ctx); + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); +#if HAVE_LIBAIO + printf(" [AIO device(s)]..."); +#endif + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-o io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-L enable latency tracking via sw, default: disabled]\n"); + printf("\t\t-L for latency summary, -LL for detailed histogram\n"); + printf("\t[-l enable latency tracking via ssd (if supported), default: disabled]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 1)]\n"); + printf("\t[-D disable submission queue in controller memory buffer, default: enabled]\n"); + printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n"); + printf("\t Format: 'key:value [key:value] ...'\n"); + printf("\t Keys:\n"); + printf("\t trtype Transport type (e.g. PCIe, RDMA)\n"); + printf("\t adrfam Address family (e.g. IPv4, IPv6)\n"); + printf("\t traddr Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n"); + printf("\t trsvcid Transport service identifier (e.g. 
4420)\n"); + printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n"); + printf("\t -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n"); + printf("\t[-e metadata configuration]\n"); + printf("\t Keys:\n"); + printf("\t PRACT Protection Information Action bit (PRACT=1 or PRACT=0)\n"); + printf("\t PRCHK Control of Protection Information Checking (PRCHK=GUARD|REFTAG|APPTAG)\n"); + printf("\t Example: -e 'PRACT=0,PRCHK=GUARD|REFTAG|APPTAG'\n"); + printf("\t -e 'PRACT=1,PRCHK=GUARD'\n"); + printf("\t[-s DPDK huge memory size in MB.]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-i shared memory group ID]\n"); +} + +static void +check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + double **cutoff = ctx; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far / total; + while (so_far_pct >= **cutoff && **cutoff > 0) { + printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate); + (*cutoff)++; + } +} + +static void +print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far * 100 / total; + printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", + (double)start * 1000 * 1000 / g_tsc_rate, + (double)end * 1000 * 1000 / g_tsc_rate, + so_far_pct, count); +} + +static void +print_performance(void) +{ + uint64_t total_io_completed, total_io_tsc; + double io_per_second, mb_per_second, average_latency, min_latency, max_latency; + double sum_ave_latency, min_latency_so_far, max_latency_so_far; + double total_io_per_second, total_mb_per_second; + int ns_count; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + total_io_per_second = 0; + 
total_mb_per_second = 0; + total_io_completed = 0; + total_io_tsc = 0; + min_latency_so_far = (double)UINT64_MAX; + max_latency_so_far = 0; + ns_count = 0; + + printf("========================================================\n"); + printf("%103s\n", "Latency(us)"); + printf("%-55s: %10s %10s %10s %10s %10s\n", + "Device Information", "IOPS", "MB/s", "Average", "min", "max"); + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + if (ns_ctx->io_completed != 0) { + io_per_second = (double)ns_ctx->io_completed / g_time_in_sec; + mb_per_second = io_per_second * g_io_size_bytes / (1024 * 1024); + average_latency = ((double)ns_ctx->total_tsc / ns_ctx->io_completed) * 1000 * 1000 / g_tsc_rate; + min_latency = (double)ns_ctx->min_tsc * 1000 * 1000 / g_tsc_rate; + if (min_latency < min_latency_so_far) { + min_latency_so_far = min_latency; + } + + max_latency = (double)ns_ctx->max_tsc * 1000 * 1000 / g_tsc_rate; + if (max_latency > max_latency_so_far) { + max_latency_so_far = max_latency; + } + + printf("%-43.43s from core %u: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + ns_ctx->entry->name, worker->lcore, + io_per_second, mb_per_second, + average_latency, min_latency, max_latency); + total_io_per_second += io_per_second; + total_mb_per_second += mb_per_second; + total_io_completed += ns_ctx->io_completed; + total_io_tsc += ns_ctx->total_tsc; + ns_count++; + } + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (ns_count != 0 && total_io_completed) { + sum_ave_latency = ((double)total_io_tsc / total_io_completed) * 1000 * 1000 / g_tsc_rate; + printf("========================================================\n"); + printf("%-55s: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + "Total", total_io_per_second, total_mb_per_second, + sum_ave_latency, min_latency_so_far, max_latency_so_far); + printf("\n"); + } + + if (g_latency_sw_tracking_level == 0 || total_io_completed == 0) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx 
= worker->ns_ctx; + while (ns_ctx) { + const double *cutoff = g_latency_cutoffs; + + printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("=================================================================================\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, check_cutoff, &cutoff); + + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (g_latency_sw_tracking_level == 1) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("==============================================================================\n"); + printf(" Range in us Cumulative IO count\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, print_bucket, NULL); + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + +} + +static void +print_latency_page(struct ctrlr_entry *entry) +{ + int i; + + printf("\n"); + printf("%s\n", entry->name); + printf("--------------------------------------------------------\n"); + + for (i = 0; i < 32; i++) { + if (entry->latency_page->buckets_32us[i]) { + printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, entry->latency_page->buckets_32us[i]); + } + } + for (i = 0; i < 31; i++) { + if (entry->latency_page->buckets_1ms[i]) { + printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, entry->latency_page->buckets_1ms[i]); + } + } + for (i = 0; i < 31; i++) { + if (entry->latency_page->buckets_32ms[i]) + printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, + entry->latency_page->buckets_32ms[i]); + } +} + +static void +print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) +{ + struct ctrlr_entry *ctrlr; + + printf("%s Latency Statistics:\n", op_name); + printf("========================================================\n"); + ctrlr = g_controllers; + while (ctrlr) { 
+ if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr->ctrlr, log_page, SPDK_NVME_GLOBAL_NS_TAG, + ctrlr->latency_page, sizeof(struct spdk_nvme_intel_rw_latency_page), 0, + enable_latency_tracking_complete, + NULL)) { + printf("nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + g_outstanding_commands++; + } else { + printf("Controller %s: %s latency statistics not supported\n", ctrlr->name, op_name); + } + ctrlr = ctrlr->next; + } + + while (g_outstanding_commands) { + ctrlr = g_controllers; + while (ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); + ctrlr = ctrlr->next; + } + } + + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + print_latency_page(ctrlr); + } + ctrlr = ctrlr->next; + } + printf("\n"); +} + +static void +print_stats(void) +{ + print_performance(); + if (g_latency_ssd_tracking_enable) { + if (g_rw_percentage != 0) { + print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); + } + if (g_rw_percentage != 100) { + print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); + } + } +} + +static void +unregister_trids(void) +{ + struct trid_entry *trid_entry, *tmp; + + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) { + free(trid_entry); + } +} + +static int +add_trid(const char *trid_str) +{ + struct trid_entry *trid_entry; + struct spdk_nvme_transport_id *trid; + char *ns; + + trid_entry = calloc(1, sizeof(*trid_entry)); + if (trid_entry == NULL) { + return -1; + } + + trid = &trid_entry->trid; + memset(trid, 0, sizeof(*trid)); + trid->trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) { + fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str); + free(trid_entry); + return 1; + } + + ns = strcasestr(trid_str, "ns:"); + if (ns) { + 
char nsid_str[6]; /* 5 digits maximum in an nsid */ + int len; + int nsid; + + ns += 3; + + len = strcspn(ns, " \t\n"); + if (len > 5) { + fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n"); + free(trid_entry); + return 1; + } + + memcpy(nsid_str, ns, len); + nsid_str[len] = '\0'; + + nsid = atoi(nsid_str); + if (nsid <= 0 || nsid > 65535) { + fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n"); + free(trid_entry); + return 1; + } + + trid_entry->nsid = (uint16_t)nsid; + } + + TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq); + return 0; +} + +static int +parse_metadata(const char *metacfg_str) +{ + const char *sep; + + if (strstr(metacfg_str, "PRACT=1") != NULL) { + g_metacfg_pract_flag = SPDK_NVME_IO_FLAGS_PRACT; + } + + sep = strchr(metacfg_str, ','); + if (!sep) { + return 0; + } + + if (strstr(sep, "PRCHK=") != NULL) { + if (strstr(sep, "GUARD") != NULL) { + g_metacfg_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(sep, "REFTAG") != NULL) { + g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(sep, "APPTAG") != NULL) { + g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } + } + + return 0; +} + +static int +parse_args(int argc, char **argv) +{ + const char *workload_type; + int op; + bool mix_specified = false; + + /* default value */ + g_queue_depth = 0; + g_io_size_bytes = 0; + workload_type = NULL; + g_time_in_sec = 0; + g_rw_percentage = -1; + g_core_mask = NULL; + g_max_completions = 0; + + while ((op = getopt(argc, argv, "c:e:i:lm:o:q:r:s:t:w:DLM:")) != -1) { + switch (op) { + case 'c': + g_core_mask = optarg; + break; + case 'e': + if (parse_metadata(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 'i': + g_shm_id = atoi(optarg); + break; + case 'l': + g_latency_ssd_tracking_enable = true; + break; + case 'm': + g_max_completions = atoi(optarg); + break; + case 'o': + g_io_size_bytes = atoi(optarg); + break; + case 'q': + g_queue_depth = 
atoi(optarg); + break; + case 'r': + if (add_trid(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 's': + g_dpdk_mem = atoi(optarg); + break; + case 't': + g_time_in_sec = atoi(optarg); + break; + case 'w': + workload_type = optarg; + break; + case 'D': + g_disable_sq_cmb = 1; + break; + case 'L': + g_latency_sw_tracking_level++; + break; + case 'M': + g_rw_percentage = atoi(optarg); + mix_specified = true; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_queue_depth) { + usage(argv[0]); + return 1; + } + if (!g_io_size_bytes) { + usage(argv[0]); + return 1; + } + if (!workload_type) { + usage(argv[0]); + return 1; + } + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_rw_percentage = 100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... 
Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_is_random = 0; + } else { + g_is_random = 1; + } + + if (TAILQ_EMPTY(&g_trid_list)) { + /* If no transport IDs specified, default to enumerating all local PCIe devices */ + add_trid("trtype:PCIe"); + } else { + struct trid_entry *trid_entry, *trid_entry_tmp; + + g_no_pci = true; + /* check whether there is local PCIe type */ + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) { + if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + g_no_pci = false; + break; + } + } + } + + g_aio_optind = optind; + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + + /* Free namespace context and worker thread */ + while (worker) { + struct worker_thread *next_worker = worker->next; + struct ns_worker_ctx *ns_ctx = worker->ns_ctx; + + while (ns_ctx) { + struct ns_worker_ctx *next_ns_ctx = ns_ctx->next; + spdk_histogram_data_free(ns_ctx->histogram); + free(ns_ctx); + ns_ctx = next_ns_ctx; + } + + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + if (trid->trtype != 
SPDK_NVME_TRANSPORT_PCIE) { + printf("Attaching to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + } else { + if (g_disable_sq_cmb) { + opts->use_cmb_sqs = false; + } + + printf("Attaching to NVMe Controller at %s\n", + trid->traddr); + } + + /* Set io_queue_size to UINT16_MAX, NVMe driver + * will then reduce this to MQES to maximize + * the io_queue_size as much as possible. + */ + opts->io_queue_size = UINT16_MAX; + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct trid_entry *trid_entry = cb_ctx; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + + g_controllers_found++; + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("Attached to NVMe Controller at %s [%04x:%04x]\n", + trid->traddr, + pci_id.vendor_id, pci_id.device_id); + } + + register_ctrlr(ctrlr, trid_entry); +} + +static int +register_controllers(void) +{ + struct trid_entry *trid_entry; + + printf("Initializing NVMe Controllers\n"); + + TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) { + if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n", + trid_entry->trid.traddr); + return -1; + } + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + spdk_dma_free(entry->latency_page); + if (g_latency_ssd_tracking_enable && + 
spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
+			set_latency_tracking_feature(entry->ctrlr, false);
+		}
+		spdk_nvme_detach(entry->ctrlr);
+		free(entry);
+		entry = next;
+	}
+}
+
+/*
+ * Register every command-line argument from index g_aio_optind onward as an
+ * AIO file through register_aio_file().
+ *
+ * The loop is only compiled when HAVE_LIBAIO is set; otherwise the function
+ * does nothing.
+ *
+ * Returns 0 on success, 1 as soon as one registration fails.
+ */
+static int
+register_aio_files(int argc, char **argv)
+{
+#if HAVE_LIBAIO
+	int i;
+
+	/* Treat everything after the options as files for AIO */
+	for (i = g_aio_optind; i < argc; i++) {
+		if (register_aio_file(argv[i]) != 0) {
+			return 1;
+		}
+	}
+#endif /* HAVE_LIBAIO */
+
+	return 0;
+}
+
+/*
+ * Pair namespaces with workers.
+ *
+ * Runs max(g_num_namespaces, g_num_workers) iterations. Each iteration
+ * allocates one ns_worker_ctx (with its latency histogram), pushes it onto the
+ * front of the current worker's ns_ctx list, then advances both the worker and
+ * the namespace cursor, wrapping each back to the head of its list.
+ *
+ * Returns 0 on success, -1 if an allocation fails. Contexts linked before the
+ * failure stay on their workers; a context that could not be completed is
+ * freed here and never linked.
+ */
+static int
+associate_workers_with_ns(void)
+{
+	struct ns_entry		*entry = g_namespaces;
+	struct worker_thread	*worker = g_workers;
+	struct ns_worker_ctx	*ns_ctx;
+	int			i, count;
+
+	count = g_num_namespaces > g_num_workers ? g_num_namespaces : g_num_workers;
+
+	for (i = 0; i < count; i++) {
+		if (entry == NULL) {
+			break;
+		}
+
+		/* calloc() returns zeroed storage, replacing malloc() + memset(). */
+		ns_ctx = calloc(1, sizeof(*ns_ctx));
+		if (!ns_ctx) {
+			return -1;
+		}
+
+		/* Never hand a worker a context whose histogram is missing. */
+		ns_ctx->histogram = spdk_histogram_data_alloc();
+		if (!ns_ctx->histogram) {
+			free(ns_ctx);
+			return -1;
+		}
+
+		printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
+		ns_ctx->min_tsc = UINT64_MAX;
+		ns_ctx->entry = entry;
+		ns_ctx->next = worker->ns_ctx;
+		worker->ns_ctx = ns_ctx;
+
+		worker = worker->next;
+		if (worker == NULL) {
+			worker = g_workers;
+		}
+
+		entry = entry->next;
+		if (entry == NULL) {
+			entry = g_namespaces;
+		}
+
+	}
+
+	return 0;
+}
+
+/*
+ * Program entry point: parse the options, initialise the SPDK environment,
+ * register workers, AIO files and NVMe controllers, run work_fn() on every
+ * worker core and print the collected statistics.
+ *
+ * Returns the parse_args() result if option parsing fails, -1 if a setup step
+ * fails, otherwise the work_fn() result of the core main() itself runs on.
+ * Finding no usable namespace is reported on stderr but still returns 0.
+ */
+int main(int argc, char **argv)
+{
+	int rc;
+	struct worker_thread *worker, *master_worker;
+	unsigned master_core;
+	struct spdk_env_opts opts;
+
+	rc = parse_args(argc, argv);
+	if (rc != 0) {
+		return rc;
+	}
+
+	spdk_env_opts_init(&opts);
+	opts.name = "perf";
+	opts.shm_id = g_shm_id;
+	if (g_core_mask) {
+		opts.core_mask = g_core_mask;
+	}
+
+	if (g_dpdk_mem) {
+		opts.mem_size = g_dpdk_mem;
+	}
+	if (g_no_pci) {
+		opts.no_pci = g_no_pci;
+	}
+	if (spdk_env_init(&opts) < 0) {
+		fprintf(stderr, "Unable to initialize SPDK env\n");
+		rc = -1;
+		goto cleanup;
+	}
+
+	g_tsc_rate = spdk_get_ticks_hz();
+
+	if (register_workers() != 0) {
+		rc = -1;
+		goto cleanup;
+	}
+
+	if (register_aio_files(argc, argv) != 0) {
+		rc = -1;
+		goto cleanup;
+	}
+
+	if (register_controllers() != 0) {
+		rc = -1;
+		goto cleanup;
+	}
+
+	if (g_warn) {
+		printf("WARNING: Some requested NVMe devices were skipped\n");
+	}
+
+	if (g_num_namespaces == 0) {
+		fprintf(stderr, "No valid NVMe controllers or AIO devices found\n");
+		/*
+		 * Leave through the common cleanup path (rc is still 0 here) so the
+		 * controllers, workers and transport IDs registered above are
+		 * released instead of being abandoned by an early return.
+		 */
+		goto cleanup;
+	}
+
+	if (associate_workers_with_ns() != 0) {
+		rc = -1;
+		goto cleanup;
+	}
+
+	printf("Initialization complete. Launching workers.\n");
+
+	/* Launch all of the slave workers */
+	master_core = spdk_env_get_current_core();
+	master_worker = NULL;
+	worker = g_workers;
+	while (worker != NULL) {
+		if (worker->lcore != master_core) {
+			spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
+		} else {
+			assert(master_worker == NULL);
+			master_worker = worker;
+		}
+		worker = worker->next;
+	}
+
+	/* The worker bound to the current core runs inline on this thread. */
+	assert(master_worker != NULL);
+	rc = work_fn(master_worker);
+
+	spdk_env_thread_wait_all();
+
+	print_stats();
+
+cleanup:
+	unregister_trids();
+	unregister_namespaces();
+	unregister_controllers();
+	unregister_workers();
+
+	if (rc != 0) {
+		fprintf(stderr, "%s: errors occured\n", argv[0]);
+	}
+
+	return rc;
+}
diff --git a/src/spdk/examples/nvme/reserve/.gitignore b/src/spdk/examples/nvme/reserve/.gitignore
new file mode 100644
index 00000000..c58b368c
--- /dev/null
+++ b/src/spdk/examples/nvme/reserve/.gitignore
@@ -0,0 +1 @@
+reserve
diff --git a/src/spdk/examples/nvme/reserve/Makefile b/src/spdk/examples/nvme/reserve/Makefile
new file mode 100644
index 00000000..315faef0
--- /dev/null
+++ b/src/spdk/examples/nvme/reserve/Makefile
@@ -0,0 +1,39 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = reserve + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/reserve/reserve.c b/src/spdk/examples/nvme/reserve/reserve.c new file mode 100644 index 00000000..4e0d54e2 --- /dev/null +++ b/src/spdk/examples/nvme/reserve/reserve.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/log.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + char name[100]; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +static int outstanding_commands; +static int reserve_command_result; +static bool get_host_id_successful; + +#define HOST_ID 0xABABABABCDCDCDCD +#define EXT_HOST_ID ((uint8_t[]){0x0f, 0x97, 0xcd, 0x74, 0x8c, 0x80, 0x41, 0x42, \ + 0x99, 0x0f, 0x65, 0xc4, 0xf0, 0x39, 0x24, 0x20}) + +#define CR_KEY 0xDEADBEAF5A5A5A5B + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + fprintf(stdout, "Get Features - Host Identifier failed\n"); + get_host_id_successful = false; + } else { + get_host_id_successful = true; + } + outstanding_commands--; +} + +static int +get_host_identifier(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + uint8_t host_id[16]; + uint32_t host_id_size; + uint32_t cdw11; + + if (spdk_nvme_ctrlr_get_data(ctrlr)->ctratt.host_id_exhid_supported) { + host_id_size = 16; + cdw11 = 1; + printf("Using 128-bit extended host identifier\n"); + } else { + host_id_size = 8; + cdw11 = 0; + printf("Using 64-bit host identifier\n"); + } + + outstanding_commands = 0; + ret = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_HOST_IDENTIFIER, cdw11, host_id, + host_id_size, + get_feature_completion, NULL); + if (ret) { + fprintf(stdout, "Get Feature: Failed\n"); + return -1; + } + + outstanding_commands++; + get_host_id_successful = false; + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (get_host_id_successful) { + spdk_trace_dump(stdout, "Get Feature: Host Identifier:", host_id, host_id_size); + } + + return 0; +} + +static void 
+reservation_ns_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + reserve_command_result = -1; + } else { + reserve_command_result = 0; + } + + outstanding_commands--; +} + +static int +reservation_ns_register(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_register_data rr_data; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + rr_data.crkey = CR_KEY; + rr_data.nrkey = CR_KEY; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_register(ns, qpair, &rr_data, true, + SPDK_NVME_RESERVE_REGISTER_KEY, + SPDK_NVME_RESERVE_PTPL_NO_CHANGES, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Register Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Register Failed\n"); + } + + return 0; +} + +static int +reservation_ns_report(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret, i; + uint8_t *payload; + struct spdk_nvme_reservation_status_data *status; + struct spdk_nvme_reservation_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + outstanding_commands = 0; + reserve_command_result = -1; + + payload = spdk_dma_zmalloc(0x1000, 0x1000, NULL); + if (!payload) { + fprintf(stderr, "DMA Buffer Allocation Failed\n"); + return -1; + } + + ret = spdk_nvme_ns_cmd_reservation_report(ns, qpair, payload, 0x1000, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Report Failed\n"); + spdk_dma_free(payload); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, 
"Reservation Report Failed\n"); + spdk_dma_free(payload); + return 0; + } + + status = (struct spdk_nvme_reservation_status_data *)payload; + fprintf(stdout, "Reservation Generation Counter %u\n", status->generation); + fprintf(stdout, "Reservation type %u\n", status->type); + fprintf(stdout, "Reservation Number of Registered Controllers %u\n", status->nr_regctl); + fprintf(stdout, "Reservation Persist Through Power Loss State %u\n", status->ptpl_state); + for (i = 0; i < status->nr_regctl; i++) { + cdata = (struct spdk_nvme_reservation_ctrlr_data *)(payload + sizeof(struct + spdk_nvme_reservation_status_data) * (i + 1)); + fprintf(stdout, "Controller ID %u\n", cdata->ctrlr_id); + fprintf(stdout, "Controller Reservation Status %u\n", cdata->rcsts.status); + fprintf(stdout, "Controller Host ID 0x%"PRIx64"\n", cdata->host_id); + fprintf(stdout, "Controller Reservation Key 0x%"PRIx64"\n", cdata->key); + } + + spdk_dma_free(payload); + return 0; +} + +static int +reservation_ns_acquire(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_acquire_data cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + cdata.prkey = 0; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_acquire(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_ACQUIRE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Acquire Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Acquire Failed\n"); + } + + return 0; +} + +static int +reservation_ns_release(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_key_data cdata; + struct spdk_nvme_ns 
*ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_release(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_RELEASE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Release Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Release Failed\n"); + } + + return 0; +} + +static void +reserve_controller(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + const struct spdk_pci_addr *pci_addr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + printf("=====================================================\n"); + printf("NVMe Controller at PCI bus %d, device %d, function %d\n", + pci_addr->bus, pci_addr->dev, pci_addr->func); + printf("=====================================================\n"); + + printf("Reservations: %s\n", + cdata->oncs.reservations ? "Supported" : "Not Supported"); + + if (!cdata->oncs.reservations) { + return; + } + + get_host_identifier(ctrlr); + + /* tested 1 namespace */ + reservation_ns_register(ctrlr, qpair, 1); + reservation_ns_acquire(ctrlr, qpair, 1); + reservation_ns_report(ctrlr, qpair, 1); + reservation_ns_release(ctrlr, qpair, 1); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* + * Provide both 64-bit and 128-bit host identifiers. + * + * The NVMe library will choose which one to use based on whether the controller + * supports extended host identifiers. 
+	 */
+	to_le64(opts->host_id, HOST_ID);
+	memcpy(opts->extended_host_id, EXT_HOST_ID, sizeof(opts->extended_host_id));
+
+	return true;
+}
+
+/*
+ * Attach callback passed to spdk_nvme_probe(): stores the attached controller
+ * and the PCI address parsed from trid->traddr in the next free devs[] slot.
+ *
+ * devs[] has room for MAX_DEVS entries only. A controller that arrives once
+ * the table is full is reported on stderr and left untracked rather than
+ * being written past the end of the array; main() iterates over tracked
+ * devices only, so such a controller is neither exercised nor detached there.
+ */
+static void
+attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+	struct dev *dev;
+
+	if (num_devs >= MAX_DEVS) {
+		fprintf(stderr, "Ignoring NVMe controller at %s: only %d devices are supported\n",
+			trid->traddr, MAX_DEVS);
+		return;
+	}
+
+	/* add to dev list */
+	dev = &devs[num_devs++];
+	spdk_pci_addr_parse(&dev->pci_addr, trid->traddr);
+	dev->ctrlr = ctrlr;
+}
+
+/*
+ * Program entry point: initialise the SPDK environment, probe for NVMe
+ * controllers, run the reservation sequence on each tracked controller using
+ * a temporary I/O queue pair, then detach every tracked controller.
+ *
+ * Returns 0 on success, 1 if environment setup or probing fails or if a queue
+ * pair cannot be allocated for some controller.
+ */
+int main(int argc, char **argv)
+{
+	struct dev		*iter;
+	int			rc, i;
+	struct spdk_env_opts	opts;
+
+	spdk_env_opts_init(&opts);
+	opts.name = "reserve";
+	opts.core_mask = "0x1";
+	opts.shm_id = 0;
+	if (spdk_env_init(&opts) < 0) {
+		fprintf(stderr, "Unable to initialize SPDK env\n");
+		return 1;
+	}
+
+	if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
+		fprintf(stderr, "spdk_nvme_probe() failed\n");
+		return 1;
+	}
+
+	rc = 0;
+
+	foreach_dev(iter) {
+		struct spdk_nvme_qpair *qpair;
+
+		qpair = spdk_nvme_ctrlr_alloc_io_qpair(iter->ctrlr, NULL, 0);
+		if (!qpair) {
+			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
+			rc = 1;
+		} else {
+			reserve_controller(iter->ctrlr, qpair, &iter->pci_addr);
+			/* Return the queue pair before the controller is detached below. */
+			spdk_nvme_ctrlr_free_io_qpair(qpair);
+		}
+	}
+
+	printf("Cleaning up...\n");
+
+	for (i = 0; i < num_devs; i++) {
+		struct dev *dev = &devs[i];
+		spdk_nvme_detach(dev->ctrlr);
+	}
+
+	return rc;
+}
diff --git a/src/spdk/examples/sock/Makefile b/src/spdk/examples/sock/Makefile
new file mode 100644
index 00000000..097061fd
--- /dev/null
+++ b/src/spdk/examples/sock/Makefile
@@ -0,0 +1,47 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) + @: + +clean: $(DIRS-y) + @: + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/sock/hello_world/.gitignore b/src/spdk/examples/sock/hello_world/.gitignore new file mode 100644 index 00000000..95ffb143 --- /dev/null +++ b/src/spdk/examples/sock/hello_world/.gitignore @@ -0,0 +1 @@ +hello_sock diff --git a/src/spdk/examples/sock/hello_world/Makefile b/src/spdk/examples/sock/hello_world/Makefile new file mode 100644 index 00000000..f638153c --- /dev/null +++ b/src/spdk/examples/sock/hello_world/Makefile @@ -0,0 +1,54 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_sock + +C_SRCS := hello_sock.c + +SPDK_LIB_LIST += event thread util conf trace log jsonrpc json rpc + +LIBS += $(SOCK_MODULES_LINKER_ARGS) $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS) + +all : $(APP) + @: + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(SOCK_MODULES_FILES) $(LINKER_MODULES) $(ENV_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/src/spdk/examples/sock/hello_world/hello_sock.c b/src/spdk/examples/sock/hello_world/hello_sock.c new file mode 100644 index 00000000..a3a5d553 --- /dev/null +++ b/src/spdk/examples/sock/hello_world/hello_sock.c @@ -0,0 +1,422 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/string.h" + +#include "spdk/sock.h" +#include "spdk/net.h" + +#define ACCEPT_TIMEOUT_US 1000 +#define CLOSE_TIMEOUT_US 1000000 +#define BUFFER_SIZE 1024 +#define ADDR_STR_LEN INET6_ADDRSTRLEN + +static bool g_is_running; + +static char *g_host; +static int g_port; +static bool g_is_server; +static bool g_verbose; + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. 
+ */ +struct hello_context_t { + bool is_server; + char *host; + int port; + + bool verbose; + int bytes_in; + int bytes_out; + + struct spdk_sock *sock; + + struct spdk_sock_group *group; + struct spdk_poller *poller_in; + struct spdk_poller *poller_out; + struct spdk_poller *time_out; +}; + +/* + * Usage function for printing parameters that are specific to this application + */ +static void +hello_sock_usage(void) +{ + printf(" -H host_addr host address\n"); + printf(" -P port port number\n"); + printf(" -S start in server mode\n"); + printf(" -V print out additional informations"); +} + +/* + * This function is called to parse the parameters that are specific to this application + */ +static void hello_sock_parse_arg(int ch, char *arg) +{ + switch (ch) { + case 'H': + g_host = arg; + break; + case 'P': + g_port = atoi(arg); + break; + case 'S': + g_is_server = 1; + break; + case 'V': + g_verbose = true; + } +} + +static int +hello_sock_close_timeout_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + SPDK_NOTICELOG("Connection closed\n"); + + spdk_poller_unregister(&ctx->time_out); + spdk_poller_unregister(&ctx->poller_in); + spdk_sock_close(&ctx->sock); + + spdk_app_stop(0); + return 0; +} + +static int +hello_sock_recv_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc; + char buf_in[BUFFER_SIZE]; + + /* + * Get response + */ + rc = spdk_sock_recv(ctx->sock, buf_in, sizeof(buf_in) - 1); + + if (rc <= 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + return -1; + } + + if (rc > 0) { + ctx->bytes_in += rc; + buf_in[rc] = '\0'; + printf("%s", buf_in); + } + + return 0; +} + +static int +hello_sock_writev_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc = 0; + char buf_out[BUFFER_SIZE]; + struct iovec iov; + ssize_t n; + + n = read(STDIN_FILENO, buf_out, sizeof(buf_out)); + if (n == 0 || !g_is_running) { + /* EOF */ 
+ SPDK_NOTICELOG("Closing connection...\n"); + + ctx->time_out = spdk_poller_register(hello_sock_close_timeout_poll, ctx, + CLOSE_TIMEOUT_US); + + spdk_poller_unregister(&ctx->poller_out); + return 0; + } + if (n > 0) { + /* + * Send message to the server + */ + iov.iov_base = buf_out; + iov.iov_len = n; + rc = spdk_sock_writev(ctx->sock, &iov, 1); + if (rc > 0) { + ctx->bytes_out += rc; + } + } + return rc; +} + +static int +hello_sock_connect(struct hello_context_t *ctx) +{ + int rc; + char saddr[ADDR_STR_LEN], caddr[ADDR_STR_LEN]; + uint16_t cport, sport; + + SPDK_NOTICELOG("Connecting to the server on %s:%d\n", ctx->host, ctx->port); + + ctx->sock = spdk_sock_connect(ctx->host, ctx->port); + if (ctx->sock == NULL) { + SPDK_ERRLOG("connect error(%d): %s\n", errno, spdk_strerror(errno)); + return -1; + } + + rc = spdk_sock_getaddr(ctx->sock, saddr, sizeof(saddr), &sport, caddr, sizeof(caddr), &cport); + if (rc < 0) { + SPDK_ERRLOG("Cannot get connection addresses\n"); + spdk_sock_close(&ctx->sock); + return -1; + } + + SPDK_NOTICELOG("Connection accepted from (%s, %hu) to (%s, %hu)\n", caddr, cport, saddr, sport); + + fcntl(STDIN_FILENO, F_SETFL, fcntl(STDIN_FILENO, F_GETFL) | O_NONBLOCK); + + g_is_running = true; + ctx->poller_in = spdk_poller_register(hello_sock_recv_poll, ctx, 0); + ctx->poller_out = spdk_poller_register(hello_sock_writev_poll, ctx, 0); + + return 0; +} + +static void +hello_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock) +{ + ssize_t n; + char buf[BUFFER_SIZE]; + struct iovec iov; + struct hello_context_t *ctx = arg; + + n = spdk_sock_recv(sock, buf, sizeof(buf)); + if (n < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + return; + } + + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + + if (n > 0) { + ctx->bytes_in += n; + iov.iov_base = buf; + iov.iov_len = n; + n = 
spdk_sock_writev(sock, &iov, 1); + if (n > 0) { + ctx->bytes_out += n; + } + return; + } + + /* Connection closed */ + SPDK_NOTICELOG("Connection closed\n"); + spdk_sock_group_remove_sock(group, sock); + spdk_sock_close(&sock); +} + +static int +hello_sock_accept_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + struct spdk_sock *sock; + int rc; + int count = 0; + char saddr[ADDR_STR_LEN], caddr[ADDR_STR_LEN]; + uint16_t cport, sport; + + if (!g_is_running) { + spdk_poller_unregister(&ctx->poller_in); + spdk_poller_unregister(&ctx->poller_out); + spdk_sock_close(&ctx->sock); + spdk_sock_group_close(&ctx->group); + spdk_app_stop(0); + return 0; + } + + while (1) { + sock = spdk_sock_accept(ctx->sock); + if (sock != NULL) { + spdk_sock_getaddr(sock, saddr, sizeof(saddr), &sport, caddr, sizeof(caddr), &cport); + + SPDK_NOTICELOG("Accepting a new connection from (%s, %hu) to (%s, %hu)\n", + caddr, cport, saddr, sport); + + rc = spdk_sock_group_add_sock(ctx->group, sock, + hello_sock_cb, ctx); + + if (rc < 0) { + spdk_sock_close(&sock); + SPDK_ERRLOG("failed\n"); + break; + } + + count++; + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("accept error(%d): %s\n", errno, spdk_strerror(errno)); + } + break; + } + } + + return count; +} + +static int +hello_sock_group_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc; + + rc = spdk_sock_group_poll(ctx->group); + if (rc < 0) { + SPDK_ERRLOG("Failed to poll sock_group=%p\n", ctx->group); + } + + return -1; +} + +static int +hello_sock_listen(struct hello_context_t *ctx) +{ + ctx->sock = spdk_sock_listen(ctx->host, ctx->port); + if (ctx->sock == NULL) { + SPDK_ERRLOG("Cannot create server socket\n"); + return -1; + } + + SPDK_NOTICELOG("Listening connection on %s:%d\n", ctx->host, ctx->port); + + /* + * Create sock group for server socket + */ + ctx->group = spdk_sock_group_create(); + + g_is_running = true; + + /* + * Start acceptor and group poller + */ + ctx->poller_in = 
spdk_poller_register(hello_sock_accept_poll, ctx, + ACCEPT_TIMEOUT_US); + ctx->poller_out = spdk_poller_register(hello_sock_group_poll, ctx, 0); + + return 0; +} + +static void +hello_sock_shutdown_cb(void) +{ + g_is_running = false; +} +/* + * Our initial event that kicks off everything from main(). + */ +static void +hello_start(void *arg1, void *arg2) +{ + struct hello_context_t *ctx = arg1; + int rc = 0; + + SPDK_NOTICELOG("Successfully started the application\n"); + + if (ctx->is_server) { + rc = hello_sock_listen(ctx); + } else { + rc = hello_sock_connect(ctx); + } + + if (rc) { + spdk_app_stop(-1); + return; + } +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t hello_context = {}; + + /* Set default values in opts structure. */ + spdk_app_opts_init(&opts); + opts.name = "hello_sock"; + opts.config_file = "sock.conf"; + opts.shutdown_cb = hello_sock_shutdown_cb; + + if ((rc = spdk_app_parse_args(argc, argv, &opts, "H:P:SV", NULL, hello_sock_parse_arg, + hello_sock_usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { + exit(rc); + } + hello_context.is_server = g_is_server; + hello_context.host = g_host; + hello_context.port = g_port; + hello_context.verbose = g_verbose; + + rc = spdk_net_framework_start(); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + goto end; + } + + rc = spdk_app_start(&opts, hello_start, &hello_context, NULL); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + } + +end: + SPDK_NOTICELOG("Exiting from application\n"); + + if (hello_context.verbose) { + printf("** %d bytes received, %d bytes sent **\n", + hello_context.bytes_in, hello_context.bytes_out); + } + + spdk_net_framework_fini(); + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} |