diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/examples | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/examples')
85 files changed, 20420 insertions, 0 deletions
diff --git a/src/spdk/examples/Makefile b/src/spdk/examples/Makefile new file mode 100644 index 000000000..516cf83fc --- /dev/null +++ b/src/spdk/examples/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += accel bdev blob ioat nvme sock vmd nvmf + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/accel/Makefile b/src/spdk/examples/accel/Makefile new file mode 100644 index 000000000..55ede2195 --- /dev/null +++ b/src/spdk/examples/accel/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += perf + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/accel/perf/.gitignore b/src/spdk/examples/accel/perf/.gitignore new file mode 100644 index 000000000..83fe5432f --- /dev/null +++ b/src/spdk/examples/accel/perf/.gitignore @@ -0,0 +1 @@ +accel_perf diff --git a/src/spdk/examples/accel/perf/Makefile b/src/spdk/examples/accel/perf/Makefile new file mode 100644 index 000000000..b28f7c412 --- /dev/null +++ b/src/spdk/examples/accel/perf/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = accel_perf + +C_SRCS := accel_perf.c + +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += bdev accel event thread util conf trace \ + log jsonrpc json rpc sock notify + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/accel/perf/accel_perf.c b/src/spdk/examples/accel/perf/accel_perf.c new file mode 100644 index 000000000..2093253c1 --- /dev/null +++ b/src/spdk/examples/accel/perf/accel_perf.c @@ -0,0 +1,716 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/accel_engine.h" +#include "spdk/crc32.h" +#include "spdk/util.h" + +#define DATA_PATTERN 0x5a +#define ALIGN_4K 0x1000 + +static uint64_t g_tsc_rate; +static uint64_t g_tsc_us_rate; +static uint64_t g_tsc_end; +static int g_xfer_size_bytes = 4096; +static int g_queue_depth = 32; +static int g_time_in_sec = 5; +static uint32_t g_crc32c_seed = 0; +static int g_fail_percent_goal = 0; +static uint8_t g_fill_pattern = 255; +static bool g_verify = false; +static const char *g_workload_type = NULL; +static enum accel_capability g_workload_selection; +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; +static pthread_mutex_t g_workers_lock = PTHREAD_MUTEX_INITIALIZER; +uint64_t g_capabilites; +struct ap_task; + +struct worker_thread { + struct spdk_io_channel *ch; + uint64_t xfer_completed; + uint64_t xfer_failed; + uint64_t injected_miscompares; + uint64_t current_queue_depth; + TAILQ_HEAD(, ap_task) tasks; + struct worker_thread *next; + unsigned core; + struct spdk_thread *thread; + bool 
is_draining; + struct spdk_poller *is_draining_poller; + struct spdk_poller *stop_poller; +}; + +struct ap_task { + void *src; + void *dst; + void *dst2; + struct worker_thread *worker; + int status; + int expected_status; /* used for compare */ + TAILQ_ENTRY(ap_task) link; +}; + +static void +dump_user_config(struct spdk_app_opts *opts) +{ + printf("SPDK Configuration:\n"); + printf("Core mask: %s\n\n", opts->reactor_mask); + printf("Accel Perf Configuration:\n"); + printf("Workload Type: %s\n", g_workload_type); + if (g_workload_selection == ACCEL_CRC32C) { + printf("CRC-32C seed: %u\n", g_crc32c_seed); + } else if (g_workload_selection == ACCEL_FILL) { + printf("Fill pattern: 0x%x\n", g_fill_pattern); + } else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) { + printf("Failure inject: %u percent\n", g_fail_percent_goal); + } + printf("Transfer size: %u bytes\n", g_xfer_size_bytes); + printf("Queue depth: %u\n", g_queue_depth); + printf("Run time: %u seconds\n", g_time_in_sec); + printf("Verify: %s\n\n", g_verify ? 
"Yes" : "No"); +} + +static void +usage(void) +{ + printf("accel_perf options:\n"); + printf("\t[-h help message]\n"); + printf("\t[-q queue depth]\n"); + printf("\t[-n number of channels]\n"); + printf("\t[-o transfer size in bytes]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n"); + printf("\t[-s for crc32c workload, use this seed value (default 0)\n"); + printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n"); + printf("\t[-f for fill workload, use this BYTE value (default 255)\n"); + printf("\t[-y verify result if this switch is on]\n"); +} + +static int +parse_args(int argc, char *argv) +{ + switch (argc) { + case 'f': + g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10); + break; + case 'o': + g_xfer_size_bytes = spdk_strtol(optarg, 10); + break; + case 'P': + g_fail_percent_goal = spdk_strtol(optarg, 10); + break; + case 'q': + g_queue_depth = spdk_strtol(optarg, 10); + break; + case 's': + g_crc32c_seed = spdk_strtol(optarg, 10); + break; + case 't': + g_time_in_sec = spdk_strtol(optarg, 10); + break; + case 'y': + g_verify = true; + break; + case 'w': + g_workload_type = optarg; + if (!strcmp(g_workload_type, "copy")) { + g_workload_selection = ACCEL_COPY; + } else if (!strcmp(g_workload_type, "fill")) { + g_workload_selection = ACCEL_FILL; + } else if (!strcmp(g_workload_type, "crc32c")) { + g_workload_selection = ACCEL_CRC32C; + } else if (!strcmp(g_workload_type, "compare")) { + g_workload_selection = ACCEL_COMPARE; + } else if (!strcmp(g_workload_type, "dualcast")) { + g_workload_selection = ACCEL_DUALCAST; + } + break; + default: + usage(); + return 1; + } + return 0; +} + +static void +unregister_worker(void *arg1) +{ + struct worker_thread *worker = arg1; + struct ap_task *task; + + while (!TAILQ_EMPTY(&worker->tasks)) { + task = TAILQ_FIRST(&worker->tasks); + TAILQ_REMOVE(&worker->tasks, task, link); + 
free(task); + } + spdk_put_io_channel(worker->ch); + pthread_mutex_lock(&g_workers_lock); + assert(g_num_workers >= 1); + if (--g_num_workers == 0) { + pthread_mutex_unlock(&g_workers_lock); + spdk_app_stop(0); + /* The lock was released above; return so it is not unlocked a second time. */ + return; + } + pthread_mutex_unlock(&g_workers_lock); +} + +static void accel_done(void *ref, int status); + +static void +_submit_single(void *arg1, void *arg2) +{ + struct worker_thread *worker = arg1; + struct ap_task *task = arg2; + int random_num; + int rc = 0; + + assert(worker); + + task->worker = worker; + task->worker->current_queue_depth++; + switch (g_workload_selection) { + case ACCEL_COPY: + rc = spdk_accel_submit_copy(worker->ch, task->dst, task->src, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_FILL: + /* For fill, the pattern byte is the first byte of the task->src buffer. */ + rc = spdk_accel_submit_fill(worker->ch, task->dst, *(uint8_t *)task->src, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_CRC32C: + rc = spdk_accel_submit_crc32c(worker->ch, (uint32_t *)task->dst, + task->src, g_crc32c_seed, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_COMPARE: + random_num = rand() % 100; + if (random_num < g_fail_percent_goal) { + task->expected_status = -EILSEQ; + *(uint8_t *)task->dst = ~DATA_PATTERN; + } else { + task->expected_status = 0; + *(uint8_t *)task->dst = DATA_PATTERN; + } + rc = spdk_accel_submit_compare(worker->ch, task->dst, task->src, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_DUALCAST: + rc = spdk_accel_submit_dualcast(worker->ch, task->dst, task->dst2, + task->src, g_xfer_size_bytes, accel_done, task); + break; + default: + assert(false); + break; + + } + + if (rc) { + accel_done(task, rc); + } +} + +static void +_accel_done(void *arg1) +{ + struct ap_task *task = arg1; + struct worker_thread *worker = task->worker; + uint32_t sw_crc32c; + + assert(worker); + assert(worker->current_queue_depth > 0); + + if (g_verify && task->status == 0) { + switch (g_workload_selection) { + case 
ACCEL_CRC32C: + /* Calculate the CRC-32C in software and compare it to the accel result in task->dst. */ + sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed); + if (*(uint32_t *)task->dst != sw_crc32c) { + SPDK_NOTICELOG("CRC-32C miscompare\n"); + worker->xfer_failed++; + } + break; + case ACCEL_COPY: + if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { + SPDK_NOTICELOG("Data miscompare\n"); + worker->xfer_failed++; + } + break; + case ACCEL_DUALCAST: + if (memcmp(task->src, task->dst, g_xfer_size_bytes)) { + SPDK_NOTICELOG("Data miscompare, first destination\n"); + worker->xfer_failed++; + } + if (memcmp(task->src, task->dst2, g_xfer_size_bytes)) { + SPDK_NOTICELOG("Data miscompare, second destination\n"); + worker->xfer_failed++; + } + break; + case ACCEL_FILL: + if (memcmp(task->dst, task->src, g_xfer_size_bytes)) { + SPDK_NOTICELOG("Data miscompare\n"); + worker->xfer_failed++; + } + break; + case ACCEL_COMPARE: + break; + default: + assert(false); + break; + } + } + + if (task->expected_status == -EILSEQ) { + assert(task->status != 0); + worker->injected_miscompares++; + } else if (task->status) { + /* Expected to pass but API reported error. 
*/ + worker->xfer_failed++; + } + + worker->xfer_completed++; + worker->current_queue_depth--; + + if (!worker->is_draining) { + _submit_single(worker, task); + } else { + spdk_free(task->src); + spdk_free(task->dst); + if (g_workload_selection == ACCEL_DUALCAST) { + spdk_free(task->dst2); + } + TAILQ_INSERT_TAIL(&worker->tasks, task, link); + } +} + +static void +batch_done(void *cb_arg, int status) +{ + struct ap_task *task = (struct ap_task *)cb_arg; + struct worker_thread *worker = task->worker; + + worker->current_queue_depth--; + TAILQ_INSERT_TAIL(&worker->tasks, task, link); +} + +static int +dump_result(void) +{ + uint64_t total_completed = 0; + uint64_t total_failed = 0; + uint64_t total_miscompared = 0; + uint64_t total_xfer_per_sec, total_bw_in_MiBps; + struct worker_thread *worker = g_workers; + + printf("\nCore Transfers Bandwidth Failed Miscompares\n"); + printf("-----------------------------------------------------------------\n"); + while (worker != NULL) { + + uint64_t xfer_per_sec = worker->xfer_completed / g_time_in_sec; + uint64_t bw_in_MiBps = (worker->xfer_completed * g_xfer_size_bytes) / + ((uint64_t)g_time_in_sec * 1024 * 1024); /* 64-bit math: a 32-bit int overflows for runs >= 2048 s */ + + total_completed += worker->xfer_completed; + total_failed += worker->xfer_failed; + total_miscompared += worker->injected_miscompares; + + if (xfer_per_sec) { + printf("%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64 "\n", + worker->core, xfer_per_sec, + bw_in_MiBps, worker->xfer_failed, worker->injected_miscompares); + } + + worker = worker->next; + } + + total_xfer_per_sec = total_completed / g_time_in_sec; + total_bw_in_MiBps = (total_completed * g_xfer_size_bytes) / + ((uint64_t)g_time_in_sec * 1024 * 1024); /* same widening as the per-worker bandwidth */ + + printf("==================================================================\n"); + printf("Total:%16" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 " %11" PRIu64"\n\n", + total_xfer_per_sec, total_bw_in_MiBps, total_failed, total_miscompared); + + return total_failed ? 
1 : 0; +} + +static int +_check_draining(void *arg) +{ + struct worker_thread *worker = arg; + + assert(worker); + + if (worker->current_queue_depth == 0) { + spdk_poller_unregister(&worker->is_draining_poller); + unregister_worker(worker); + } + + return -1; +} + +static int +_worker_stop(void *arg) +{ + struct worker_thread *worker = arg; + + assert(worker); + + spdk_poller_unregister(&worker->stop_poller); + + /* Now let the worker drain and check its outstanding IO with a poller. */ + worker->is_draining = true; + worker->is_draining_poller = SPDK_POLLER_REGISTER(_check_draining, worker, 0); + + return 0; +} + +static void +_init_thread_done(void *ctx) +{ +} + +static int +_get_task_data_bufs(struct ap_task *task) +{ + uint32_t align = 0; + + /* For dualcast, the DSA HW requires 4K alignment on destination addresses but + * we do this for all engines to keep it simple. + */ + if (g_workload_selection == ACCEL_DUALCAST) { + align = ALIGN_4K; + } + + task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL); + if (task->src == NULL) { + fprintf(stderr, "Unable to alloc src buffer\n"); + return -ENOMEM; + } + memset(task->src, DATA_PATTERN, g_xfer_size_bytes); + + task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); + if (task->dst == NULL) { + fprintf(stderr, "Unable to alloc dst buffer\n"); + return -ENOMEM; + } + + /* For compare we want the buffers to match, otherwise not. */ + if (g_workload_selection == ACCEL_COMPARE) { + memset(task->dst, DATA_PATTERN, g_xfer_size_bytes); + } else { + memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes); + } + + /* For fill, set the entire src buffer so we can check if verify is enabled. 
*/ + if (g_workload_selection == ACCEL_FILL) { + memset(task->src, g_fill_pattern, g_xfer_size_bytes); + } + + if (g_workload_selection == ACCEL_DUALCAST) { + task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL); + if (task->dst2 == NULL) { + fprintf(stderr, "Unable to alloc dst buffer\n"); + return -ENOMEM; + } + memset(task->dst2, ~DATA_PATTERN, g_xfer_size_bytes); + } + + return 0; +} + +static int +_batch_prep_cmd(struct worker_thread *worker, struct ap_task *task, struct spdk_accel_batch *batch) +{ + int rc = 0; + + switch (g_workload_selection) { + case ACCEL_COPY: + rc = spdk_accel_batch_prep_copy(worker->ch, batch, task->dst, + task->src, g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_DUALCAST: + rc = spdk_accel_batch_prep_dualcast(worker->ch, batch, task->dst, task->dst2, + task->src, g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_COMPARE: + rc = spdk_accel_batch_prep_compare(worker->ch, batch, task->dst, task->src, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_FILL: + rc = spdk_accel_batch_prep_fill(worker->ch, batch, task->dst, + *(uint8_t *)task->src, + g_xfer_size_bytes, accel_done, task); + break; + case ACCEL_CRC32C: + rc = spdk_accel_batch_prep_crc32c(worker->ch, batch, (uint32_t *)task->dst, + task->src, g_crc32c_seed, g_xfer_size_bytes, accel_done, task); + break; + default: + assert(false); + break; + } + + return rc; +} + +static void +_init_thread(void *arg1) +{ + struct worker_thread *worker; + struct ap_task *task; + int i, rc, max_per_batch, batch_count, num_tasks; + int remaining = g_queue_depth; + struct spdk_accel_batch *batch, *new_batch; + + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return; + } + + worker->core = spdk_env_get_current_core(); + worker->thread = spdk_get_thread(); + worker->next = g_workers; + worker->ch = spdk_accel_engine_get_io_channel(); + + max_per_batch = spdk_accel_batch_get_max(worker->ch); + 
assert(max_per_batch > 0); + num_tasks = g_queue_depth + spdk_divide_round_up(g_queue_depth, max_per_batch); + + TAILQ_INIT(&worker->tasks); + for (i = 0; i < num_tasks; i++) { + task = calloc(1, sizeof(struct ap_task)); + if (task == NULL) { + fprintf(stderr, "Could not allocate task.\n"); + return; + /* TODO cleanup */ + } + TAILQ_INSERT_TAIL(&worker->tasks, task, link); + } + + /* Register a poller that will stop the worker once the run time has elapsed. */ + worker->stop_poller = SPDK_POLLER_REGISTER(_worker_stop, worker, + g_time_in_sec * 1000000ULL); + + g_workers = worker; + pthread_mutex_lock(&g_workers_lock); + g_num_workers++; + pthread_mutex_unlock(&g_workers_lock); + + /* Batching is only possible if there are at least 2 operations. */ + if (g_queue_depth > 1) { + + /* Outer loop sets up each batch command, inner loop populates the + * batch descriptors. + */ + do { + new_batch = spdk_accel_batch_create(worker->ch); + if (new_batch == NULL) { + break; + } + + batch = new_batch; + batch_count = 0; + + do { + if (!TAILQ_EMPTY(&worker->tasks)) { + task = TAILQ_FIRST(&worker->tasks); + TAILQ_REMOVE(&worker->tasks, task, link); + } else { + fprintf(stderr, "Unable to get accel_task\n"); + goto error; + } + task->worker = worker; + task->worker->current_queue_depth++; + + if (_get_task_data_bufs(task)) { + fprintf(stderr, "Unable to get data bufs\n"); + goto error; + } + + rc = _batch_prep_cmd(worker, task, batch); + if (rc) { + fprintf(stderr, "error preping command\n"); + goto error; + } + remaining--; + batch_count++; + } while (batch_count < max_per_batch && remaining > 0); + + /* Now send the batch command. 
*/ + if (!TAILQ_EMPTY(&worker->tasks)) { + task = TAILQ_FIRST(&worker->tasks); + TAILQ_REMOVE(&worker->tasks, task, link); + } else { + fprintf(stderr, "Unable to get accel_task\n"); + goto error; + } + task->worker = worker; + task->worker->current_queue_depth++; + + rc = spdk_accel_batch_submit(worker->ch, batch, batch_done, task); + if (rc) { + fprintf(stderr, "error ending batch %d\n", rc); + goto error; + } + /* We can't build a batch unless it has 2 descriptors (per spec). */ + } while (remaining > 1); + + /* If there are no more left, we're done. */ + if (remaining == 0) { + return; + } + } + + /* For engines that don't support batch or for the odd event that + * a batch ends with only one descriptor left. + */ + for (i = 0; i < remaining; i++) { + + if (!TAILQ_EMPTY(&worker->tasks)) { + task = TAILQ_FIRST(&worker->tasks); + TAILQ_REMOVE(&worker->tasks, task, link); + } else { + fprintf(stderr, "Unable to get accel_task\n"); + goto error; + } + + if (_get_task_data_bufs(task)) { + fprintf(stderr, "Unable to get data bufs\n"); + goto error; + } + + _submit_single(worker, task); + } + return; +error: + /* TODO clean exit */ + raise(SIGINT); + while (!TAILQ_EMPTY(&worker->tasks)) { + task = TAILQ_FIRST(&worker->tasks); + TAILQ_REMOVE(&worker->tasks, task, link); + free(task); + } + free(worker); + spdk_app_stop(-1); +} + +static void +accel_done(void *cb_arg, int status) +{ + struct ap_task *task = (struct ap_task *)cb_arg; + struct worker_thread *worker = task->worker; + + assert(worker); + + task->status = status; + spdk_thread_send_msg(worker->thread, _accel_done, task); +} + +static void +accel_perf_start(void *arg1) +{ + struct spdk_io_channel *accel_ch; + + accel_ch = spdk_accel_engine_get_io_channel(); + g_capabilites = spdk_accel_get_capabilities(accel_ch); + spdk_put_io_channel(accel_ch); + + if ((g_capabilites & g_workload_selection) != g_workload_selection) { + SPDK_WARNLOG("The selected workload is not natively supported by the current engine\n"); + 
SPDK_WARNLOG("The software engine will be used instead.\n\n"); + } + + g_tsc_rate = spdk_get_ticks_hz(); + g_tsc_us_rate = g_tsc_rate / (1000 * 1000); + g_tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + printf("Running for %d seconds...\n", g_time_in_sec); + fflush(stdout); + + spdk_for_each_thread(_init_thread, NULL, _init_thread_done); +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + struct worker_thread *worker, *tmp; + int rc = 0; + + pthread_mutex_init(&g_workers_lock, NULL); + spdk_app_opts_init(&opts); + opts.reactor_mask = "0x1"; + if ((rc = spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:", NULL, parse_args, + usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { + rc = -1; + goto cleanup; + } + + if ((g_workload_selection != ACCEL_COPY) && + (g_workload_selection != ACCEL_FILL) && + (g_workload_selection != ACCEL_CRC32C) && + (g_workload_selection != ACCEL_COMPARE) && + (g_workload_selection != ACCEL_DUALCAST)) { + usage(); + rc = -1; + goto cleanup; + } + + dump_user_config(&opts); + rc = spdk_app_start(&opts, accel_perf_start, NULL); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + } else { + dump_result(); + } + + pthread_mutex_destroy(&g_workers_lock); + + worker = g_workers; + while (worker) { + tmp = worker->next; + free(worker); + worker = tmp; + } +cleanup: + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/bdev/Makefile b/src/spdk/examples/bdev/Makefile new file mode 100644 index 000000000..dc1f52213 --- /dev/null +++ b/src/spdk/examples/bdev/Makefile @@ -0,0 +1,48 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-$(CONFIG_FIO_PLUGIN) = fio_plugin +DIRS-y += hello_world + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) + @: + +clean: $(DIRS-y) + @: + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/bdev/fio_plugin/.gitignore b/src/spdk/examples/bdev/fio_plugin/.gitignore new file mode 100644 index 000000000..1b0b36ac4 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/src/spdk/examples/bdev/fio_plugin/Makefile b/src/spdk/examples/bdev/fio_plugin/Makefile new file mode 100644 index 000000000..e50498d24 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/Makefile @@ -0,0 +1,48 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +FIO_PLUGIN := spdk_bdev + +C_SRCS = fio_plugin.c + +# Unable to combine the FIO plugin and the VPP socket abstraction (license incompatibility) +SPDK_LIB_LIST = $(filter-out sock_vpp,$(ALL_MODULES_LIST)) +SPDK_LIB_LIST += thread util bdev bdev_rpc conf accel rpc jsonrpc json log sock trace notify +SPDK_LIB_LIST += event $(EVENT_BDEV_SUBSYSTEM) + +include $(SPDK_ROOT_DIR)/mk/spdk.fio.mk diff --git a/src/spdk/examples/bdev/fio_plugin/README.md b/src/spdk/examples/bdev/fio_plugin/README.md new file mode 100644 index 000000000..369756fe3 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/README.md @@ -0,0 +1,74 @@ +# Introduction + +This directory contains a plug-in module for fio to enable use +with SPDK. Fio is free software published under version 2 of +the GPL license. 
+ +# Compiling fio + +Clone the fio source repository from https://github.com/axboe/fio + + git clone https://github.com/axboe/fio + cd fio + +Compile the fio code and install: + + make + make install + +# Compiling SPDK + +Clone the SPDK source repository from https://github.com/spdk/spdk + + git clone https://github.com/spdk/spdk + cd spdk + git submodule update --init + +Then, run the SPDK configure script to enable fio (point it to the root of the fio repository): + + cd spdk + ./configure --with-fio=/path/to/fio/repo <other configuration options> + +Finally, build SPDK: + + make + +**Note to advanced users**: These steps assume you're using the DPDK submodule. If you are using your +own version of DPDK, the fio plugin requires that DPDK be compiled with -fPIC. You can compile DPDK +with -fPIC by modifying your DPDK configuration file and adding the line: + + EXTRA_CFLAGS=-fPIC + +# Usage + +To use the SPDK fio plugin with fio, specify the plugin binary using LD_PRELOAD when running +fio and set ioengine=spdk_bdev in the fio configuration file (see example_config.fio in the same +directory as this README). + + LD_PRELOAD=<path to spdk repo>/build/fio/spdk_bdev fio + +The fio configuration file must contain one new parameter: + + spdk_conf=./examples/bdev/fio_plugin/bdev.conf + +This must point at an SPDK configuration file. There are a number of example configuration +files in the SPDK repository under etc/spdk. + +You can specify which block device to run against by setting the filename parameter +to the block device name: + + filename=Malloc0 + +Or for NVMe devices: + + filename=Nvme0n1 + +Currently the SPDK fio plugin is limited to the thread usage model, so fio jobs must also specify thread=1 +when using the SPDK fio plugin. + +fio also currently has a race condition on shutdown if dynamically loading the ioengine by specifying the +engine's full path via the ioengine parameter - LD_PRELOAD is recommended to avoid this race condition. 
+ +When testing random workloads, it is recommended to set norandommap=1. fio's random map +processing consumes extra CPU cycles which will degrade performance over time with +the fio_plugin since all I/O are submitted and completed on a single CPU core. diff --git a/src/spdk/examples/bdev/fio_plugin/bdev.conf.in b/src/spdk/examples/bdev/fio_plugin/bdev.conf.in new file mode 100644 index 000000000..948cebe33 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/bdev.conf.in @@ -0,0 +1,3 @@ +[Malloc] + NumberOfLuns 1 + LunSizeInMB 128 diff --git a/src/spdk/examples/bdev/fio_plugin/example_config.fio b/src/spdk/examples/bdev/fio_plugin/example_config.fio new file mode 100644 index 000000000..3a35432e9 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/example_config.fio @@ -0,0 +1,16 @@ +[global] +ioengine=spdk_bdev +spdk_conf=./examples/bdev/fio_plugin/bdev.conf.in +thread=1 +group_reporting=1 +direct=1 +verify=0 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=4k + +[test] +numjobs=1 diff --git a/src/spdk/examples/bdev/fio_plugin/fio_plugin.c b/src/spdk/examples/bdev/fio_plugin/fio_plugin.c new file mode 100644 index 000000000..4f50cfb01 --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/fio_plugin.c @@ -0,0 +1,838 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/accel_engine.h" +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +#include "spdk_internal/thread.h" +#include "spdk_internal/event.h" + +#include "config-host.h" +#include "fio.h" +#include "optgroup.h" + +/* FreeBSD is missing CLOCK_MONOTONIC_RAW, + * so alternative is provided. 
*/ +#ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC +#endif + +struct spdk_fio_options { + void *pad; + char *conf; + char *json_conf; + unsigned mem_mb; + bool mem_single_seg; +}; + +struct spdk_fio_request { + struct io_u *io; + struct thread_data *td; +}; + +struct spdk_fio_target { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + + TAILQ_ENTRY(spdk_fio_target) link; +}; + +struct spdk_fio_thread { + struct thread_data *td; /* fio thread context */ + struct spdk_thread *thread; /* spdk thread context */ + + TAILQ_HEAD(, spdk_fio_target) targets; + bool failed; /* true if the thread failed to initialize */ + + struct io_u **iocq; /* io completion queue */ + unsigned int iocq_count; /* number of iocq entries filled by last getevents */ + unsigned int iocq_size; /* number of iocq entries allocated */ +}; + +static bool g_spdk_env_initialized = false; +static const char *g_json_config_file = NULL; + +static int spdk_fio_init(struct thread_data *td); +static void spdk_fio_cleanup(struct thread_data *td); +static size_t spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread); + +/* Default polling timeout (ns) */ +#define SPDK_FIO_POLLING_TIMEOUT 1000000000ULL + +static int +spdk_fio_init_thread(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + + fio_thread = calloc(1, sizeof(*fio_thread)); + if (!fio_thread) { + SPDK_ERRLOG("failed to allocate thread local context\n"); + return -1; + } + + fio_thread->td = td; + td->io_ops_data = fio_thread; + + fio_thread->thread = spdk_thread_create("fio_thread", NULL); + if (!fio_thread->thread) { + free(fio_thread); + SPDK_ERRLOG("failed to allocate thread\n"); + return -1; + } + spdk_set_thread(fio_thread->thread); + + fio_thread->iocq_size = td->o.iodepth; + fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); + assert(fio_thread->iocq != NULL); + + TAILQ_INIT(&fio_thread->targets); + + 
return 0; +} + +static void +spdk_fio_bdev_close_targets(void *arg) +{ + struct spdk_fio_thread *fio_thread = arg; + struct spdk_fio_target *target, *tmp; + + TAILQ_FOREACH_SAFE(target, &fio_thread->targets, link, tmp) { + TAILQ_REMOVE(&fio_thread->targets, target, link); + spdk_put_io_channel(target->ch); + spdk_bdev_close(target->desc); + free(target); + } +} + +static void +spdk_fio_cleanup_thread(struct spdk_fio_thread *fio_thread) +{ + spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_close_targets, fio_thread); + + while (!spdk_thread_is_idle(fio_thread->thread)) { + spdk_fio_poll_thread(fio_thread); + } + + spdk_set_thread(fio_thread->thread); + + spdk_thread_exit(fio_thread->thread); + while (!spdk_thread_is_exited(fio_thread->thread)) { + spdk_thread_poll(fio_thread->thread, 0, 0); + } + spdk_thread_destroy(fio_thread->thread); + free(fio_thread->iocq); + free(fio_thread); +} + +static void +spdk_fio_calc_timeout(struct spdk_fio_thread *fio_thread, struct timespec *ts) +{ + uint64_t timeout, now; + + if (spdk_thread_has_active_pollers(fio_thread->thread)) { + return; + } + + timeout = spdk_thread_next_poller_expiration(fio_thread->thread); + now = spdk_get_ticks(); + + if (timeout == 0) { + timeout = now + (SPDK_FIO_POLLING_TIMEOUT * spdk_get_ticks_hz()) / SPDK_SEC_TO_NSEC; + } + + if (timeout > now) { + timeout = ((timeout - now) * SPDK_SEC_TO_NSEC) / spdk_get_ticks_hz() + + ts->tv_sec * SPDK_SEC_TO_NSEC + ts->tv_nsec; + + ts->tv_sec = timeout / SPDK_SEC_TO_NSEC; + ts->tv_nsec = timeout % SPDK_SEC_TO_NSEC; + } +} + +static pthread_t g_init_thread_id = 0; +static pthread_mutex_t g_init_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t g_init_cond; +static bool g_poll_loop = true; + +static void +spdk_fio_bdev_init_done(int rc, void *cb_arg) +{ + *(bool *)cb_arg = true; +} + +static void +spdk_fio_bdev_init_start(void *arg) +{ + bool *done = arg; + + if (g_json_config_file != NULL) { + spdk_app_json_config_load(g_json_config_file, 
SPDK_DEFAULT_RPC_ADDR, + spdk_fio_bdev_init_done, done, true); + } else { + spdk_subsystem_init(spdk_fio_bdev_init_done, done); + } +} + +static void +spdk_fio_bdev_fini_done(void *cb_arg) +{ + *(bool *)cb_arg = true; +} + +static void +spdk_fio_bdev_fini_start(void *arg) +{ + bool *done = arg; + + spdk_subsystem_fini(spdk_fio_bdev_fini_done, done); +} + +static void * +spdk_init_thread_poll(void *arg) +{ + struct spdk_fio_options *eo = arg; + struct spdk_fio_thread *fio_thread; + struct spdk_conf *config = NULL; + struct spdk_env_opts opts; + bool done; + int rc; + struct timespec ts; + struct thread_data td = {}; + + /* Create a dummy thread data for use on the initialization thread. */ + td.o.iodepth = 32; + td.eo = eo; + + /* Parse the SPDK configuration file */ + eo = arg; + + if (eo->conf && eo->json_conf) { + SPDK_ERRLOG("Cannot provide two types of configuration files\n"); + rc = EINVAL; + goto err_exit; + } else if (eo->conf && strlen(eo->conf)) { + config = spdk_conf_allocate(); + if (!config) { + SPDK_ERRLOG("Unable to allocate configuration file\n"); + rc = ENOMEM; + goto err_exit; + } + + rc = spdk_conf_read(config, eo->conf); + if (rc != 0) { + SPDK_ERRLOG("Invalid configuration file format\n"); + spdk_conf_free(config); + goto err_exit; + } + if (spdk_conf_first_section(config) == NULL) { + SPDK_ERRLOG("Invalid configuration file format\n"); + spdk_conf_free(config); + rc = EINVAL; + goto err_exit; + } + spdk_conf_set_as_default(config); + } else if (eo->json_conf && strlen(eo->json_conf)) { + g_json_config_file = eo->json_conf; + } else { + SPDK_ERRLOG("No configuration file provided\n"); + rc = EINVAL; + goto err_exit; + } + + /* Initialize the environment library */ + spdk_env_opts_init(&opts); + opts.name = "fio"; + + if (eo->mem_mb) { + opts.mem_size = eo->mem_mb; + } + opts.hugepage_single_segments = eo->mem_single_seg; + + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + spdk_conf_free(config); + rc = EINVAL; 
+ goto err_exit; + } + spdk_unaffinitize_thread(); + + spdk_thread_lib_init(NULL, 0); + + /* Create an SPDK thread temporarily */ + rc = spdk_fio_init_thread(&td); + if (rc < 0) { + SPDK_ERRLOG("Failed to create initialization thread\n"); + goto err_exit; + } + + fio_thread = td.io_ops_data; + + /* Initialize the bdev layer */ + done = false; + spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_init_start, &done); + + do { + spdk_fio_poll_thread(fio_thread); + } while (!done); + + /* + * Continue polling until there are no more events. + * This handles any final events posted by pollers. + */ + while (spdk_fio_poll_thread(fio_thread) > 0) {}; + + /* Set condition variable */ + pthread_mutex_lock(&g_init_mtx); + pthread_cond_signal(&g_init_cond); + + while (g_poll_loop) { + spdk_fio_poll_thread(fio_thread); + + clock_gettime(CLOCK_MONOTONIC, &ts); + spdk_fio_calc_timeout(fio_thread, &ts); + + rc = pthread_cond_timedwait(&g_init_cond, &g_init_mtx, &ts); + if (rc != ETIMEDOUT) { + break; + } + } + + pthread_mutex_unlock(&g_init_mtx); + + /* Finalize the bdev layer */ + done = false; + spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_fini_start, &done); + + do { + spdk_fio_poll_thread(fio_thread); + } while (!done && !spdk_thread_is_idle(fio_thread->thread)); + + spdk_fio_cleanup_thread(fio_thread); + + pthread_exit(NULL); + +err_exit: + exit(rc); + return NULL; +} + +static int +spdk_fio_init_env(struct thread_data *td) +{ + pthread_condattr_t attr; + int rc = -1; + + if (pthread_condattr_init(&attr)) { + SPDK_ERRLOG("Unable to initialize condition variable\n"); + return -1; + } + + if (pthread_condattr_setclock(&attr, CLOCK_MONOTONIC)) { + SPDK_ERRLOG("Unable to initialize condition variable\n"); + goto out; + } + + if (pthread_cond_init(&g_init_cond, &attr)) { + SPDK_ERRLOG("Unable to initialize condition variable\n"); + goto out; + } + + /* + * Spawn a thread to handle initialization operations and to poll things + * like the admin queues periodically. 
+ */ + rc = pthread_create(&g_init_thread_id, NULL, &spdk_init_thread_poll, td->eo); + if (rc != 0) { + SPDK_ERRLOG("Unable to spawn thread to poll admin queue. It won't be polled.\n"); + /* Without the background thread nobody will ever signal g_init_cond, + * so waiting on it below would block forever. Report the failure + * (rc is non-zero) instead. + */ + goto out; + } + + /* Wait for background thread to advance past the initialization */ + pthread_mutex_lock(&g_init_mtx); + pthread_cond_wait(&g_init_cond, &g_init_mtx); + pthread_mutex_unlock(&g_init_mtx); +out: + pthread_condattr_destroy(&attr); + return rc; +} + +/* Called for each thread to fill in the 'real_file_size' member for + * each file associated with this thread. This is called prior to + * the init operation (spdk_fio_init()) below. This call will occur + * on the initial start up thread if 'create_serialize' is true, or + * on the thread actually associated with 'thread_data' if 'create_serialize' + * is false. + */ +static int +spdk_fio_setup(struct thread_data *td) +{ + unsigned int i; + struct fio_file *f; + + /* we might be running in a daemonized FIO instance where standard + * input and output were closed and fds 0, 1, and 2 are reused + * for something important by FIO. We can't ensure we won't print + * anything (and so will our dependencies, e.g. DPDK), so abort early. 
+ * (is_backend is an fio global variable) + */ + if (is_backend) { + char buf[1024]; + snprintf(buf, sizeof(buf), + "SPDK FIO plugin won't work with daemonized FIO server."); + fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf)); + return -1; + } + + if (!td->o.use_thread) { + SPDK_ERRLOG("must set thread=1 when using spdk plugin\n"); + return -1; + } + + if (!g_spdk_env_initialized) { + if (spdk_fio_init_env(td)) { + SPDK_ERRLOG("failed to initialize\n"); + return -1; + } + + g_spdk_env_initialized = true; + } + + if (td->o.nr_files == 1 && strcmp(td->files[0]->file_name, "*") == 0) { + struct spdk_bdev *bdev; + + /* add all available bdevs as fio targets */ + for (bdev = spdk_bdev_first_leaf(); bdev; bdev = spdk_bdev_next_leaf(bdev)) { + add_file(td, spdk_bdev_get_name(bdev), 0, 1); + } + } + + for_each_file(td, f, i) { + struct spdk_bdev *bdev; + + if (strcmp(f->file_name, "*") == 0) { + continue; + } + + bdev = spdk_bdev_get_by_name(f->file_name); + if (!bdev) { + SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name); + return -1; + } + + f->real_file_size = spdk_bdev_get_num_blocks(bdev) * + spdk_bdev_get_block_size(bdev); + + } + + return 0; +} + +static void +spdk_fio_bdev_open(void *arg) +{ + struct thread_data *td = arg; + struct spdk_fio_thread *fio_thread; + unsigned int i; + struct fio_file *f; + int rc; + + fio_thread = td->io_ops_data; + + for_each_file(td, f, i) { + struct spdk_fio_target *target; + + if (strcmp(f->file_name, "*") == 0) { + continue; + } + + target = calloc(1, sizeof(*target)); + if (!target) { + SPDK_ERRLOG("Unable to allocate memory for I/O target.\n"); + fio_thread->failed = true; + return; + } + + target->bdev = spdk_bdev_get_by_name(f->file_name); + if (!target->bdev) { + SPDK_ERRLOG("Unable to find bdev with name %s\n", f->file_name); + free(target); + fio_thread->failed = true; + return; + } + + rc = spdk_bdev_open(target->bdev, true, NULL, NULL, &target->desc); + if (rc) { + SPDK_ERRLOG("Unable to open bdev 
%s\n", f->file_name); + free(target); + fio_thread->failed = true; + return; + } + + target->ch = spdk_bdev_get_io_channel(target->desc); + if (!target->ch) { + SPDK_ERRLOG("Unable to get I/O channel for bdev.\n"); + spdk_bdev_close(target->desc); + free(target); + fio_thread->failed = true; + return; + } + + f->engine_data = target; + + TAILQ_INSERT_TAIL(&fio_thread->targets, target, link); + } +} + +/* Called for each thread, on that thread, shortly after the thread + * starts. Creates the per-thread SPDK context and opens every bdev + * target on it. + * + * Returns 0 on success, -1 if the context could not be created or any + * target failed to open. + */ +static int +spdk_fio_init(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + + if (spdk_fio_init_thread(td) != 0) { + /* The context was either never allocated or has already been + * freed by spdk_fio_init_thread(); do not keep a stale pointer. + */ + td->io_ops_data = NULL; + return -1; + } + + fio_thread = td->io_ops_data; + fio_thread->failed = false; + + spdk_thread_send_msg(fio_thread->thread, spdk_fio_bdev_open, td); + + /* Poll until the open message (and anything it queued) has been processed. */ + while (spdk_fio_poll_thread(fio_thread) > 0) {} + + if (fio_thread->failed) { + return -1; + } + + return 0; +} + +/* Tear down the per-thread SPDK context created by spdk_fio_init(). */ +static void +spdk_fio_cleanup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + /* Nothing to tear down if spdk_fio_init() never set up a context. */ + if (fio_thread == NULL) { + return; + } + + spdk_fio_cleanup_thread(fio_thread); + td->io_ops_data = NULL; +} + +static int +spdk_fio_open(struct thread_data *td, struct fio_file *f) +{ + + return 0; +} + +static int +spdk_fio_close(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int +spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) +{ + td->orig_buffer = spdk_dma_zmalloc(total_mem, 0x1000, NULL); + return td->orig_buffer == NULL; +} + +static void +spdk_fio_iomem_free(struct thread_data *td) +{ + spdk_dma_free(td->orig_buffer); +} + +static int +spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req; + + io_u->engine_data = NULL; + + fio_req = calloc(1, sizeof(*fio_req)); + if (fio_req == NULL) { + return 1; + } + fio_req->io = io_u; + fio_req->td = td; + + io_u->engine_data = fio_req; + + return 0; +} + +static void +spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req = io_u->engine_data; + + if (fio_req) { 
assert(fio_req->io == io_u); + free(fio_req); + io_u->engine_data = NULL; + } +} + +static void +spdk_fio_completion_cb(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg) +{ + struct spdk_fio_request *fio_req = cb_arg; + struct thread_data *td = fio_req->td; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(fio_thread->iocq_count < fio_thread->iocq_size); + fio_req->io->error = success ? 0 : EIO; + fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; + + spdk_bdev_free_io(bdev_io); +} + +#if FIO_IOOPS_VERSION >= 24 +typedef enum fio_q_status fio_q_status_t; +#else +typedef int fio_q_status_t; +#endif + +static fio_q_status_t +spdk_fio_queue(struct thread_data *td, struct io_u *io_u) +{ + int rc = 1; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_fio_target *target = io_u->file->engine_data; + + assert(fio_req->td == td); + + if (!target) { + SPDK_ERRLOG("Unable to look up correct I/O target.\n"); + fio_req->io->error = ENODEV; + return FIO_Q_COMPLETED; + } + + switch (io_u->ddir) { + case DDIR_READ: + rc = spdk_bdev_read(target->desc, target->ch, + io_u->buf, io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + case DDIR_WRITE: + rc = spdk_bdev_write(target->desc, target->ch, + io_u->buf, io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + case DDIR_TRIM: + rc = spdk_bdev_unmap(target->desc, target->ch, + io_u->offset, io_u->xfer_buflen, + spdk_fio_completion_cb, fio_req); + break; + default: + assert(false); + break; + } + + if (rc == -ENOMEM) { + return FIO_Q_BUSY; + } + + if (rc != 0) { + fio_req->io->error = abs(rc); + return FIO_Q_COMPLETED; + } + + return FIO_Q_QUEUED; +} + +static struct io_u * +spdk_fio_event(struct thread_data *td, int event) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(event >= 0); + assert((unsigned)event < fio_thread->iocq_count); + return fio_thread->iocq[event]; +} + +static size_t 
+spdk_fio_poll_thread(struct spdk_fio_thread *fio_thread) +{ + return spdk_thread_poll(fio_thread->thread, 0, 0); +} + +static int +spdk_fio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, const struct timespec *t) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct timespec t0, t1; + uint64_t timeout = 0; + + if (t) { + timeout = t->tv_sec * SPDK_SEC_TO_NSEC + t->tv_nsec; + clock_gettime(CLOCK_MONOTONIC_RAW, &t0); + } + + fio_thread->iocq_count = 0; + + for (;;) { + spdk_fio_poll_thread(fio_thread); + + if (fio_thread->iocq_count >= min) { + return fio_thread->iocq_count; + } + + if (t) { + clock_gettime(CLOCK_MONOTONIC_RAW, &t1); + uint64_t elapse = ((t1.tv_sec - t0.tv_sec) * SPDK_SEC_TO_NSEC) + + t1.tv_nsec - t0.tv_nsec; + if (elapse > timeout) { + break; + } + } + } + + return fio_thread->iocq_count; +} + +static int +spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) +{ + /* TODO: This should probably send a flush to the device, but for now just return successful. 
*/ + return 0; +} + +static struct fio_option options[] = { + { + .name = "spdk_conf", + .lname = "SPDK configuration file", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, conf), + .help = "A SPDK configuration file", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "spdk_json_conf", + .lname = "SPDK JSON configuration file", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, json_conf), + .help = "A SPDK JSON configuration file", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "spdk_mem", + .lname = "SPDK memory in MB", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, mem_mb), + .help = "Amount of memory in MB to allocate for SPDK", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "spdk_single_seg", + .lname = "SPDK switch to create just a single hugetlbfs file", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct spdk_fio_options, mem_single_seg), + .help = "If set to 1, SPDK will use just a single hugetlbfs file", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = NULL, + }, +}; + +/* FIO imports this structure using dlsym */ +struct ioengine_ops ioengine = { + .name = "spdk_bdev", + .version = FIO_IOOPS_VERSION, + .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN, + .setup = spdk_fio_setup, + .init = spdk_fio_init, + /* .prep = unused, */ + .queue = spdk_fio_queue, + /* .commit = unused, */ + .getevents = spdk_fio_getevents, + .event = spdk_fio_event, + /* .errdetails = unused, */ + /* .cancel = unused, */ + .cleanup = spdk_fio_cleanup, + .open_file = spdk_fio_open, + .close_file = spdk_fio_close, + .invalidate = spdk_fio_invalidate, + /* .unlink_file = unused, */ + /* .get_file_size = unused, */ + /* .terminate = unused, */ + .iomem_alloc = spdk_fio_iomem_alloc, + .iomem_free = spdk_fio_iomem_free, + .io_u_init = spdk_fio_io_u_init, + .io_u_free = 
spdk_fio_io_u_free, + .option_struct_size = sizeof(struct spdk_fio_options), + .options = options, +}; + +static void fio_init spdk_fio_register(void) +{ + register_ioengine(&ioengine); +} + +static void +spdk_fio_finish_env(void) +{ + pthread_mutex_lock(&g_init_mtx); + g_poll_loop = false; + pthread_cond_signal(&g_init_cond); + pthread_mutex_unlock(&g_init_mtx); + pthread_join(g_init_thread_id, NULL); + + spdk_thread_lib_fini(); +} + +static void fio_exit spdk_fio_unregister(void) +{ + if (g_spdk_env_initialized) { + spdk_fio_finish_env(); + g_spdk_env_initialized = false; + } + unregister_ioengine(&ioengine); +} diff --git a/src/spdk/examples/bdev/fio_plugin/full_bench.fio b/src/spdk/examples/bdev/fio_plugin/full_bench.fio new file mode 100644 index 000000000..f76da18db --- /dev/null +++ b/src/spdk/examples/bdev/fio_plugin/full_bench.fio @@ -0,0 +1,20 @@ +[global] +ioengine=spdk_bdev +spdk_conf=./examples/bdev/fio_plugin/bdev.conf.in +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +percentile_list=50:99:99.9:99.99:99.999 + +[4k_randread_qd1] +filename=Malloc0 +description="4KiB Random Read QD=1" +bs=4k +rw=randread +iodepth=1 +time_based=1 +ramp_time=0 +runtime=10 diff --git a/src/spdk/examples/bdev/hello_world/.gitignore b/src/spdk/examples/bdev/hello_world/.gitignore new file mode 100644 index 000000000..7bdf93936 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/.gitignore @@ -0,0 +1 @@ +hello_bdev diff --git a/src/spdk/examples/bdev/hello_world/Makefile b/src/spdk/examples/bdev/hello_world/Makefile new file mode 100644 index 000000000..f4a5a5b69 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/Makefile @@ -0,0 +1,44 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_bdev + +C_SRCS := hello_bdev.c + +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += bdev_rpc bdev accel event thread util conf trace log jsonrpc json rpc sock notify + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/bdev/hello_world/bdev.conf b/src/spdk/examples/bdev/hello_world/bdev.conf new file mode 100644 index 000000000..80af878f8 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/bdev.conf @@ -0,0 +1,3 @@ +[Malloc] + NumberOfLuns 1 + LunSizeInMB 32 diff --git a/src/spdk/examples/bdev/hello_world/hello_bdev.c b/src/spdk/examples/bdev/hello_world/hello_bdev.c new file mode 100644 index 000000000..565005212 --- /dev/null +++ b/src/spdk/examples/bdev/hello_world/hello_bdev.c @@ -0,0 +1,295 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/bdev_module.h" + +static char *g_bdev_name = "Malloc0"; + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. + */ +struct hello_context_t { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *bdev_io_channel; + char *buff; + char *bdev_name; + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +/* + * Usage function for printing parameters that are specific to this application + */ +static void +hello_bdev_usage(void) +{ + printf(" -b <bdev> name of the bdev to use\n"); +} + +/* + * This function is called to parse the parameters that are specific to this application + */ +static int hello_bdev_parse_arg(int ch, char *arg) +{ + switch (ch) { + case 'b': + g_bdev_name = arg; + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * Callback function for read io completion. 
+ */ +static void +read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct hello_context_t *hello_context = cb_arg; + + if (success) { + SPDK_NOTICELOG("Read string from bdev : %s\n", hello_context->buff); + } else { + SPDK_ERRLOG("bdev io read error\n"); + } + + /* Complete the bdev io and close the channel */ + spdk_bdev_free_io(bdev_io); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + SPDK_NOTICELOG("Stopping app\n"); + spdk_app_stop(success ? 0 : -1); +} + +static void +hello_read(void *arg) +{ + struct hello_context_t *hello_context = arg; + int rc = 0; + uint32_t length = spdk_bdev_get_block_size(hello_context->bdev); + + SPDK_NOTICELOG("Reading io\n"); + rc = spdk_bdev_read(hello_context->bdev_desc, hello_context->bdev_io_channel, + hello_context->buff, 0, length, read_complete, hello_context); + + if (rc == -ENOMEM) { + SPDK_NOTICELOG("Queueing io\n"); + /* In case we cannot perform I/O now, queue I/O */ + hello_context->bdev_io_wait.bdev = hello_context->bdev; + hello_context->bdev_io_wait.cb_fn = hello_read; + hello_context->bdev_io_wait.cb_arg = hello_context; + spdk_bdev_queue_io_wait(hello_context->bdev, hello_context->bdev_io_channel, + &hello_context->bdev_io_wait); + } else if (rc) { + SPDK_ERRLOG("%s error while reading from bdev: %d\n", spdk_strerror(-rc), rc); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + } +} + +/* + * Callback function for write io completion. 
+ */ +static void +write_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct hello_context_t *hello_context = cb_arg; + uint32_t length; + + /* Complete the I/O */ + spdk_bdev_free_io(bdev_io); + + if (success) { + SPDK_NOTICELOG("bdev io write completed successfully\n"); + } else { + SPDK_ERRLOG("bdev io write error: %d\n", EIO); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + + /* Zero the buffer so that we can use it for reading */ + length = spdk_bdev_get_block_size(hello_context->bdev); + memset(hello_context->buff, 0, length); + + hello_read(hello_context); +} + +static void +hello_write(void *arg) +{ + struct hello_context_t *hello_context = arg; + int rc = 0; + uint32_t length = spdk_bdev_get_block_size(hello_context->bdev); + + SPDK_NOTICELOG("Writing to the bdev\n"); + rc = spdk_bdev_write(hello_context->bdev_desc, hello_context->bdev_io_channel, + hello_context->buff, 0, length, write_complete, hello_context); + + if (rc == -ENOMEM) { + SPDK_NOTICELOG("Queueing io\n"); + /* In case we cannot perform I/O now, queue I/O */ + hello_context->bdev_io_wait.bdev = hello_context->bdev; + hello_context->bdev_io_wait.cb_fn = hello_write; + hello_context->bdev_io_wait.cb_arg = hello_context; + spdk_bdev_queue_io_wait(hello_context->bdev, hello_context->bdev_io_channel, + &hello_context->bdev_io_wait); + } else if (rc) { + SPDK_ERRLOG("%s error while writing to bdev: %d\n", spdk_strerror(-rc), rc); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + } +} + +/* + * Our initial event that kicks off everything from main(). 
+ */ +static void +hello_start(void *arg1) +{ + struct hello_context_t *hello_context = arg1; + uint32_t blk_size, buf_align; + int rc = 0; + hello_context->bdev = NULL; + hello_context->bdev_desc = NULL; + + SPDK_NOTICELOG("Successfully started the application\n"); + + /* + * Get the bdev. There can be many bdevs configured, but this + * application will only use the one input by the user at runtime so + * we get it via its name. + */ + hello_context->bdev = spdk_bdev_get_by_name(hello_context->bdev_name); + if (hello_context->bdev == NULL) { + SPDK_ERRLOG("Could not find the bdev: %s\n", hello_context->bdev_name); + spdk_app_stop(-1); + return; + } + + /* + * Open the bdev by calling spdk_bdev_open() + * The function will return a descriptor + */ + SPDK_NOTICELOG("Opening the bdev %s\n", hello_context->bdev_name); + rc = spdk_bdev_open(hello_context->bdev, true, NULL, NULL, &hello_context->bdev_desc); + if (rc) { + SPDK_ERRLOG("Could not open bdev: %s\n", hello_context->bdev_name); + spdk_app_stop(-1); + return; + } + + SPDK_NOTICELOG("Opening io channel\n"); + /* Open I/O channel */ + hello_context->bdev_io_channel = spdk_bdev_get_io_channel(hello_context->bdev_desc); + if (hello_context->bdev_io_channel == NULL) { + SPDK_ERRLOG("Could not create bdev I/O channel!!\n"); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + + /* Allocate memory for the write buffer. + * Initialize the write buffer with the string "Hello World!" 
+ */ + blk_size = spdk_bdev_get_block_size(hello_context->bdev); + buf_align = spdk_bdev_get_buf_align(hello_context->bdev); + hello_context->buff = spdk_dma_zmalloc(blk_size, buf_align, NULL); + if (!hello_context->buff) { + SPDK_ERRLOG("Failed to allocate buffer\n"); + spdk_put_io_channel(hello_context->bdev_io_channel); + spdk_bdev_close(hello_context->bdev_desc); + spdk_app_stop(-1); + return; + } + snprintf(hello_context->buff, blk_size, "%s", "Hello World!\n"); + + hello_write(hello_context); +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t hello_context = {}; + + /* Set default values in opts structure. */ + spdk_app_opts_init(&opts); + opts.name = "hello_bdev"; + + /* + * Parse built-in SPDK command line parameters as well + * as our custom one(s). + */ + if ((rc = spdk_app_parse_args(argc, argv, &opts, "b:", NULL, hello_bdev_parse_arg, + hello_bdev_usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { + exit(rc); + } + hello_context.bdev_name = g_bdev_name; + + /* + * spdk_app_start() will initialize the SPDK framework, call hello_start(), + * and then block until spdk_app_stop() is called (or if an initialization + * error occurs, spdk_app_start() will return with rc even without calling + * hello_start(). + */ + rc = spdk_app_start(&opts, hello_start, &hello_context); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + } + + /* At this point either spdk_app_stop() was called, or spdk_app_start() + * failed because of internal error. + */ + + /* When the app stops, free up memory that we allocated. */ + spdk_dma_free(hello_context.buff); + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/Makefile b/src/spdk/examples/blob/Makefile new file mode 100644 index 000000000..a297ddb26 --- /dev/null +++ b/src/spdk/examples/blob/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world cli + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/blob/cli/.gitignore b/src/spdk/examples/blob/cli/.gitignore new file mode 100644 index 000000000..6c895d79b --- /dev/null +++ b/src/spdk/examples/blob/cli/.gitignore @@ -0,0 +1 @@ +blobcli diff --git a/src/spdk/examples/blob/cli/Makefile b/src/spdk/examples/blob/cli/Makefile new file mode 100644 index 000000000..3c3ff1f26 --- /dev/null +++ b/src/spdk/examples/blob/cli/Makefile @@ -0,0 +1,46 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = blobcli + +C_SRCS := blobcli.c + +# Don't link bdev_lvol in blobcli - otherwise this utility cannot operate on an lvolstore +SPDK_LIB_LIST = $(filter-out bdev_lvol,$(ALL_MODULES_LIST)) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += bdev accel event thread util conf trace \ + log jsonrpc json rpc sock notify + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/blob/cli/README.md b/src/spdk/examples/blob/cli/README.md new file mode 100644 index 000000000..9c87dbdd4 --- /dev/null +++ b/src/spdk/examples/blob/cli/README.md @@ -0,0 +1,65 @@ +The blobcli tool has several options that are listed by using the -h command +however the three operating modes are covered in more detail here: + +Command Mode +------------ + +This is the default and will just execute one command at a time. It's simple +but the downside is that if you are going to interact quite a bit with the +blobstore, the startup time for the application can be cumbersome. + +Shell Mode +---------- + +You startup shell mode by using the -S command. At that point you will get +a "blob>" prompt where you can enter any of the commands, including -h, +to execute them. 
You can still enter just one at a time but the initial +startup time for the application will not get in the way between commands +anymore so it is much more usable. + +Script (aka test) Mode +---------------------- + +In script mode you just supply one command with a filename when you start +the cli, for example `blobcli -T test.bs` will feed the tool the file +called test.bs which contains a series of commands that will all run +automatically and, like shell mode, will only initialize one time so is +quick. + +The script file format (example) is shown below. Comments are allowed and +each line should contain one valid command (and its parameters) only. In +order to operate on blobs via their ID value, use the token $Bn where n +represents the instance of the blob created in the script. + +For example, the line `-s $B0` will operate on the blobid of the first +blob created in the script (0 index based). `$B2` represents the third +blob created in the script. + +If you start test mode with the additional "ignore" option, any invalid +script lines will simply be skipped, otherwise the tool will exit if +it runs into an invalid line (i.e. `./blobcli -T test.bs ignore`). + +Sample test/bs file: + +~~~{.sh} +# this is a comment +-i +-s bs +-l bdevs +-n 1 +-s bs +-s $B0 +-n 2 +-s $B1 +-m $B0 Makefile +-d $B0 M.blob +-f $B1 65 +-d $B1 65.blob +-s bs +-x $B0 b0key boval +-x $B1 b1key b1val +-r $B0 b0key +-s $B0 +-s $B1 +-s bs +~~~ diff --git a/src/spdk/examples/blob/cli/blobcli.c b/src/spdk/examples/blob/cli/blobcli.c new file mode 100644 index 000000000..edb071847 --- /dev/null +++ b/src/spdk/examples/blob/cli/blobcli.c @@ -0,0 +1,1572 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/log.h" +#include "spdk/version.h" +#include "spdk/string.h" +#include "spdk/uuid.h" + +/* + * The following is not a public header file, but the CLI does expose + * some internals of blobstore for dev/debug purposes so we + * include it here. 
+ */ +#include "../lib/blob/blobstore.h" +static void cli_start(void *arg1); + +static const char *program_name = "blobcli"; +/* default name for .conf file, any name can be used however with -c switch */ +static const char *program_conf = "blobcli.conf"; + +/* + * CMD mode runs one command at a time which can be annoying as the init takes + * a few seconds, so the shell mode, invoked with -S, does the init once and gives + * the user an interactive shell instead. With script mode init is also done just + * once. + */ +enum cli_mode_type { + CLI_MODE_CMD, + CLI_MODE_SHELL, + CLI_MODE_SCRIPT +}; + +enum cli_action_type { + CLI_NONE, + CLI_IMPORT_BLOB, + CLI_DUMP_BLOB, + CLI_FILL, + CLI_REM_XATTR, + CLI_SET_XATTR, + CLI_SET_SUPER, + CLI_SHOW_BS, + CLI_SHOW_BLOB, + CLI_CREATE_BLOB, + CLI_LIST_BDEVS, + CLI_LIST_BLOBS, + CLI_INIT_BS, + CLI_DUMP_BS, + CLI_SHELL_EXIT, + CLI_HELP, +}; + +#define BUFSIZE 255 +#define MAX_ARGS 16 +#define ALIGN_4K 4096 +#define STARTING_IO_UNIT 0 +#define NUM_IO_UNITS 1 + +/* + * The CLI uses the SPDK app framework so is async and callback driven. A + * pointer to this structure is passed to SPDK calls and returned in the + * callbacks for easy access to all the info we may need. 
+ */ +struct cli_context_t { + struct spdk_blob_store *bs; + struct spdk_blob *blob; + struct spdk_bs_dev *bs_dev; + spdk_blob_id blobid; + spdk_blob_id superid; + struct spdk_io_channel *channel; + uint8_t *buff; + uint64_t page_size; + uint64_t io_unit_size; + uint64_t io_unit_count; + uint64_t blob_io_units; + uint64_t bytes_so_far; + FILE *fp; + enum cli_action_type action; + char key[BUFSIZE + 1]; + char value[BUFSIZE + 1]; + char file[BUFSIZE + 1]; + uint64_t filesize; + int fill_value; + char bdev_name[BUFSIZE]; + int rc; + int num_clusters; + enum cli_mode_type cli_mode; + const char *config_file; + int argc; + char *argv[MAX_ARGS]; + bool app_started; + char script_file[BUFSIZE + 1]; +}; + +/* we store a bunch of stuff in a global struct for use by scripting mode */ +#define MAX_SCRIPT_LINES 64 +#define MAX_SCRIPT_BLOBS 16 +struct cli_script_t { + spdk_blob_id blobid[MAX_SCRIPT_BLOBS]; + int blobid_idx; + int max_index; + int cmdline_idx; + bool ignore_errors; + char *cmdline[MAX_SCRIPT_LINES]; +}; +struct cli_script_t g_script; + +/* + * Common printing of commands for CLI and shell modes. 
+ */ +static void +print_cmds(void) +{ + printf("\nCommands include:\n"); + printf("\t-b bdev - name of the block device to use (example: Nvme0n1)\n"); + printf("\t-d <blobid> filename - dump contents of a blob to a file\n"); + printf("\t-D - dump metadata contents of an existing blobstore\n"); + printf("\t-f <blobid> value - fill a blob with a decimal value\n"); + printf("\t-h - this help screen\n"); + printf("\t-i - initialize a blobstore\n"); + printf("\t-l bdevs | blobs - list either available bdevs or existing blobs\n"); + printf("\t-m <blobid> filename - import contents of a file to a blob\n"); + printf("\t-n <# clusters> - create new blob\n"); + printf("\t-p <blobid> - set the superblob to the ID provided\n"); + printf("\t-r <blobid> name - remove xattr name/value pair\n"); + printf("\t-s <blobid> | bs - show blob info or blobstore info\n"); + printf("\t-x <blobid> name value - set xattr name/value pair\n"); + printf("\t-X - exit when in interactive shell mode\n"); + printf("\t-S - enter interactive shell mode\n"); + printf("\t-T <filename> - automated script mode\n"); + printf("\n"); +} + +/* + * Prints usage and relevant error message. + */ +static void +usage(struct cli_context_t *cli_context, char *msg) +{ + if (msg) { + printf("%s", msg); + } + + if (!cli_context || cli_context->cli_mode == CLI_MODE_CMD) { + printf("Version %s\n", SPDK_VERSION_STRING); + printf("Usage: %s [-c SPDK config_file] Command\n", program_name); + printf("\n%s is a command line tool for interacting with blobstore\n", + program_name); + printf("on the underlying device specified in the conf file passed\n"); + printf("in as a command line option.\n"); + } + if (!cli_context || cli_context->cli_mode != CLI_MODE_SCRIPT) { + print_cmds(); + } +} + +/* + * Free up memory that we allocated. 
+ */ +static void +cli_cleanup(struct cli_context_t *cli_context) +{ + if (cli_context->buff) { + spdk_free(cli_context->buff); + } + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + int i; + + for (i = 0; i <= g_script.max_index; i++) { + free(g_script.cmdline[i]); + } + } + free(cli_context); +} + +/* + * Callback routine for the blobstore unload. + */ +static void +unload_complete(void *cb_arg, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + printf("Error %d unloading the bobstore\n", bserrno); + cli_context->rc = bserrno; + } + + /* + * Quit if we're in cmd mode or exiting shell mode, otherwise + * clear the action field and start the main function again. + */ + if (cli_context->cli_mode == CLI_MODE_CMD || + cli_context->action == CLI_SHELL_EXIT) { + spdk_app_stop(cli_context->rc); + } else { + /* when action is CLI_NONE, we know we need to remain in the shell */ + cli_context->bs = NULL; + cli_context->action = CLI_NONE; + cli_start(cli_context); + } +} + +/* + * Unload the blobstore. + */ +static void +unload_bs(struct cli_context_t *cli_context, char *msg, int bserrno) +{ + if (bserrno) { + printf("%s (err %d)\n", msg, bserrno); + cli_context->rc = bserrno; + } + + if (cli_context->bs) { + if (cli_context->channel) { + spdk_bs_free_io_channel(cli_context->channel); + cli_context->channel = NULL; + } + spdk_bs_unload(cli_context->bs, unload_complete, cli_context); + } else if (cli_context->cli_mode != CLI_MODE_SCRIPT) { + spdk_app_stop(bserrno); + + } +} + +/* + * Callback for closing a blob. + */ +static void +close_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in close callback", + bserrno); + return; + } + unload_bs(cli_context, "", 0); +} + +/* + * Callback function for sync'ing metadata. 
+ */ +static void +sync_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in sync callback", + bserrno); + return; + } + + spdk_blob_close(cli_context->blob, close_cb, cli_context); +} + +static void +resize_cb(void *cb_arg, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + uint64_t total = 0; + + if (bserrno) { + unload_bs(cli_context, "Error in blob resize", + bserrno); + return; + } + + total = spdk_blob_get_num_clusters(cli_context->blob); + printf("blob now has USED clusters of %" PRIu64 "\n", + total); + + /* + * Always a good idea to sync after MD changes or the changes + * may be lost if things aren't closed cleanly. + */ + spdk_blob_sync_md(cli_context->blob, sync_cb, cli_context); +} + +/* + * Callback function for opening a blob after creating. + */ +static void +open_now_resize_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in open completion", + bserrno); + return; + } + cli_context->blob = blob; + + spdk_blob_resize(cli_context->blob, cli_context->num_clusters, + resize_cb, cli_context); +} + +/* + * Callback function for creating a blob. + */ +static void +blob_create_cb(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in blob create callback", + bserrno); + return; + } + + cli_context->blobid = blobid; + printf("New blob id %" PRIu64 "\n", cli_context->blobid); + + /* if we're in script mode, we need info on all blobids for later */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + g_script.blobid[g_script.blobid_idx++] = blobid; + } + + /* We have to open the blob before we can do things like resize. 
*/ + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + open_now_resize_cb, cli_context); +} + +/* + * Callback for get_super where we'll continue on to show blobstore info. + */ +static void +show_bs_cb(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + struct spdk_bs_type bstype; + uint64_t val; + struct spdk_bdev *bdev = NULL; + + if (bserrno && bserrno != -ENOENT) { + unload_bs(cli_context, "Error in get_super callback", + bserrno); + return; + } + cli_context->superid = blobid; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + unload_bs(cli_context, "Error w/bdev in get_super callback", + bserrno); + return; + } + + printf("Blobstore Public Info:\n"); + printf("\tUsing bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + printf("\tAPI Version: %d\n", SPDK_BS_VERSION); + + if (bserrno != -ENOENT) { + printf("\tsuper blob ID: %" PRIu64 "\n", cli_context->superid); + } else { + printf("\tsuper blob ID: none assigned\n"); + } + + printf("\tpage size: %" PRIu64 "\n", cli_context->page_size); + printf("\tio unit size: %" PRIu64 "\n", cli_context->io_unit_size); + + val = spdk_bs_get_cluster_size(cli_context->bs); + printf("\tcluster size: %" PRIu64 "\n", val); + + val = spdk_bs_free_cluster_count(cli_context->bs); + printf("\t# free clusters: %" PRIu64 "\n", val); + + bstype = spdk_bs_get_bstype(cli_context->bs); + spdk_log_dump(stdout, "\tblobstore type:", &bstype, sizeof(bstype)); + + /* + * Private info isn't accessible via the public API but + * may be useful for debug of blobstore based applications. + */ + printf("\nBlobstore Private Info:\n"); + printf("\tMetadata start (pages): %" PRIu64 "\n", + cli_context->bs->md_start); + printf("\tMetadata length (pages): %d\n", + cli_context->bs->md_len); + + unload_bs(cli_context, "", 0); +} + +/* + * Show detailed info about a particular blob. 
+ */ +static void +show_blob(struct cli_context_t *cli_context) +{ + uint64_t val; + struct spdk_xattr_names *names; + const void *value; + size_t value_len; + unsigned int i; + + printf("Blob Public Info:\n"); + + printf("blob ID: %" PRIu64 "\n", cli_context->blobid); + + val = spdk_blob_get_num_clusters(cli_context->blob); + printf("# of clusters: %" PRIu64 "\n", val); + + printf("# of bytes: %" PRIu64 "\n", + val * spdk_bs_get_cluster_size(cli_context->bs)); + + val = spdk_blob_get_num_pages(cli_context->blob); + printf("# of pages: %" PRIu64 "\n", val); + + spdk_blob_get_xattr_names(cli_context->blob, &names); + + printf("# of xattrs: %d\n", spdk_xattr_names_get_count(names)); + printf("xattrs:\n"); + for (i = 0; i < spdk_xattr_names_get_count(names); i++) { + spdk_blob_get_xattr_value(cli_context->blob, + spdk_xattr_names_get_name(names, i), + &value, &value_len); + if (value_len > BUFSIZE) { + printf("FYI: adjusting size of xattr due to CLI limits.\n"); + value_len = BUFSIZE + 1; + } + printf("\n(%d) Name:%s\n", i, + spdk_xattr_names_get_name(names, i)); + printf("(%d) Value:\n", i); + spdk_log_dump(stdout, "", value, value_len - 1); + } + + /* + * Private info isn't accessible via the public API but + * may be useful for debug of blobstore based applications. + */ + printf("\nBlob Private Info:\n"); + switch (cli_context->blob->state) { + case SPDK_BLOB_STATE_DIRTY: + printf("state: DIRTY\n"); + break; + case SPDK_BLOB_STATE_CLEAN: + printf("state: CLEAN\n"); + break; + case SPDK_BLOB_STATE_LOADING: + printf("state: LOADING\n"); + break; + default: + printf("state: UNKNOWN\n"); + break; + } + printf("open ref count: %d\n", + cli_context->blob->open_ref); + + spdk_xattr_names_free(names); +} + +/* + * Callback for getting the first blob, shared with simple blob listing as well. 
+ */ +static void +blob_iter_cb(void *arg1, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + if (bserrno == -ENOENT) { + /* this simply means there are no more blobs */ + unload_bs(cli_context, "", 0); + } else { + unload_bs(cli_context, "Error in blob iter callback", + bserrno); + } + return; + } + + if (cli_context->action == CLI_LIST_BLOBS) { + printf("\nList BLOBS:\n"); + printf("Found blob with ID# %" PRIu64 "\n", + spdk_blob_get_id(blob)); + } else if (spdk_blob_get_id(blob) == cli_context->blobid) { + /* + * Found the blob we're looking for, but we need to finish + * iterating even after showing the info so that internally + * the blobstore logic will close the blob. Or we could + * chose to close it now, either way. + */ + cli_context->blob = blob; + show_blob(cli_context); + } + + spdk_bs_iter_next(cli_context->bs, blob, blob_iter_cb, cli_context); +} + +/* + * Callback for setting the super blob ID. + */ +static void +set_super_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in set_super callback", + bserrno); + return; + } + + printf("Super Blob ID has been set.\n"); + unload_bs(cli_context, "", 0); +} + +/* + * Callback for set_xattr_open where we set or delete xattrs. 
+ */ +static void +set_xattr_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in blob open callback", + bserrno); + return; + } + cli_context->blob = blob; + + if (cli_context->action == CLI_SET_XATTR) { + spdk_blob_set_xattr(cli_context->blob, cli_context->key, + cli_context->value, strlen(cli_context->value) + 1); + printf("Xattr has been set.\n"); + } else { + spdk_blob_remove_xattr(cli_context->blob, cli_context->key); + printf("Xattr has been removed.\n"); + } + + spdk_blob_sync_md(cli_context->blob, sync_cb, cli_context); +} + +/* + * Callback function for reading a blob for dumping to a file. + */ +static void +read_dump_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + uint64_t bytes_written; + + if (bserrno) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error in read completion", + bserrno); + return; + } + + bytes_written = fwrite(cli_context->buff, NUM_IO_UNITS, cli_context->io_unit_size, + cli_context->fp); + if (bytes_written != cli_context->io_unit_size) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error with fwrite", + bserrno); + return; + } + + printf("."); + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + /* perform another read */ + spdk_blob_io_read(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, read_dump_cb, cli_context); + } else { + /* done reading */ + printf("\nFile write complete (to %s).\n", cli_context->file); + fclose(cli_context->fp); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback for write completion on the import of a file to a blob. 
+ */ +static void +write_imp_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + uint64_t bytes_read; + + if (bserrno) { + fclose(cli_context->fp); + unload_bs(cli_context, "Error in write completion", + bserrno); + return; + } + + if (cli_context->bytes_so_far < cli_context->filesize) { + /* perform another file read */ + bytes_read = fread(cli_context->buff, 1, + cli_context->io_unit_size, + cli_context->fp); + cli_context->bytes_so_far += bytes_read; + + /* if this read is < 1 io_unit, fill with 0s */ + if (bytes_read < cli_context->io_unit_size) { + uint8_t *offset = cli_context->buff + bytes_read; + memset(offset, 0, cli_context->io_unit_size - bytes_read); + } + } else { + /* + * Done reading the file, fill the rest of the blob with 0s, + * yeah we're memsetting the same io_unit over and over here + */ + memset(cli_context->buff, 0, cli_context->io_unit_size); + } + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + printf("."); + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_imp_cb, cli_context); + } else { + /* done writing */ + printf("\nBlob import complete (from %s).\n", cli_context->file); + fclose(cli_context->fp); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback for open blobs where we'll continue on dump a blob to a file or + * import a file to a blob. For dump, the resulting file will always be the + * full size of the blob. For import, the blob will fill with the file + * contents first and then 0 out the rest of the blob. + */ +static void +dump_imp_open_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in blob open callback", + bserrno); + return; + } + cli_context->blob = blob; + + /* + * We'll transfer just one io_unit at a time to keep the buffer + * small. 
This could be bigger of course. + */ + cli_context->buff = spdk_malloc(cli_context->io_unit_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (cli_context->buff == NULL) { + printf("Error in allocating memory\n"); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + printf("Working"); + cli_context->blob_io_units = spdk_blob_get_num_io_units(cli_context->blob); + cli_context->io_unit_count = 0; + if (cli_context->action == CLI_DUMP_BLOB) { + cli_context->fp = fopen(cli_context->file, "w"); + if (cli_context->fp == NULL) { + printf("Error in opening file\n"); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + + /* read a io_unit of data from the blob */ + spdk_blob_io_read(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, read_dump_cb, cli_context); + } else { + cli_context->fp = fopen(cli_context->file, "r"); + if (cli_context->fp == NULL) { + printf("Error in opening file: errno %d\n", errno); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + return; + } + + /* get the filesize then rewind read a io_unit of data from file */ + fseek(cli_context->fp, 0L, SEEK_END); + cli_context->filesize = ftell(cli_context->fp); + rewind(cli_context->fp); + cli_context->bytes_so_far = fread(cli_context->buff, NUM_IO_UNITS, + cli_context->io_unit_size, + cli_context->fp); + + /* if the file is < a io_unit, fill the rest with 0s */ + if (cli_context->filesize < cli_context->io_unit_size) { + uint8_t *offset = + cli_context->buff + cli_context->filesize; + + memset(offset, 0, + cli_context->io_unit_size - cli_context->filesize); + } + + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_imp_cb, cli_context); + } +} + +/* + * Callback function for writing a specific pattern to io_unit 0. 
+ */ +static void +write_cb(void *arg1, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in write completion", + bserrno); + return; + } + printf("."); + if (++cli_context->io_unit_count < cli_context->blob_io_units) { + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, cli_context->io_unit_count, + NUM_IO_UNITS, write_cb, cli_context); + } else { + /* done writing */ + printf("\nBlob fill complete (with 0x%x).\n", cli_context->fill_value); + spdk_blob_close(cli_context->blob, close_cb, cli_context); + } +} + +/* + * Callback function to fill a blob with a value, callback from open. + */ +static void +fill_blob_cb(void *arg1, struct spdk_blob *blob, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in open callback", + bserrno); + return; + } + + cli_context->blob = blob; + cli_context->io_unit_count = 0; + cli_context->blob_io_units = spdk_blob_get_num_io_units(cli_context->blob); + cli_context->buff = spdk_malloc(cli_context->io_unit_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (cli_context->buff == NULL) { + unload_bs(cli_context, "Error in allocating memory", + -ENOMEM); + return; + } + + memset(cli_context->buff, cli_context->fill_value, + cli_context->io_unit_size); + printf("Working"); + spdk_blob_io_write(cli_context->blob, cli_context->channel, + cli_context->buff, + STARTING_IO_UNIT, NUM_IO_UNITS, write_cb, cli_context); +} + +/* + * Multiple actions require us to open the bs first so here we use + * a common callback to set a bunch of values and then move on to + * the next step saved off via function pointer. 
+ */ +static void +load_bs_cb(void *arg1, struct spdk_blob_store *bs, int bserrno) +{ + struct cli_context_t *cli_context = arg1; + + if (bserrno) { + unload_bs(cli_context, "Error in load callback", + bserrno); + return; + } + + cli_context->bs = bs; + cli_context->page_size = spdk_bs_get_page_size(cli_context->bs); + cli_context->io_unit_size = spdk_bs_get_io_unit_size(cli_context->bs); + cli_context->channel = spdk_bs_alloc_io_channel(cli_context->bs); + if (cli_context->channel == NULL) { + unload_bs(cli_context, "Error in allocating channel", + -ENOMEM); + return; + } + + switch (cli_context->action) { + case CLI_SET_SUPER: + spdk_bs_set_super(cli_context->bs, cli_context->superid, + set_super_cb, cli_context); + break; + case CLI_SHOW_BS: + spdk_bs_get_super(cli_context->bs, show_bs_cb, cli_context); + break; + case CLI_CREATE_BLOB: + spdk_bs_create_blob(cli_context->bs, blob_create_cb, cli_context); + break; + case CLI_SET_XATTR: + case CLI_REM_XATTR: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + set_xattr_cb, cli_context); + break; + case CLI_SHOW_BLOB: + case CLI_LIST_BLOBS: + spdk_bs_iter_first(cli_context->bs, blob_iter_cb, cli_context); + + break; + case CLI_DUMP_BLOB: + case CLI_IMPORT_BLOB: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + dump_imp_open_cb, cli_context); + break; + case CLI_FILL: + spdk_bs_open_blob(cli_context->bs, cli_context->blobid, + fill_blob_cb, cli_context); + break; + + default: + /* should never get here */ + exit(-1); + break; + } +} + +/* + * Load the blobstore. 
+ */ +static void +load_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + struct spdk_bs_dev *bs_dev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + + bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_load(bs_dev, NULL, load_bs_cb, cli_context); +} + +/* + * Lists all the blobs on this blobstore. + */ +static void +list_bdevs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + printf("\nList bdevs:\n"); + + bdev = spdk_bdev_first(); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + } + while (bdev) { + printf("\tbdev Name: %s\n", spdk_bdev_get_name(bdev)); + printf("\tbdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + bdev = spdk_bdev_next(bdev); + } + + printf("\n"); + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + } else { + cli_context->action = CLI_NONE; + cli_start(cli_context); + } +} + +/* + * Callback function for initializing a blob. + */ +static void +bs_init_cb(void *cb_arg, struct spdk_blob_store *bs, + int bserrno) +{ + struct cli_context_t *cli_context = cb_arg; + + if (bserrno) { + unload_bs(cli_context, "Error in bs init callback", + bserrno); + return; + } + cli_context->bs = bs; + printf("blobstore init'd: (%p)\n", cli_context->bs); + + unload_bs(cli_context, "", 0); +} + +/* + * Initialize a new blobstore. 
+ */ +static void +init_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + printf("Init blobstore using bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + + cli_context->bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (cli_context->bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_init(cli_context->bs_dev, NULL, bs_init_cb, + cli_context); +} + +static void +spdk_bsdump_done(void *arg, int bserrno) +{ + struct cli_context_t *cli_context = arg; + + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + } else { + cli_context->action = CLI_NONE; + cli_start(cli_context); + } +} + +static void +bsdump_print_xattr(FILE *fp, const char *bstype, const char *name, const void *value, + size_t value_len) +{ + if (strncmp(bstype, "BLOBFS", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, "name") == 0) { + fprintf(fp, "%.*s", (int)value_len, (char *)value); + } else if (strcmp(name, "length") == 0 && value_len == sizeof(uint64_t)) { + uint64_t length; + + memcpy(&length, value, sizeof(length)); + fprintf(fp, "%" PRIu64, length); + } else { + fprintf(fp, "?"); + } + } else if (strncmp(bstype, "LVOLSTORE", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, "name") == 0) { + fprintf(fp, "%s", (char *)value); + } else if (strcmp(name, "uuid") == 0 && value_len == sizeof(struct spdk_uuid)) { + char uuid[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), (struct spdk_uuid *)value); + fprintf(fp, "%s", uuid); + } else { + fprintf(fp, "?"); + } + } else { + fprintf(fp, "?"); + } +} + +/* + * Dump metadata of an existing blobstore in a human-readable format. 
+ */ +static void +dump_bs(struct cli_context_t *cli_context) +{ + struct spdk_bdev *bdev = NULL; + + bdev = spdk_bdev_get_by_name(cli_context->bdev_name); + if (bdev == NULL) { + printf("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + printf("Init blobstore using bdev Product Name: %s\n", + spdk_bdev_get_product_name(bdev)); + + cli_context->bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (cli_context->bs_dev == NULL) { + printf("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_dump(cli_context->bs_dev, stdout, bsdump_print_xattr, spdk_bsdump_done, cli_context); +} + +/* + * Common cmd/option parser for command and shell modes. + */ +static bool +cmd_parser(int argc, char **argv, struct cli_context_t *cli_context) +{ + int op; + int cmd_chosen = 0; + char resp; + + while ((op = getopt(argc, argv, "b:c:d:f:hil:m:n:p:r:s:DST:Xx:")) != -1) { + switch (op) { + case 'b': + if (strcmp(cli_context->bdev_name, "") == 0) { + snprintf(cli_context->bdev_name, BUFSIZE, "%s", optarg); + } else { + printf("Current setting for -b is: %s\n", cli_context->bdev_name); + usage(cli_context, "ERROR: -b option can only be set once.\n"); + } + break; + case 'c': + if (cli_context->app_started == false) { + cli_context->config_file = optarg; + } else { + usage(cli_context, "ERROR: -c option not valid during shell mode.\n"); + } + break; + case 'D': + cmd_chosen++; + cli_context->action = CLI_DUMP_BS; + break; + case 'd': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_DUMP_BLOB; + cli_context->blobid = spdk_strtoll(optarg, 10); + snprintf(cli_context->file, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'f': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_FILL; + cli_context->blobid = spdk_strtoll(optarg, 10); + cli_context->fill_value = spdk_strtol(argv[optind], 10); + } else { + usage(cli_context, "ERROR: missing 
parameter.\n"); + } + break; + case 'h': + cmd_chosen++; + cli_context->action = CLI_HELP; + break; + case 'i': + if (cli_context->cli_mode != CLI_MODE_SCRIPT) { + printf("Your entire blobstore will be destroyed. Are you sure? (y/n) "); + if (scanf("%c%*c", &resp)) { + if (resp == 'y' || resp == 'Y') { + cmd_chosen++; + cli_context->action = CLI_INIT_BS; + } else { + if (cli_context->cli_mode == CLI_MODE_CMD) { + spdk_app_stop(0); + return false; + } + } + } + } else { + cmd_chosen++; + cli_context->action = CLI_INIT_BS; + } + break; + case 'r': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_REM_XATTR; + cli_context->blobid = spdk_strtoll(optarg, 10); + snprintf(cli_context->key, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'l': + if (strcmp("bdevs", optarg) == 0) { + cmd_chosen++; + cli_context->action = CLI_LIST_BDEVS; + } else if (strcmp("blobs", optarg) == 0) { + cmd_chosen++; + cli_context->action = CLI_LIST_BLOBS; + } else { + usage(cli_context, "ERROR: invalid option for list\n"); + } + break; + case 'm': + if (argv[optind] != NULL) { + cmd_chosen++; + cli_context->action = CLI_IMPORT_BLOB; + cli_context->blobid = spdk_strtoll(optarg, 10); + snprintf(cli_context->file, BUFSIZE, "%s", argv[optind]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + case 'n': + cli_context->num_clusters = spdk_strtol(optarg, 10); + if (cli_context->num_clusters > 0) { + cmd_chosen++; + cli_context->action = CLI_CREATE_BLOB; + } else { + usage(cli_context, "ERROR: invalid option for new\n"); + } + break; + case 'p': + cmd_chosen++; + cli_context->action = CLI_SET_SUPER; + cli_context->superid = spdk_strtoll(optarg, 10); + break; + case 'S': + if (cli_context->cli_mode == CLI_MODE_CMD) { + cmd_chosen++; + cli_context->cli_mode = CLI_MODE_SHELL; + } + cli_context->action = CLI_NONE; + break; + case 's': + cmd_chosen++; + if (strcmp("bs", optarg) == 0) { + 
cli_context->action = CLI_SHOW_BS; + } else { + cli_context->action = CLI_SHOW_BLOB; + cli_context->blobid = spdk_strtoll(optarg, 10); + } + break; + case 'T': + if (cli_context->cli_mode == CLI_MODE_CMD) { + cmd_chosen++; + cli_context->cli_mode = CLI_MODE_SCRIPT; + if (argv[optind] && (strcmp("ignore", argv[optind]) == 0)) { + g_script.ignore_errors = true; + } else { + g_script.ignore_errors = false; + } + snprintf(cli_context->script_file, BUFSIZE, "%s", optarg); + } else { + cli_context->action = CLI_NONE; + } + break; + case 'X': + cmd_chosen++; + cli_context->action = CLI_SHELL_EXIT; + break; + case 'x': + if (argv[optind] != NULL || argv[optind + 1] != NULL) { + cmd_chosen++; + cli_context->action = CLI_SET_XATTR; + cli_context->blobid = spdk_strtoll(optarg, 10); + snprintf(cli_context->key, BUFSIZE, "%s", argv[optind]); + snprintf(cli_context->value, BUFSIZE, "%s", argv[optind + 1]); + } else { + usage(cli_context, "ERROR: missing parameter.\n"); + } + break; + default: + usage(cli_context, "ERROR: invalid option\n"); + } + /* only one actual command can be done at a time */ + if (cmd_chosen > 1) { + usage(cli_context, "Error: Please choose only one command\n"); + } + } + + if (cli_context->cli_mode == CLI_MODE_CMD && cmd_chosen == 0) { + usage(cli_context, "Error: Please choose a command.\n"); + } + + /* + * We don't check the local boolean because in some modes it will have been set + * on and earlier command. + */ + if (strcmp(cli_context->bdev_name, "") == 0) { + usage(cli_context, "Error: -b option is required.\n"); + cmd_chosen = 0; + } + + /* in shell mode we'll call getopt multiple times so need to reset its index */ + optind = 0; + return (cmd_chosen == 1); +} + +/* + * In script mode, we parsed a script file at startup and saved off a bunch of cmd + * lines that we now parse with each run of cli_start so we us the same cmd parser + * as cmd and shell modes. 
+ */ +static bool +line_parser(struct cli_context_t *cli_context) +{ + bool cmd_chosen; + char *tok = NULL; + int blob_num = 0; + int start_idx = cli_context->argc; + int i; + + printf("\nSCRIPT NOW PROCESSING: %s\n", g_script.cmdline[g_script.cmdline_idx]); + tok = strtok(g_script.cmdline[g_script.cmdline_idx], " "); + while (tok != NULL) { + /* + * We support one replaceable token right now, a $Bn + * represents the blobid that was created in position n + * so fish this out now and use it here. + */ + cli_context->argv[cli_context->argc] = strdup(tok); + if (tok[0] == '$' && tok[1] == 'B') { + tok += 2; + blob_num = spdk_strtol(tok, 10); + if (blob_num >= 0 && blob_num < MAX_SCRIPT_BLOBS) { + cli_context->argv[cli_context->argc] = + realloc(cli_context->argv[cli_context->argc], BUFSIZE); + if (cli_context->argv[cli_context->argc] == NULL) { + printf("ERROR: unable to realloc memory\n"); + spdk_app_stop(-1); + } + if (g_script.blobid[blob_num] == 0) { + printf("ERROR: There is no blob for $B%d\n", + blob_num); + } + snprintf(cli_context->argv[cli_context->argc], BUFSIZE, + "%" PRIu64, g_script.blobid[blob_num]); + } else { + printf("ERROR: Invalid token or exceeded max blobs of %d\n", + MAX_SCRIPT_BLOBS); + } + } + cli_context->argc++; + tok = strtok(NULL, " "); + } + + /* call parse cmd line with user input as args */ + cmd_chosen = cmd_parser(cli_context->argc, &cli_context->argv[0], cli_context); + + /* free strdup memory and reset arg count for next shell interaction */ + for (i = start_idx; i < cli_context->argc; i++) { + free(cli_context->argv[i]); + cli_context->argv[i] = NULL; + } + cli_context->argc = 1; + + g_script.cmdline_idx++; + assert(g_script.cmdline_idx < MAX_SCRIPT_LINES); + + if (cmd_chosen == false) { + printf("ERROR: Invalid script line starting with: %s\n\n", + g_script.cmdline[g_script.cmdline_idx - 1]); + if (g_script.ignore_errors == false) { + printf("** Aborting **\n"); + cli_context->action = CLI_SHELL_EXIT; + cmd_chosen = true; + 
unload_bs(cli_context, "", 0); + } else { + printf("** Skipping **\n"); + } + } + + return cmd_chosen; +} + +/* + * For script mode, we read a series of commands from a text file and store them + * in a global struct. That, along with the cli_mode that tells us we're in + * script mode is what feeds the rest of the app in the same way as is it were + * getting commands from shell mode. + */ +static void +parse_script(struct cli_context_t *cli_context) +{ + FILE *fp = NULL; + size_t bufsize = BUFSIZE; + int64_t bytes_in = 0; + int i = 0; + + /* initialize global script values */ + for (i = 0; i < MAX_SCRIPT_BLOBS; i++) { + g_script.blobid[i] = 0; + } + g_script.blobid_idx = 0; + g_script.cmdline_idx = 0; + i = 0; + + fp = fopen(cli_context->script_file, "r"); + if (fp == NULL) { + printf("ERROR: unable to open script: %s\n", + cli_context->script_file); + cli_cleanup(cli_context); + exit(-1); + } + + do { + bytes_in = getline(&g_script.cmdline[i], &bufsize, fp); + if (bytes_in > 0) { + /* replace newline with null */ + spdk_str_chomp(g_script.cmdline[i]); + + /* ignore comments */ + if (g_script.cmdline[i][0] != '#') { + i++; + } + } + } while (bytes_in != -1 && i < MAX_SCRIPT_LINES - 1); + fclose(fp); + + /* add an exit cmd in case they didn't */ + g_script.cmdline[i] = realloc(g_script.cmdline[i], BUFSIZE); + if (g_script.cmdline[i] == NULL) { + int j; + + for (j = 0; j < i; j++) { + free(g_script.cmdline[j]); + g_script.cmdline[j] = NULL; + } + unload_bs(cli_context, "ERROR: unable to alloc memory.\n", 0); + } + snprintf(g_script.cmdline[i], BUFSIZE, "%s", "-X"); + g_script.max_index = i; +} + +/* + * Provides for a shell interface as opposed to one shot command line. 
+ */ +static bool +cli_shell(void *arg1, void *arg2) +{ + struct cli_context_t *cli_context = arg1; + char *line = NULL; + ssize_t buf_size = 0; + ssize_t bytes_in = 0; + ssize_t tok_len = 0; + char *tok = NULL; + bool cmd_chosen = false; + int start_idx = cli_context->argc; + int i; + + printf("blob> "); + bytes_in = getline(&line, &buf_size, stdin); + + /* If getline() failed (EOF), exit the shell. */ + if (bytes_in < 0) { + free(line); + cli_context->action = CLI_SHELL_EXIT; + return true; + } + + /* parse input and update cli_context so we can use common option parser */ + if (bytes_in > 0) { + tok = strtok(line, " "); + } + while ((tok != NULL) && (cli_context->argc < MAX_ARGS)) { + cli_context->argv[cli_context->argc] = strdup(tok); + tok_len = strlen(tok); + cli_context->argc++; + tok = strtok(NULL, " "); + } + + /* replace newline on last arg with null */ + if (tok_len) { + spdk_str_chomp(cli_context->argv[cli_context->argc - 1]); + } + + /* call parse cmd line with user input as args */ + cmd_chosen = cmd_parser(cli_context->argc, &cli_context->argv[0], cli_context); + + /* free strdup mem & reset arg count for next shell interaction */ + for (i = start_idx; i < cli_context->argc; i++) { + free(cli_context->argv[i]); + cli_context->argv[i] = NULL; + } + cli_context->argc = 1; + + free(line); + + return cmd_chosen; +} + +/* + * This is the function we pass into the SPDK framework that gets + * called first. + */ +static void +cli_start(void *arg1) +{ + struct cli_context_t *cli_context = arg1; + + /* + * If we're in script mode, we already have a list of commands so + * just need to pull them out one at a time and process them. + */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + while (line_parser(cli_context) == false); + } + + /* + * The initial cmd line options are parsed once before this function is + * called so if there is no action, we're in shell mode and will loop + * here until a a valid option is parsed and returned. 
+ */ + if (cli_context->action == CLI_NONE) { + while (cli_shell(cli_context, NULL) == false); + } + + /* Decide what to do next based on cmd line parsing. */ + switch (cli_context->action) { + case CLI_SET_SUPER: + case CLI_SHOW_BS: + case CLI_CREATE_BLOB: + case CLI_SET_XATTR: + case CLI_REM_XATTR: + case CLI_SHOW_BLOB: + case CLI_LIST_BLOBS: + case CLI_DUMP_BLOB: + case CLI_IMPORT_BLOB: + case CLI_FILL: + load_bs(cli_context); + break; + case CLI_INIT_BS: + init_bs(cli_context); + break; + case CLI_DUMP_BS: + dump_bs(cli_context); + break; + case CLI_LIST_BDEVS: + list_bdevs(cli_context); + break; + case CLI_SHELL_EXIT: + /* + * Because shell mode reuses cmd mode functions, the blobstore + * is loaded/unloaded with every action so we just need to + * stop the framework. For this app there's no need to optimize + * and keep the blobstore open while the app is in shell mode. + */ + spdk_app_stop(0); + break; + case CLI_HELP: + usage(cli_context, ""); + unload_complete(cli_context, 0); + break; + default: + /* should never get here */ + exit(-1); + break; + } +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + struct cli_context_t *cli_context = NULL; + bool cmd_chosen; + int rc = 0; + + if (argc < 2) { + usage(cli_context, "ERROR: Invalid option\n"); + exit(-1); + } + + cli_context = calloc(1, sizeof(struct cli_context_t)); + if (cli_context == NULL) { + printf("ERROR: could not allocate context structure\n"); + exit(-1); + } + + /* default to CMD mode until we've parsed the first parms */ + cli_context->cli_mode = CLI_MODE_CMD; + cli_context->argv[0] = strdup(argv[0]); + cli_context->argc = 1; + + /* parse command line */ + cmd_chosen = cmd_parser(argc, argv, cli_context); + free(cli_context->argv[0]); + cli_context->argv[0] = NULL; + if (cmd_chosen == false) { + cli_cleanup(cli_context); + exit(-1); + } + + /* after displaying help, just exit */ + if (cli_context->action == CLI_HELP) { + usage(cli_context, ""); + 
cli_cleanup(cli_context); + exit(-1); + } + + /* if they don't supply a conf name, use the default */ + if (!cli_context->config_file) { + cli_context->config_file = program_conf; + } + + /* if the config file doesn't exist, tell them how to make one */ + if (access(cli_context->config_file, F_OK) == -1) { + printf("Error: No config file found.\n"); + printf("To create a config file named 'blobcli.conf' for your NVMe device:\n"); + printf(" <path to spdk>/scripts/gen_nvme.sh > blobcli.conf\n"); + printf("and then re-run the cli tool.\n"); + exit(-1); + } + + /* + * For script mode we keep a bunch of stuff in a global since + * none if it is passed back and forth to SPDK. + */ + if (cli_context->cli_mode == CLI_MODE_SCRIPT) { + /* + * Now we'll build up the global which will direct this run of the app + * as it will have a list (g_script) of all of the commands line by + * line as if they were typed in on the shell at cmd line. + */ + parse_script(cli_context); + } + + /* Set default values in opts struct along with name and conf file. */ + spdk_app_opts_init(&opts); + opts.name = "blobcli"; + opts.config_file = cli_context->config_file; + + cli_context->app_started = true; + rc = spdk_app_start(&opts, cli_start, cli_context); + if (rc) { + printf("ERROR!\n"); + } + + /* Free up memory that we allocated */ + cli_cleanup(cli_context); + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/hello_world/.gitignore b/src/spdk/examples/blob/hello_world/.gitignore new file mode 100644 index 000000000..683a22556 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/.gitignore @@ -0,0 +1 @@ +hello_blob diff --git a/src/spdk/examples/blob/hello_world/Makefile b/src/spdk/examples/blob/hello_world/Makefile new file mode 100644 index 000000000..0b5e89396 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_blob + +C_SRCS := hello_blob.c + +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += bdev accel event thread util conf trace \ + log jsonrpc json rpc sock notify + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/blob/hello_world/hello_blob.c b/src/spdk/examples/blob/hello_world/hello_blob.c new file mode 100644 index 000000000..41730ce86 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/hello_blob.c @@ -0,0 +1,498 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/log.h" + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. + */ +struct hello_context_t { + struct spdk_blob_store *bs; + struct spdk_blob *blob; + spdk_blob_id blobid; + struct spdk_io_channel *channel; + uint8_t *read_buff; + uint8_t *write_buff; + uint64_t io_unit_size; + int rc; +}; + +/* + * Free up memory that we allocated. + */ +static void +hello_cleanup(struct hello_context_t *hello_context) +{ + spdk_free(hello_context->read_buff); + spdk_free(hello_context->write_buff); + free(hello_context); +} + +/* + * Callback routine for the blobstore unload. + */ +static void +unload_complete(void *cb_arg, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + SPDK_ERRLOG("Error %d unloading the bobstore\n", bserrno); + hello_context->rc = bserrno; + } + + spdk_app_stop(hello_context->rc); +} + +/* + * Unload the blobstore, cleaning up as needed. 
+ */ +static void +unload_bs(struct hello_context_t *hello_context, char *msg, int bserrno) +{ + if (bserrno) { + SPDK_ERRLOG("%s (err %d)\n", msg, bserrno); + hello_context->rc = bserrno; + } + if (hello_context->bs) { + if (hello_context->channel) { + spdk_bs_free_io_channel(hello_context->channel); + } + spdk_bs_unload(hello_context->bs, unload_complete, hello_context); + } else { + spdk_app_stop(bserrno); + } +} + +/* + * Callback routine for the deletion of a blob. + */ +static void +delete_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in delete completion", + bserrno); + return; + } + + /* We're all done, we can unload the blobstore. */ + unload_bs(hello_context, "", 0); +} + +/* + * Function for deleting a blob. + */ +static void +delete_blob(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in close completion", + bserrno); + return; + } + + spdk_bs_delete_blob(hello_context->bs, hello_context->blobid, + delete_complete, hello_context); +} + +/* + * Callback function for reading a blob. + */ +static void +read_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + int match_res = -1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in read completion", + bserrno); + return; + } + + /* Now let's make sure things match. */ + match_res = memcmp(hello_context->write_buff, hello_context->read_buff, + hello_context->io_unit_size); + if (match_res) { + unload_bs(hello_context, "Error in data compare", -1); + return; + } else { + SPDK_NOTICELOG("read SUCCESS and data matches!\n"); + } + + /* Now let's close it and delete the blob in the callback. */ + spdk_blob_close(hello_context->blob, delete_blob, hello_context); +} + +/* + * Function for reading a blob. 
+ */ +static void +read_blob(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + + hello_context->read_buff = spdk_malloc(hello_context->io_unit_size, + 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (hello_context->read_buff == NULL) { + unload_bs(hello_context, "Error in memory allocation", + -ENOMEM); + return; + } + + /* Issue the read and compare the results in the callback. */ + spdk_blob_io_read(hello_context->blob, hello_context->channel, + hello_context->read_buff, 0, 1, read_complete, + hello_context); +} + +/* + * Callback function for writing a blob. + */ +static void +write_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in write completion", + bserrno); + return; + } + + /* Now let's read back what we wrote and make sure it matches. */ + read_blob(hello_context); +} + +/* + * Function for writing to a blob. + */ +static void +blob_write(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + + /* + * Buffers for data transfer need to be allocated via SPDK. We will + * transfer 1 io_unit of 4K aligned data at offset 0 in the blob. + */ + hello_context->write_buff = spdk_malloc(hello_context->io_unit_size, + 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (hello_context->write_buff == NULL) { + unload_bs(hello_context, "Error in allocating memory", + -ENOMEM); + return; + } + memset(hello_context->write_buff, 0x5a, hello_context->io_unit_size); + + /* Now we have to allocate a channel. */ + hello_context->channel = spdk_bs_alloc_io_channel(hello_context->bs); + if (hello_context->channel == NULL) { + unload_bs(hello_context, "Error in allocating channel", + -ENOMEM); + return; + } + + /* Let's perform the write, 1 io_unit at offset 0. 
*/ + spdk_blob_io_write(hello_context->blob, hello_context->channel, + hello_context->write_buff, + 0, 1, write_complete, hello_context); +} + +/* + * Callback function for sync'ing metadata. + */ +static void +sync_complete(void *arg1, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in sync callback", + bserrno); + return; + } + + /* Blob has been created & sized & MD sync'd, let's write to it. */ + blob_write(hello_context); +} + +static void +resize_complete(void *cb_arg, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + uint64_t total = 0; + + if (bserrno) { + unload_bs(hello_context, "Error in blob resize", bserrno); + return; + } + + total = spdk_blob_get_num_clusters(hello_context->blob); + SPDK_NOTICELOG("resized blob now has USED clusters of %" PRIu64 "\n", + total); + + /* + * Metadata is stored in volatile memory for performance + * reasons and therefore needs to be synchronized with + * non-volatile storage to make it persistent. This can be + * done manually, as shown here, or if not it will be done + * automatically when the blob is closed. It is always a + * good idea to sync after making metadata changes unless + * it has an unacceptable impact on application performance. + */ + spdk_blob_sync_md(hello_context->blob, sync_complete, hello_context); +} + +/* + * Callback function for opening a blob. + */ +static void +open_complete(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + uint64_t free = 0; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in open completion", + bserrno); + return; + } + + + hello_context->blob = blob; + free = spdk_bs_free_cluster_count(hello_context->bs); + SPDK_NOTICELOG("blobstore has FREE clusters of %" PRIu64 "\n", + free); + + /* + * Before we can use our new blob, we have to resize it + * as the initial size is 0. 
For this example we'll use the + * full size of the blobstore but it would be expected that + * there'd usually be many blobs of various sizes. The resize + * unit is a cluster. + */ + spdk_blob_resize(hello_context->blob, free, resize_complete, hello_context); +} + +/* + * Callback function for creating a blob. + */ +static void +blob_create_complete(void *arg1, spdk_blob_id blobid, int bserrno) +{ + struct hello_context_t *hello_context = arg1; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error in blob create callback", + bserrno); + return; + } + + hello_context->blobid = blobid; + SPDK_NOTICELOG("new blob id %" PRIu64 "\n", hello_context->blobid); + + /* We have to open the blob before we can do things like resize. */ + spdk_bs_open_blob(hello_context->bs, hello_context->blobid, + open_complete, hello_context); +} + +/* + * Function for creating a blob. + */ +static void +create_blob(struct hello_context_t *hello_context) +{ + SPDK_NOTICELOG("entry\n"); + spdk_bs_create_blob(hello_context->bs, blob_create_complete, hello_context); +} + +/* + * Callback function for initializing the blobstore. + */ +static void +bs_init_complete(void *cb_arg, struct spdk_blob_store *bs, + int bserrno) +{ + struct hello_context_t *hello_context = cb_arg; + + SPDK_NOTICELOG("entry\n"); + if (bserrno) { + unload_bs(hello_context, "Error init'ing the blobstore", + bserrno); + return; + } + + hello_context->bs = bs; + SPDK_NOTICELOG("blobstore: %p\n", hello_context->bs); + /* + * We will use the io_unit size in allocating buffers, etc., later + * so we'll just save it in out context buffer here. + */ + hello_context->io_unit_size = spdk_bs_get_io_unit_size(hello_context->bs); + + /* + * The blobstore has been initialized, let's create a blob. + * Note that we could pass a message back to ourselves using + * spdk_thread_send_msg() if we wanted to keep our processing + * time limited. 
+ */ + create_blob(hello_context); +} + +/* + * Our initial event that kicks off everything from main(). + */ +static void +hello_start(void *arg1) +{ + struct hello_context_t *hello_context = arg1; + struct spdk_bdev *bdev = NULL; + struct spdk_bs_dev *bs_dev = NULL; + + SPDK_NOTICELOG("entry\n"); + /* + * Get the bdev. For this example it is our malloc (RAM) + * disk configured via hello_blob.conf that was passed + * in when we started the SPDK app framework so we can + * get it via its name. + */ + bdev = spdk_bdev_get_by_name("Malloc0"); + if (bdev == NULL) { + SPDK_ERRLOG("Could not find a bdev\n"); + spdk_app_stop(-1); + return; + } + + /* + * spdk_bs_init() requires us to fill out the structure + * spdk_bs_dev with a set of callbacks. These callbacks + * implement read, write, and other operations on the + * underlying disks. As a convenience, a utility function + * is provided that creates an spdk_bs_dev that implements + * all of the callbacks by forwarding the I/O to the + * SPDK bdev layer. Other helper functions are also + * available in the blob lib in blob_bdev.c that simply + * make it easier to layer blobstore on top of a bdev. + * However blobstore can be more tightly integrated into + * any lower layer, such as NVMe for example. + */ + bs_dev = spdk_bdev_create_bs_dev(bdev, NULL, NULL); + if (bs_dev == NULL) { + SPDK_ERRLOG("Could not create blob bdev!!\n"); + spdk_app_stop(-1); + return; + } + + spdk_bs_init(bs_dev, NULL, bs_init_complete, hello_context); +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t *hello_context = NULL; + + SPDK_NOTICELOG("entry\n"); + + /* Set default values in opts structure. */ + spdk_app_opts_init(&opts); + + /* + * Setup a few specifics before we init, for most SPDK cmd line + * apps, the config file will be passed in as an arg but to make + * this example super simple we just hardcode it. We also need to + * specify a name for the app. 
+ */ + opts.name = "hello_blob"; + opts.json_config_file = argv[1]; + + + /* + * Now we'll allocate and initialize the blobstore itself. We + * can pass in an spdk_bs_opts if we want something other than + * the defaults (cluster size, etc), but here we'll just take the + * defaults. We'll also pass in a struct that we'll use for + * callbacks so we've got efficient bookeeping of what we're + * creating. This is an async operation and bs_init_complete() + * will be called when it is complete. + */ + hello_context = calloc(1, sizeof(struct hello_context_t)); + if (hello_context != NULL) { + /* + * spdk_app_start() will block running hello_start() until + * spdk_app_stop() is called by someone (not simply when + * hello_start() returns), or if an error occurs during + * spdk_app_start() before hello_start() runs. + */ + rc = spdk_app_start(&opts, hello_start, hello_context); + if (rc) { + SPDK_NOTICELOG("ERROR!\n"); + } else { + SPDK_NOTICELOG("SUCCESS!\n"); + } + /* Free up memory that we allocated */ + hello_cleanup(hello_context); + } else { + SPDK_ERRLOG("Could not alloc hello_context struct!!\n"); + rc = -ENOMEM; + } + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/blob/hello_world/hello_blob.json b/src/spdk/examples/blob/hello_world/hello_blob.json new file mode 100644 index 000000000..10ded9d47 --- /dev/null +++ b/src/spdk/examples/blob/hello_world/hello_blob.json @@ -0,0 +1,17 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_malloc_create", + "params": { + "name": "Malloc0", + "num_blocks": 32768, + "block_size": 512 + } + } + ] + } + ] +} diff --git a/src/spdk/examples/ioat/Makefile b/src/spdk/examples/ioat/Makefile new file mode 100644 index 000000000..d4d62b91e --- /dev/null +++ b/src/spdk/examples/ioat/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += perf verify + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/ioat/perf/.gitignore b/src/spdk/examples/ioat/perf/.gitignore new file mode 100644 index 000000000..60abaee49 --- /dev/null +++ b/src/spdk/examples/ioat/perf/.gitignore @@ -0,0 +1 @@ +ioat_perf diff --git a/src/spdk/examples/ioat/perf/Makefile b/src/spdk/examples/ioat/perf/Makefile new file mode 100644 index 000000000..5586b8120 --- /dev/null +++ b/src/spdk/examples/ioat/perf/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = ioat_perf + +C_SRCS := perf.c + +SPDK_LIB_LIST = ioat thread util log + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/ioat/perf/perf.c b/src/spdk/examples/ioat/perf/perf.c new file mode 100644 index 000000000..e2d94f268 --- /dev/null +++ b/src/spdk/examples/ioat/perf/perf.c @@ -0,0 +1,596 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/ioat.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/string.h" + +struct user_config { + int xfer_size_bytes; + int queue_depth; + int time_in_sec; + bool verify; + char *core_mask; + int ioat_chan_num; +}; + +struct ioat_device { + struct spdk_ioat_chan *ioat; + TAILQ_ENTRY(ioat_device) tailq; +}; + +static TAILQ_HEAD(, ioat_device) g_devices; +static struct ioat_device *g_next_device; + +static struct user_config g_user_config; + +struct ioat_chan_entry { + struct spdk_ioat_chan *chan; + int ioat_chan_id; + uint64_t xfer_completed; + uint64_t xfer_failed; + uint64_t current_queue_depth; + uint64_t waiting_for_flush; + uint64_t flush_threshold; + bool is_draining; + struct spdk_mempool *data_pool; + struct spdk_mempool *task_pool; + struct ioat_chan_entry *next; +}; + +struct worker_thread { + struct ioat_chan_entry *ctx; + struct worker_thread *next; + unsigned core; +}; + +struct ioat_task { + struct ioat_chan_entry *ioat_chan_entry; + void *src; + void *dst; +}; + +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; +static int g_ioat_chan_num = 0; + +static void 
submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct ioat_task *ioat_task, + void *dst, void *src); + +static void +construct_user_config(struct user_config *self) +{ + self->xfer_size_bytes = 4096; + self->ioat_chan_num = 1; + self->queue_depth = 256; + self->time_in_sec = 10; + self->verify = false; + self->core_mask = "0x1"; +} + +static void +dump_user_config(struct user_config *self) +{ + printf("User configuration:\n"); + printf("Number of channels: %u\n", self->ioat_chan_num); + printf("Transfer size: %u bytes\n", self->xfer_size_bytes); + printf("Queue depth: %u\n", self->queue_depth); + printf("Run time: %u seconds\n", self->time_in_sec); + printf("Core mask: %s\n", self->core_mask); + printf("Verify: %s\n\n", self->verify ? "Yes" : "No"); +} + +static void +ioat_exit(void) +{ + struct ioat_device *dev; + + while (!TAILQ_EMPTY(&g_devices)) { + dev = TAILQ_FIRST(&g_devices); + TAILQ_REMOVE(&g_devices, dev, tailq); + if (dev->ioat) { + spdk_ioat_detach(dev->ioat); + } + spdk_dma_free(dev); + } +} + +static void +ioat_done(void *cb_arg) +{ + struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct ioat_chan_entry *ioat_chan_entry = ioat_task->ioat_chan_entry; + + if (g_user_config.verify && memcmp(ioat_task->src, ioat_task->dst, g_user_config.xfer_size_bytes)) { + ioat_chan_entry->xfer_failed++; + } else { + ioat_chan_entry->xfer_completed++; + } + + ioat_chan_entry->current_queue_depth--; + + if (ioat_chan_entry->is_draining) { + spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->src); + spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->dst); + spdk_mempool_put(ioat_chan_entry->task_pool, ioat_task); + } else { + submit_single_xfer(ioat_chan_entry, ioat_task, ioat_task->dst, ioat_task->src); + } +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + 
fprintf(stderr, "Unable to allocate worker\n"); + return 1; + } + + worker->core = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + struct ioat_chan_entry *entry, *entry1; + + /* Free ioat_chan_entry and worker thread */ + while (worker) { + struct worker_thread *next_worker = worker->next; + entry = worker->ctx; + while (entry) { + entry1 = entry->next; + spdk_mempool_free(entry->data_pool); + spdk_mempool_free(entry->task_pool); + free(entry); + entry = entry1; + } + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev) +{ + printf(" Found matching device at %04x:%02x:%02x.%x " + "vendor:0x%04x device:0x%04x\n", + spdk_pci_device_get_domain(pci_dev), + spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev), + spdk_pci_device_get_func(pci_dev), + spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev)); + + return true; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat) +{ + struct ioat_device *dev; + + if (g_ioat_chan_num >= g_user_config.ioat_chan_num) { + return; + } + + dev = spdk_dma_zmalloc(sizeof(*dev), 0, NULL); + if (dev == NULL) { + printf("Failed to allocate device struct\n"); + return; + } + + dev->ioat = ioat; + g_ioat_chan_num++; + TAILQ_INSERT_TAIL(&g_devices, dev, tailq); +} + +static int +ioat_init(void) +{ + TAILQ_INIT(&g_devices); + + if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) { + fprintf(stderr, "ioat_probe() failed\n"); + return 1; + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options\n", program_name); + printf("\t[-h help message]\n"); + printf("\t[-c core mask for distributing I/O submission/completion work]\n"); + printf("\t[-q queue depth]\n"); + printf("\t[-n number of channels]\n"); + printf("\t[-o transfer size 
in bytes]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-v verify copy result if this switch is on]\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + construct_user_config(&g_user_config); + while ((op = getopt(argc, argv, "c:hn:o:q:t:v")) != -1) { + switch (op) { + case 'o': + g_user_config.xfer_size_bytes = spdk_strtol(optarg, 10); + break; + case 'n': + g_user_config.ioat_chan_num = spdk_strtol(optarg, 10); + break; + case 'q': + g_user_config.queue_depth = spdk_strtol(optarg, 10); + break; + case 't': + g_user_config.time_in_sec = spdk_strtol(optarg, 10); + break; + case 'c': + g_user_config.core_mask = optarg; + break; + case 'v': + g_user_config.verify = true; + break; + case 'h': + usage(argv[0]); + exit(0); + default: + usage(argv[0]); + return 1; + } + } + if (g_user_config.xfer_size_bytes <= 0 || g_user_config.queue_depth <= 0 || + g_user_config.time_in_sec <= 0 || !g_user_config.core_mask || + g_user_config.ioat_chan_num <= 0) { + usage(argv[0]); + return 1; + } + + return 0; +} + +static void +drain_io(struct ioat_chan_entry *ioat_chan_entry) +{ + spdk_ioat_flush(ioat_chan_entry->chan); + while (ioat_chan_entry->current_queue_depth > 0) { + spdk_ioat_process_events(ioat_chan_entry->chan); + } +} + +static void +submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct ioat_task *ioat_task, void *dst, + void *src) +{ + ioat_task->ioat_chan_entry = ioat_chan_entry; + ioat_task->src = src; + ioat_task->dst = dst; + + spdk_ioat_build_copy(ioat_chan_entry->chan, ioat_task, ioat_done, dst, src, + g_user_config.xfer_size_bytes); + ioat_chan_entry->waiting_for_flush++; + if (ioat_chan_entry->waiting_for_flush >= ioat_chan_entry->flush_threshold) { + spdk_ioat_flush(ioat_chan_entry->chan); + ioat_chan_entry->waiting_for_flush = 0; + } + + ioat_chan_entry->current_queue_depth++; +} + +static int +submit_xfers(struct ioat_chan_entry *ioat_chan_entry, uint64_t queue_depth) +{ + while (queue_depth-- > 0) { + void *src = 
NULL, *dst = NULL; + struct ioat_task *ioat_task = NULL; + + src = spdk_mempool_get(ioat_chan_entry->data_pool); + dst = spdk_mempool_get(ioat_chan_entry->data_pool); + ioat_task = spdk_mempool_get(ioat_chan_entry->task_pool); + if (!ioat_task) { + fprintf(stderr, "Unable to get ioat_task\n"); + return 1; + } + + submit_single_xfer(ioat_chan_entry, ioat_task, dst, src); + } + return 0; +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ioat_chan_entry *t = NULL; + + printf("Starting thread on core %u\n", worker->core); + + tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz(); + + t = worker->ctx; + while (t != NULL) { + /* begin to submit transfers */ + t->waiting_for_flush = 0; + t->flush_threshold = g_user_config.queue_depth / 2; + if (submit_xfers(t, g_user_config.queue_depth) != 0) { + return 1; + } + t = t->next; + } + + while (1) { + t = worker->ctx; + while (t != NULL) { + spdk_ioat_process_events(t->chan); + t = t->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + t = worker->ctx; + while (t != NULL) { + /* begin to drain io */ + t->is_draining = true; + drain_io(t); + t = t->next; + } + + return 0; +} + +static int +init(void) +{ + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "ioat_perf"; + opts.core_mask = g_user_config.core_mask; + if (spdk_env_init(&opts) < 0) { + return 1; + } + + return 0; +} + +static int +dump_result(void) +{ + uint64_t total_completed = 0; + uint64_t total_failed = 0; + uint64_t total_xfer_per_sec, total_bw_in_MiBps; + struct worker_thread *worker = g_workers; + + printf("Channel_ID Core Transfers Bandwidth Failed\n"); + printf("-----------------------------------------------------------\n"); + while (worker != NULL) { + struct ioat_chan_entry *t = worker->ctx; + while (t) { + uint64_t xfer_per_sec = t->xfer_completed / g_user_config.time_in_sec; + uint64_t bw_in_MiBps = 
(t->xfer_completed * g_user_config.xfer_size_bytes) / + (g_user_config.time_in_sec * 1024 * 1024); + + total_completed += t->xfer_completed; + total_failed += t->xfer_failed; + + if (xfer_per_sec) { + printf("%10d%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n", + t->ioat_chan_id, worker->core, xfer_per_sec, + bw_in_MiBps, t->xfer_failed); + } + t = t->next; + } + worker = worker->next; + } + + total_xfer_per_sec = total_completed / g_user_config.time_in_sec; + total_bw_in_MiBps = (total_completed * g_user_config.xfer_size_bytes) / + (g_user_config.time_in_sec * 1024 * 1024); + + printf("===========================================================\n"); + printf("Total:%26" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n", + total_xfer_per_sec, total_bw_in_MiBps, total_failed); + + return total_failed ? 1 : 0; +} + +static struct spdk_ioat_chan * +get_next_chan(void) +{ + struct spdk_ioat_chan *chan; + + if (g_next_device == NULL) { + return NULL; + } + + chan = g_next_device->ioat; + + g_next_device = TAILQ_NEXT(g_next_device, tailq); + + return chan; +} + +static int +associate_workers_with_chan(void) +{ + struct spdk_ioat_chan *chan = get_next_chan(); + struct worker_thread *worker = g_workers; + struct ioat_chan_entry *t; + char buf_pool_name[30], task_pool_name[30]; + int i = 0; + + while (chan != NULL) { + t = calloc(1, sizeof(struct ioat_chan_entry)); + if (!t) { + return 1; + } + + t->ioat_chan_id = i; + snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%d", i); + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", i); + t->data_pool = spdk_mempool_create(buf_pool_name, + g_user_config.queue_depth * 2, /* src + dst */ + g_user_config.xfer_size_bytes, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + t->task_pool = spdk_mempool_create(task_pool_name, + g_user_config.queue_depth, + sizeof(struct ioat_task), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!t->data_pool || !t->task_pool) { + fprintf(stderr, 
"Could not allocate buffer pool.\n"); + spdk_mempool_free(t->data_pool); + spdk_mempool_free(t->task_pool); + free(t); + return 1; + } + printf("Associating ioat_channel %d with core %d\n", i, worker->core); + t->chan = chan; + t->next = worker->ctx; + worker->ctx = t; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + chan = get_next_chan(); + i++; + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + + if (parse_args(argc, argv) != 0) { + return 1; + } + + if (init() != 0) { + return 1; + } + + if (register_workers() != 0) { + rc = 1; + goto cleanup; + } + + if (ioat_init() != 0) { + rc = 1; + goto cleanup; + } + + if (g_ioat_chan_num == 0) { + printf("No channels found\n"); + rc = 1; + goto cleanup; + } + + if (g_user_config.ioat_chan_num > g_ioat_chan_num) { + printf("%d channels are requested, but only %d are found," + "so only test %d channels\n", g_user_config.ioat_chan_num, + g_ioat_chan_num, g_ioat_chan_num); + g_user_config.ioat_chan_num = g_ioat_chan_num; + } + + g_next_device = TAILQ_FIRST(&g_devices); + dump_user_config(&g_user_config); + + if (associate_workers_with_chan() != 0) { + rc = 1; + goto cleanup; + } + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->core != master_core) { + spdk_env_thread_launch_pinned(worker->core, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + if (rc != 0) { + goto cleanup; + } + + spdk_env_thread_wait_all(); + + rc = dump_result(); + +cleanup: + unregister_workers(); + ioat_exit(); + + return rc; +} diff --git a/src/spdk/examples/ioat/verify/.gitignore b/src/spdk/examples/ioat/verify/.gitignore new file mode 100644 index 000000000..0b5987362 
--- /dev/null +++ b/src/spdk/examples/ioat/verify/.gitignore @@ -0,0 +1 @@ +verify diff --git a/src/spdk/examples/ioat/verify/Makefile b/src/spdk/examples/ioat/verify/Makefile new file mode 100644 index 000000000..50cfc6665 --- /dev/null +++ b/src/spdk/examples/ioat/verify/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = verify + +C_SRCS := verify.c + +SPDK_LIB_LIST = ioat thread util log + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/ioat/verify/verify.c b/src/spdk/examples/ioat/verify/verify.c new file mode 100644 index 000000000..0df41f69b --- /dev/null +++ b/src/spdk/examples/ioat/verify/verify.c @@ -0,0 +1,521 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/ioat.h" +#include "spdk/env.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#define SRC_BUFFER_SIZE (512*1024) + +enum ioat_task_type { + IOAT_COPY_TYPE, + IOAT_FILL_TYPE, +}; + +struct user_config { + int queue_depth; + int time_in_sec; + char *core_mask; +}; + +struct ioat_device { + struct spdk_ioat_chan *ioat; + TAILQ_ENTRY(ioat_device) tailq; +}; + +static TAILQ_HEAD(, ioat_device) g_devices; +static struct ioat_device *g_next_device; + +static struct user_config g_user_config; + +struct thread_entry { + struct spdk_ioat_chan *chan; + uint64_t xfer_completed; + uint64_t xfer_failed; + uint64_t fill_completed; + uint64_t fill_failed; + uint64_t current_queue_depth; + unsigned lcore_id; + bool is_draining; + bool init_failed; + struct spdk_mempool *data_pool; + struct spdk_mempool *task_pool; +}; + +struct ioat_task { + enum ioat_task_type type; + struct thread_entry *thread_entry; + void *buffer; + int len; + uint64_t fill_pattern; + void *src; + void *dst; +}; + +static __thread unsigned int seed = 0; + +static unsigned char *g_src; + +static void submit_single_xfer(struct ioat_task *ioat_task); + +static void +construct_user_config(struct user_config *self) +{ + self->queue_depth = 32; + self->time_in_sec = 10; + self->core_mask = "0x1"; +} + +static void +dump_user_config(struct user_config *self) +{ + printf("User configuration:\n"); + printf("Run time: %u seconds\n", self->time_in_sec); + printf("Core mask: %s\n", self->core_mask); + printf("Queue depth: %u\n", self->queue_depth); +} + +static void +ioat_exit(void) +{ + struct ioat_device *dev; + + while (!TAILQ_EMPTY(&g_devices)) { + dev = TAILQ_FIRST(&g_devices); + TAILQ_REMOVE(&g_devices, dev, tailq); + if (dev->ioat) { + spdk_ioat_detach(dev->ioat); + } + free(dev); + } +} +static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task) +{ + int len; + uintptr_t src_offset; + uintptr_t 
dst_offset; + uint64_t fill_pattern; + + if (ioat_task->type == IOAT_FILL_TYPE) { + fill_pattern = rand_r(&seed); + fill_pattern = fill_pattern << 32 | rand_r(&seed); + + /* Ensure that the length of memset block is 8 Bytes aligned. + * In case the buffer crosses hugepage boundary and must be split, + * we also need to ensure 8 byte address alignment. We do it + * unconditionally to keep things simple. + */ + len = 8 + ((rand_r(&seed) % (SRC_BUFFER_SIZE - 16)) & ~0x7); + dst_offset = 8 + rand_r(&seed) % (SRC_BUFFER_SIZE - 8 - len); + ioat_task->fill_pattern = fill_pattern; + ioat_task->dst = (void *)(((uintptr_t)ioat_task->buffer + dst_offset) & ~0x7); + } else { + src_offset = rand_r(&seed) % SRC_BUFFER_SIZE; + len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset); + dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len); + + memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE); + ioat_task->src = (void *)((uintptr_t)g_src + src_offset); + ioat_task->dst = (void *)((uintptr_t)ioat_task->buffer + dst_offset); + } + ioat_task->len = len; + ioat_task->thread_entry = thread_entry; +} + +static void +ioat_done(void *cb_arg) +{ + char *value; + int i, failed = 0; + struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct thread_entry *thread_entry = ioat_task->thread_entry; + + if (ioat_task->type == IOAT_FILL_TYPE) { + value = ioat_task->dst; + for (i = 0; i < ioat_task->len / 8; i++) { + if (memcmp(value, &ioat_task->fill_pattern, 8) != 0) { + thread_entry->fill_failed++; + failed = 1; + break; + } + value += 8; + } + if (!failed) { + thread_entry->fill_completed++; + } + } else { + if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) { + thread_entry->xfer_failed++; + } else { + thread_entry->xfer_completed++; + } + } + + thread_entry->current_queue_depth--; + if (thread_entry->is_draining) { + spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer); + spdk_mempool_put(thread_entry->task_pool, ioat_task); + } else { + prepare_ioat_task(thread_entry, 
ioat_task); + submit_single_xfer(ioat_task); + } +} + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev) +{ + printf(" Found matching device at %04x:%02x:%02x.%x " + "vendor:0x%04x device:0x%04x\n", + spdk_pci_device_get_domain(pci_dev), + spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev), + spdk_pci_device_get_func(pci_dev), + spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev)); + + return true; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat) +{ + struct ioat_device *dev; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) { + printf("Failed to allocate device struct\n"); + return; + } + memset(dev, 0, sizeof(*dev)); + + dev->ioat = ioat; + TAILQ_INSERT_TAIL(&g_devices, dev, tailq); +} + +static int +ioat_init(void) +{ + TAILQ_INIT(&g_devices); + + if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) { + fprintf(stderr, "ioat_probe() failed\n"); + return 1; + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options\n", program_name); + printf("\t[-h help message]\n"); + printf("\t[-c core mask for distributing I/O submission/completion work]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-q queue depth]\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + construct_user_config(&g_user_config); + while ((op = getopt(argc, argv, "c:ht:q:")) != -1) { + switch (op) { + case 't': + g_user_config.time_in_sec = spdk_strtol(optarg, 10); + break; + case 'c': + g_user_config.core_mask = optarg; + break; + case 'q': + g_user_config.queue_depth = spdk_strtol(optarg, 10); + break; + case 'h': + usage(argv[0]); + exit(0); + default: + usage(argv[0]); + return 1; + } + } + if (g_user_config.time_in_sec <= 0 || !g_user_config.core_mask || + g_user_config.queue_depth <= 0) { + usage(argv[0]); + return 1; + } + + return 0; +} + +static void +drain_xfers(struct thread_entry *thread_entry) +{ + while 
(thread_entry->current_queue_depth > 0) { + spdk_ioat_process_events(thread_entry->chan); + } +} + +static void +submit_single_xfer(struct ioat_task *ioat_task) +{ + if (ioat_task->type == IOAT_FILL_TYPE) + spdk_ioat_submit_fill(ioat_task->thread_entry->chan, ioat_task, ioat_done, + ioat_task->dst, ioat_task->fill_pattern, ioat_task->len); + else + spdk_ioat_submit_copy(ioat_task->thread_entry->chan, ioat_task, ioat_done, + ioat_task->dst, ioat_task->src, ioat_task->len); + ioat_task->thread_entry->current_queue_depth++; +} + +static void +submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth) +{ + while (queue_depth-- > 0) { + struct ioat_task *ioat_task = NULL; + ioat_task = spdk_mempool_get(thread_entry->task_pool); + ioat_task->buffer = spdk_mempool_get(thread_entry->data_pool); + + ioat_task->type = IOAT_COPY_TYPE; + if (spdk_ioat_get_dma_capabilities(thread_entry->chan) & SPDK_IOAT_ENGINE_FILL_SUPPORTED) { + if (queue_depth % 2) { + ioat_task->type = IOAT_FILL_TYPE; + } + } + prepare_ioat_task(thread_entry, ioat_task); + submit_single_xfer(ioat_task); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + char buf_pool_name[20], task_pool_name[20]; + struct thread_entry *t = (struct thread_entry *)arg; + + if (!t->chan) { + return 1; + } + + t->lcore_id = spdk_env_get_current_core(); + + snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%u", t->lcore_id); + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%u", t->lcore_id); + t->data_pool = spdk_mempool_create(buf_pool_name, g_user_config.queue_depth, SRC_BUFFER_SIZE, + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + t->task_pool = spdk_mempool_create(task_pool_name, g_user_config.queue_depth, + sizeof(struct ioat_task), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (!t->data_pool || !t->task_pool) { + fprintf(stderr, "Could not allocate buffer pool.\n"); + t->init_failed = true; + return 1; + } + + tsc_end = spdk_get_ticks() + 
g_user_config.time_in_sec * spdk_get_ticks_hz(); + + submit_xfers(t, g_user_config.queue_depth); + while (spdk_get_ticks() < tsc_end) { + spdk_ioat_process_events(t->chan); + } + + t->is_draining = true; + drain_xfers(t); + + return 0; +} + +static int +init_src_buffer(void) +{ + int i; + + g_src = spdk_dma_zmalloc(SRC_BUFFER_SIZE, 512, NULL); + if (g_src == NULL) { + fprintf(stderr, "Allocate src buffer failed\n"); + return 1; + } + + for (i = 0; i < SRC_BUFFER_SIZE / 4; i++) { + memset((g_src + (4 * i)), i, 4); + } + + return 0; +} + +static int +init(void) +{ + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "verify"; + opts.core_mask = g_user_config.core_mask; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (init_src_buffer() != 0) { + fprintf(stderr, "Could not init src buffer\n"); + return 1; + } + if (ioat_init() != 0) { + fprintf(stderr, "Could not init ioat\n"); + return 1; + } + + return 0; +} + +static int +dump_result(struct thread_entry *threads, uint32_t num_threads) +{ + uint32_t i; + uint64_t total_completed = 0; + uint64_t total_failed = 0; + + for (i = 0; i < num_threads; i++) { + struct thread_entry *t = &threads[i]; + + if (!t->chan) { + continue; + } + + if (t->init_failed) { + total_failed++; + continue; + } + + total_completed += t->xfer_completed; + total_completed += t->fill_completed; + total_failed += t->xfer_failed; + total_failed += t->fill_failed; + if (total_completed || total_failed) + printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld\n", + t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed); + } + return total_failed ? 1 : 0; +} + +static struct spdk_ioat_chan * +get_next_chan(void) +{ + struct spdk_ioat_chan *chan; + + if (g_next_device == NULL) { + fprintf(stderr, "Not enough ioat channels found. 
Check that ioat channels are bound\n"); + fprintf(stderr, "to uio_pci_generic or vfio-pci. scripts/setup.sh can help with this.\n"); + return NULL; + } + + chan = g_next_device->ioat; + + g_next_device = TAILQ_NEXT(g_next_device, tailq); + + return chan; +} + +static uint32_t +get_max_core(void) +{ + uint32_t i; + uint32_t max_core = 0; + + SPDK_ENV_FOREACH_CORE(i) { + if (i > max_core) { + max_core = i; + } + } + + return max_core; +} + +int +main(int argc, char **argv) +{ + uint32_t i, current_core; + struct thread_entry *threads; + uint32_t num_threads; + int rc; + + if (parse_args(argc, argv) != 0) { + return 1; + } + + if (init() != 0) { + return 1; + } + + dump_user_config(&g_user_config); + + g_next_device = TAILQ_FIRST(&g_devices); + + num_threads = get_max_core() + 1; + threads = calloc(num_threads, sizeof(*threads)); + if (!threads) { + fprintf(stderr, "Thread memory allocation failed\n"); + rc = 1; + goto cleanup; + } + + current_core = spdk_env_get_current_core(); + SPDK_ENV_FOREACH_CORE(i) { + if (i != current_core) { + threads[i].chan = get_next_chan(); + spdk_env_thread_launch_pinned(i, work_fn, &threads[i]); + } + } + + threads[current_core].chan = get_next_chan(); + if (work_fn(&threads[current_core]) != 0) { + rc = 1; + goto cleanup; + } + + spdk_env_thread_wait_all(); + rc = dump_result(threads, num_threads); + +cleanup: + spdk_dma_free(g_src); + ioat_exit(); + free(threads); + + return rc; +} diff --git a/src/spdk/examples/nvme/Makefile b/src/spdk/examples/nvme/Makefile new file mode 100644 index 000000000..14eeb9be7 --- /dev/null +++ b/src/spdk/examples/nvme/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world identify perf reconnect nvme_manage arbitration \ + hotplug cmb_copy abort + +DIRS-$(CONFIG_FIO_PLUGIN) += fio_plugin + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/nvme/abort/.gitignore b/src/spdk/examples/nvme/abort/.gitignore new file mode 100644 index 000000000..f7d13fd04 --- /dev/null +++ b/src/spdk/examples/nvme/abort/.gitignore @@ -0,0 +1 @@ +abort diff --git a/src/spdk/examples/nvme/abort/Makefile b/src/spdk/examples/nvme/abort/Makefile new file mode 100644 index 000000000..5073a842d --- /dev/null +++ b/src/spdk/examples/nvme/abort/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = abort + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/abort/abort.c b/src/spdk/examples/nvme/abort/abort.c new file mode 100644 index 000000000..728790513 --- /dev/null +++ b/src/spdk/examples/nvme/abort/abort.c @@ -0,0 +1,1144 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/likely.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + enum spdk_nvme_transport_type trtype; + + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint32_t num_io_requests; + uint64_t size_in_ios; + uint32_t block_size; + char name[1024]; +}; + +struct ctrlr_worker_ctx { + pthread_mutex_t mutex; + struct ctrlr_entry *entry; + uint64_t abort_submitted; + uint64_t abort_submit_failed; + uint64_t successful_abort; + uint64_t unsuccessful_abort; + uint64_t abort_failed; + uint64_t current_queue_depth; + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_worker_ctx *next; +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_submitted; + uint64_t io_completed; + uint64_t io_aborted; + uint64_t io_failed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + struct spdk_nvme_qpair *qpair; + struct ctrlr_worker_ctx *ctrlr_ctx; + struct ns_worker_ctx *next; +}; + +struct perf_task { + struct ns_worker_ctx *ns_ctx; + void *buf; +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct ctrlr_worker_ctx *ctrlr_ctx; + struct worker_thread *next; + unsigned 
lcore; +}; + +static const char *g_workload_type = "read"; +static struct ctrlr_entry *g_controllers; +static struct ns_entry *g_namespaces; +static int g_num_namespaces; +static struct worker_thread *g_workers; +static int g_num_workers; +static uint32_t g_master_core; + +static int g_abort_interval = 1; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_size_bytes = 131072; +static uint32_t g_max_io_size_blocks; +static int g_rw_percentage = -1; +static int g_is_random; +static int g_queue_depth = 128; +static int g_time_in_sec = 3; +static int g_dpdk_mem; +static int g_shm_id = -1; +static bool g_no_pci; +static bool g_warn; +static bool g_mix_specified; + +static const char *g_core_mask; + +struct trid_entry { + struct spdk_nvme_transport_id trid; + uint16_t nsid; + TAILQ_ENTRY(trid_entry) tailq; +}; + +static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list); + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *cpl); + +static int +build_nvme_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_transport_id *trid; + int res = 0; + + trid = spdk_nvme_ctrlr_get_transport_id(ctrlr); + + switch (trid->trtype) { + case SPDK_NVME_TRANSPORT_PCIE: + res = snprintf(name, length, "PCIE (%s)", trid->traddr); + break; + case SPDK_NVME_TRANSPORT_RDMA: + res = snprintf(name, length, "RDMA (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + case SPDK_NVME_TRANSPORT_TCP: + res = snprintf(name, length, "TCP (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + + default: + fprintf(stderr, "Unknown transport type %d\n", trid->trtype); + break; + } + return res; +} + +static void +build_nvme_ns_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + int res = 0; + + res = build_nvme_name(name, length, ctrlr); + if (res > 0) { + snprintf(name + res, length - res, " NSID %u", nsid); + } + +} + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct 
spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + uint32_t max_xfer_size, entries, sector_size; + uint64_t ns_size; + struct spdk_nvme_io_qpair_opts opts; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + g_warn = true; + return; + } + + ns_size = spdk_nvme_ns_get_size(ns); + sector_size = spdk_nvme_ns_get_sector_size(ns); + + if (ns_size < g_io_size_bytes || sector_size > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + ns_size, spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + g_warn = true; + return; + } + + max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + /* NVMe driver may add additional entries based on + * stripe size and maximum transfer size, we assume + * 1 more entry be used for stripe. + */ + entries = (g_io_size_bytes - 1) / max_xfer_size + 2; + if ((g_queue_depth * entries) > opts.io_queue_size) { + printf("controller IO queue size %u less than required\n", + opts.io_queue_size); + printf("Consider using lower queue depth or small IO size because " + "IO requests may be queued at the NVMe driver.\n"); + } + /* For requests which have children requests, parent request itself + * will also occupy 1 entry. 
+ */ + entries += 1; + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->num_io_requests = g_queue_depth * entries; + + entry->size_in_ios = ns_size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / sector_size; + + entry->block_size = spdk_nvme_ns_get_sector_size(ns); + + if (g_max_io_size_blocks < entry->io_size_blocks) { + g_max_io_size_blocks = entry->io_size_blocks; + } + + build_nvme_ns_name(entry->name, sizeof(entry->name), ctrlr, spdk_nvme_ns_get_id(ns)); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +unregister_namespaces(void) +{ + struct ns_entry *entry = g_namespaces; + + while (entry) { + struct ns_entry *next = entry->next; + free(entry); + entry = next; + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry) +{ + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + uint32_t nsid; + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + build_nvme_name(entry->name, sizeof(entry->name), ctrlr); + + entry->ctrlr = ctrlr; + entry->trtype = trid_entry->trid.trtype; + entry->next = g_controllers; + g_controllers = entry; + + if (trid_entry->nsid == 0) { + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + } else { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid); + if (!ns) { + perror("Namespace does not exist."); + exit(1); + } + + register_ns(ctrlr, ns); + } +} + +static void +abort_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct ctrlr_worker_ctx *ctrlr_ctx = ctx; + + ctrlr_ctx->current_queue_depth--; + if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) { + 
ctrlr_ctx->abort_failed++; + } else if ((cpl->cdw0 & 0x1) == 0) { + ctrlr_ctx->successful_abort++; + } else { + ctrlr_ctx->unsuccessful_abort++; + } +} + +static void +abort_task(struct perf_task *task) +{ + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ctrlr_worker_ctx *ctrlr_ctx = ns_ctx->ctrlr_ctx; + int rc; + + /* Hold mutex to guard ctrlr_ctx->current_queue_depth. */ + pthread_mutex_lock(&ctrlr_ctx->mutex); + + rc = spdk_nvme_ctrlr_cmd_abort_ext(ctrlr_ctx->ctrlr, ns_ctx->qpair, task, abort_complete, + ctrlr_ctx); + + if (spdk_unlikely(rc != 0)) { + ctrlr_ctx->abort_submit_failed++; + } else { + ctrlr_ctx->current_queue_depth++; + ctrlr_ctx->abort_submitted++; + } + + pthread_mutex_unlock(&ctrlr_ctx->mutex); +} + +static __thread unsigned int seed = 0; + +static inline void +submit_single_io(struct perf_task *task) +{ + uint64_t offset_in_ios, lba; + int rc; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ns_entry *entry = ns_ctx->entry; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + lba = offset_in_ios * entry->io_size_blocks; + + if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && (rand_r(&seed) % 100) < g_rw_percentage)) { + rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf, + lba, entry->io_size_blocks, io_complete, task, 0); + } else { + rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf, + lba, entry->io_size_blocks, io_complete, task, 0); + } + + if (spdk_unlikely(rc != 0)) { + fprintf(stderr, "I/O submission failed\n"); + } else { + ns_ctx->current_queue_depth++; + ns_ctx->io_submitted++; + + if ((ns_ctx->io_submitted % g_abort_interval) == 0) { + abort_task(task); + } + } + +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct perf_task *task = ctx; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + 
+ ns_ctx->current_queue_depth--; + if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) { + ns_ctx->io_failed++; + } else { + ns_ctx->io_completed++; + } + + /* is_draining indicates when time has expired for the test run and we are + * just waiting for the previously submitted I/O to complete. In this case, + * do not submit a new I/O to replace the one just completed. + */ + if (spdk_unlikely(ns_ctx->is_draining)) { + spdk_dma_free(task->buf); + free(task); + } else { + submit_single_io(task); + } +} + +static struct perf_task * +allocate_task(struct ns_worker_ctx *ns_ctx) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + fprintf(stderr, "Failed to allocate task\n"); + exit(1); + } + + task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL); + if (task->buf == NULL) { + free(task); + fprintf(stderr, "Failed to allocate task->buf\n"); + exit(1); + } + + task->ns_ctx = ns_ctx; + + return task; +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = allocate_task(ns_ctx); + submit_single_io(task); + } +} + +static int +work_fn(void *arg) +{ + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx; + struct ctrlr_worker_ctx *ctrlr_ctx; + struct ns_entry *ns_entry; + struct spdk_nvme_io_qpair_opts opts; + uint64_t tsc_end; + uint32_t unfinished_ctx; + + /* Allocate queue pair for each namespace. 
*/ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + ns_entry = ns_ctx->entry; + + spdk_nvme_ctrlr_get_default_io_qpair_opts(ns_entry->ctrlr, &opts, sizeof(opts)); + if (opts.io_queue_requests < ns_entry->num_io_requests) { + opts.io_queue_requests = ns_entry->num_io_requests; + } + + ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, &opts, sizeof(opts)); + if (ns_ctx->qpair == NULL) { + fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return 1; + } + + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0); + ns_ctx = ns_ctx->next; + } + + if (worker->lcore == g_master_core) { + ctrlr_ctx = worker->ctrlr_ctx; + while (ctrlr_ctx) { + /* Hold mutex to guard ctrlr_ctx->current_queue_depth. 
*/ + pthread_mutex_lock(&ctrlr_ctx->mutex); + spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr); + pthread_mutex_unlock(&ctrlr_ctx->mutex); + ctrlr_ctx = ctrlr_ctx->next; + } + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + do { + unfinished_ctx = 0; + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (!ns_ctx->is_draining) { + ns_ctx->is_draining = true; + } + if (ns_ctx->current_queue_depth > 0) { + spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0); + if (ns_ctx->current_queue_depth == 0) { + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); + } else { + unfinished_ctx++; + } + } + ns_ctx = ns_ctx->next; + } + } while (unfinished_ctx > 0); + + if (worker->lcore == g_master_core) { + do { + unfinished_ctx = 0; + + ctrlr_ctx = worker->ctrlr_ctx; + while (ctrlr_ctx != NULL) { + pthread_mutex_lock(&ctrlr_ctx->mutex); + if (ctrlr_ctx->current_queue_depth > 0) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr); + if (ctrlr_ctx->current_queue_depth > 0) { + unfinished_ctx++; + } + } + pthread_mutex_unlock(&ctrlr_ctx->mutex); + ctrlr_ctx = ctrlr_ctx->next; + } + } while (unfinished_ctx > 0); + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options", program_name); + + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-o io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 1)\n"); + printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n"); + printf("\t Format: 'key:value [key:value] ...'\n"); + printf("\t Keys:\n"); + printf("\t trtype Transport type (e.g. PCIe, RDMA)\n"); + printf("\t adrfam Address family (e.g. IPv4, IPv6)\n"); + printf("\t traddr Transport address (e.g. 
0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n"); + printf("\t trsvcid Transport service identifier (e.g. 4420)\n"); + printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n"); + printf("\t -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n"); + printf("\t[-s DPDK huge memory size in MB.]\n"); + printf("\t[-i shared memory group ID]\n"); + printf("\t[-a abort interval.]\n"); + printf("\t"); + spdk_log_usage(stdout, "-T"); +#ifdef DEBUG + printf("\t[-G enable debug logging]\n"); +#else + printf("\t[-G enable debug logging (flag disabled, must reconfigure with --enable-debug)\n"); +#endif +} + +static void +unregister_trids(void) +{ + struct trid_entry *trid_entry, *tmp; + + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) { + TAILQ_REMOVE(&g_trid_list, trid_entry, tailq); + free(trid_entry); + } +} + +static int +add_trid(const char *trid_str) +{ + struct trid_entry *trid_entry; + struct spdk_nvme_transport_id *trid; + char *ns; + + trid_entry = calloc(1, sizeof(*trid_entry)); + if (trid_entry == NULL) { + return -1; + } + + trid = &trid_entry->trid; + trid->trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) { + fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str); + free(trid_entry); + return 1; + } + + spdk_nvme_transport_id_populate_trstring(trid, + spdk_nvme_transport_id_trtype_str(trid->trtype)); + + ns = strcasestr(trid_str, "ns:"); + if (ns) { + char nsid_str[6]; /* 5 digits maximum in an nsid */ + int len; + int nsid; + + ns += 3; + + len = strcspn(ns, " \t\n"); + if (len > 5) { + fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n"); + free(trid_entry); + return 1; + } + + memcpy(nsid_str, ns, len); + nsid_str[len] = '\0'; + + nsid = spdk_strtol(nsid_str, 10); + if (nsid <= 0 || nsid > 
65535) { + fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n"); + free(trid_entry); + return 1; + } + + trid_entry->nsid = (uint16_t)nsid; + } + + TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq); + return 0; +} + +static int +parse_args(int argc, char **argv) +{ + int op; + long int val; + int rc; + + while ((op = getopt(argc, argv, "a:c:i:o:q:r:s:t:w:M:")) != -1) { + switch (op) { + case 'a': + case 'i': + case 'o': + case 'q': + case 's': + case 't': + case 'M': + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Converting a string to integer failed\n"); + return val; + } + switch (op) { + case 'a': + g_abort_interval = val; + break; + case 'i': + g_shm_id = val; + break; + case 'o': + g_io_size_bytes = val; + break; + case 'q': + g_queue_depth = val; + break; + case 's': + g_dpdk_mem = val; + break; + case 't': + g_time_in_sec = val; + break; + case 'M': + g_rw_percentage = val; + g_mix_specified = true; + break; + } + break; + case 'c': + g_core_mask = optarg; + break; + case 'r': + if (add_trid(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 'w': + g_workload_type = optarg; + break; + case 'G': +#ifndef DEBUG + fprintf(stderr, "%s must be configured with --enable-debug for -G flag\n", + argv[0]); + usage(argv[0]); + return 1; +#else + spdk_log_set_flag("nvme"); + spdk_log_set_print_level(SPDK_LOG_DEBUG); + break; +#endif + case 'T': + rc = spdk_log_set_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -T flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_queue_depth) { + fprintf(stderr, "missing -q (queue size) operand\n"); + usage(argv[0]); + return 1; + } + if (!g_io_size_bytes) { + fprintf(stderr, "missing -o (block size) operand\n"); + 
usage(argv[0]); + return 1; + } + if (!g_workload_type) { + fprintf(stderr, "missing -t (test time in seconds) operand\n"); + usage(argv[0]); + return 1; + } + + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + if (strncmp(g_workload_type, "rand", 4) == 0) { + g_is_random = 1; + g_workload_type = &g_workload_type[4]; + } + + if (strcmp(g_workload_type, "read") == 0 || strcmp(g_workload_type, "write") == 0) { + g_rw_percentage = strcmp(g_workload_type, "read") == 0 ? 100 : 0; + if (g_mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } else if (strcmp(g_workload_type, "rw") == 0) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } else { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (TAILQ_EMPTY(&g_trid_list)) { + /* If no transport IDs specified, default to enumerating all local PCIe devices */ + add_trid("trtype:PCIe"); + } else { + struct trid_entry *trid_entry, *trid_entry_tmp; + + g_no_pci = true; + /* check whether there is local PCIe type */ + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) { + if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + g_no_pci = false; + break; + } + } + } + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + + /* Free namespace context and worker thread */ + while 
(worker) { + struct worker_thread *next_worker = worker->next; + struct ns_worker_ctx *ns_ctx = worker->ns_ctx; + + while (ns_ctx) { + struct ns_worker_ctx *next_ns_ctx = ns_ctx->next; + + printf("NS: %s I/O completed: %lu, failed: %lu\n", + ns_ctx->entry->name, ns_ctx->io_completed, ns_ctx->io_failed); + free(ns_ctx); + ns_ctx = next_ns_ctx; + } + + struct ctrlr_worker_ctx *ctrlr_ctx = worker->ctrlr_ctx; + + while (ctrlr_ctx) { + struct ctrlr_worker_ctx *next_ctrlr_ctx = ctrlr_ctx->next; + + printf("CTRLR: %s abort submitted %lu, failed to submit %lu\n", + ctrlr_ctx->entry->name, ctrlr_ctx->abort_submitted, + ctrlr_ctx->abort_submit_failed); + printf("\t success %lu, unsuccess %lu, failed %lu\n", + ctrlr_ctx->successful_abort, ctrlr_ctx->unsuccessful_abort, + ctrlr_ctx->abort_failed); + free(ctrlr_ctx); + ctrlr_ctx = next_ctrlr_ctx; + } + + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct trid_entry *trid_entry = cb_ctx; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("Attached to NVMe Controller at %s [%04x:%04x]\n", + trid->traddr, + pci_id.vendor_id, pci_id.device_id); + } + + register_ctrlr(ctrlr, trid_entry); +} + +static int +register_controllers(void) +{ + struct trid_entry *trid_entry; + + printf("Initializing NVMe 
Controllers\n"); + + TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) { + if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n", + trid_entry->trid.traddr); + return -1; + } + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_master_worker_with_ctrlr(void) +{ + struct ctrlr_entry *entry = g_controllers; + struct worker_thread *worker = g_workers; + struct ctrlr_worker_ctx *ctrlr_ctx; + + while (worker) { + if (worker->lcore == g_master_core) { + break; + } + worker = worker->next; + } + + if (!worker) { + return -1; + } + + while (entry) { + ctrlr_ctx = calloc(1, sizeof(struct ctrlr_worker_ctx)); + if (!ctrlr_ctx) { + return -1; + } + + pthread_mutex_init(&ctrlr_ctx->mutex, NULL); + ctrlr_ctx->entry = entry; + ctrlr_ctx->ctrlr = entry->ctrlr; + ctrlr_ctx->next = worker->ctrlr_ctx; + worker->ctrlr_ctx = ctrlr_ctx; + + entry = entry->next; + } + + return 0; +} + +static struct ctrlr_worker_ctx * +get_ctrlr_worker_ctx(struct spdk_nvme_ctrlr *ctrlr) +{ + struct worker_thread *worker = g_workers; + struct ctrlr_worker_ctx *ctrlr_ctx; + + while (worker != NULL) { + if (worker->lcore == g_master_core) { + break; + } + worker = worker->next; + } + + if (!worker) { + return NULL; + } + + ctrlr_ctx = worker->ctrlr_ctx; + + while (ctrlr_ctx != NULL) { + if (ctrlr_ctx->ctrlr == ctrlr) { + return ctrlr_ctx; + } + ctrlr_ctx = ctrlr_ctx->next; + } + + return NULL; +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = g_num_namespaces > g_num_workers ? 
g_num_namespaces : g_num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = calloc(1, sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return -1; + } + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->ctrlr_ctx = get_ctrlr_worker_ctx(entry->ctrlr); + if (!ns_ctx->ctrlr_ctx) { + free(ns_ctx); + return -1; + } + + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "abort"; + opts.shm_id = g_shm_id; + if (g_core_mask) { + opts.core_mask = g_core_mask; + } + + if (g_dpdk_mem) { + opts.mem_size = g_dpdk_mem; + } + if (g_no_pci) { + opts.no_pci = g_no_pci; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + rc = -1; + goto cleanup; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + rc = -1; + goto cleanup; + } + + if (register_controllers() != 0) { + rc = -1; + goto cleanup; + } + + if (g_warn) { + printf("WARNING: Some requested NVMe devices were skipped\n"); + } + + if (g_num_namespaces == 0) { + fprintf(stderr, "No valid NVMe controllers found\n"); + goto cleanup; + } + + if (associate_master_worker_with_ctrlr() != 0) { + rc = -1; + goto cleanup; + } + + if (associate_workers_with_ns() != 0) { + rc = -1; + goto cleanup; + } + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + g_master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != g_master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + +cleanup: + unregister_trids(); + unregister_workers(); + unregister_namespaces(); + unregister_controllers(); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + } + + return rc; +} diff --git a/src/spdk/examples/nvme/arbitration/.gitignore b/src/spdk/examples/nvme/arbitration/.gitignore new file mode 100644 index 000000000..f1d6e38dd --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/.gitignore @@ -0,0 +1 @@ +arbitration diff --git a/src/spdk/examples/nvme/arbitration/Makefile b/src/spdk/examples/nvme/arbitration/Makefile new file mode 100644 index 000000000..71cff76e6 --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = arbitration + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/arbitration/arbitration.c b/src/spdk/examples/nvme/arbitration/arbitration.c new file mode 100644 index 000000000..444076041 --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/arbitration.c @@ -0,0 +1,1158 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/nvme_intel.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_intel_rw_latency_page latency_page; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + } nvme; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint64_t size_in_ios; + char name[1024]; +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + struct spdk_nvme_qpair *qpair; + struct ns_worker_ctx *next; +}; + +struct arb_task { + struct ns_worker_ctx *ns_ctx; + void *buf; +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; + enum spdk_nvme_qprio qprio; +}; + +struct arb_context { + int shm_id; + int outstanding_commands; + int num_namespaces; + int num_workers; + int rw_percentage; + int is_random; + int queue_depth; + int time_in_sec; + int io_count; + uint8_t latency_tracking_enable; + uint8_t 
arbitration_mechanism; + uint8_t arbitration_config; + uint32_t io_size_bytes; + uint32_t max_completions; + uint64_t tsc_rate; + const char *core_mask; + const char *workload_type; +}; + +struct feature { + uint32_t result; + bool valid; +}; + +static struct spdk_mempool *task_pool = NULL; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; +static struct worker_thread *g_workers = NULL; + +static struct feature features[SPDK_NVME_FEAT_ARBITRATION + 1] = {}; + +static struct arb_context g_arbitration = { + .shm_id = -1, + .outstanding_commands = 0, + .num_workers = 0, + .num_namespaces = 0, + .rw_percentage = 50, + .queue_depth = 64, + .time_in_sec = 60, + .io_count = 100000, + .latency_tracking_enable = 0, + .arbitration_mechanism = SPDK_NVME_CC_AMS_RR, + .arbitration_config = 0, + .io_size_bytes = 131072, + .max_completions = 0, + /* Default 4 cores for urgent/high/medium/low */ + .core_mask = "0xf", + .workload_type = "randrw", +}; + +/* + * For weighted round robin arbitration mechanism, the smaller value between + * weight and burst will be picked to execute the commands in one queue. 
+ */ +#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32 +#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16 +#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8 + +static void task_complete(struct arb_task *task); + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static const char *print_qprio(enum spdk_nvme_qprio); + + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || + spdk_nvme_ns_get_extended_sector_size(ns) > g_arbitration.io_size_bytes || + g_arbitration.io_size_bytes % spdk_nvme_ns_get_extended_sector_size(ns)) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_extended_sector_size(ns), + g_arbitration.io_size_bytes); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->nvme.ctrlr = ctrlr; + entry->nvme.ns = ns; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; + entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_arbitration.num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_arbitration.outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr 
*ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + uint32_t nsid; + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); + union spdk_nvme_cap_register cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + if ((g_arbitration.latency_tracking_enable != 0) && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; + nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR && + (cap.bits.ams & SPDK_NVME_CAP_AMS_WRR)) { + get_arb_feature(ctrlr); + + if (g_arbitration.arbitration_config != 0) { + set_arb_feature(ctrlr); + get_arb_feature(ctrlr); + } + } +} + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct ns_worker_ctx *ns_ctx) +{ + struct arb_task *task = 
NULL; + uint64_t offset_in_ios; + int rc; + struct ns_entry *entry = ns_ctx->entry; + + task = spdk_mempool_get(task_pool); + if (!task) { + fprintf(stderr, "Failed to get task from task_pool\n"); + exit(1); + } + + task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); + if (!task->buf) { + spdk_mempool_put(task_pool, task); + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + + task->ns_ctx = ns_ctx; + + if (g_arbitration.is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + if ((g_arbitration.rw_percentage == 100) || + (g_arbitration.rw_percentage != 0 && + ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { + rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } else { + rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static void +task_complete(struct arb_task *task) +{ + struct ns_worker_ctx *ns_ctx; + + ns_ctx = task->ns_ctx; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + + spdk_dma_free(task->buf); + spdk_mempool_put(task_pool, task); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+ */ + if (!ns_ctx->is_draining) { + submit_single_io(ns_ctx); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct arb_task *)ctx); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ + spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions); +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + while (queue_depth-- > 0) { + submit_single_io(ns_ctx); + } +} + +static void +drain_io(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->is_draining = true; + while (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + } +} + +static int +init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio) +{ + struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr; + struct spdk_nvme_io_qpair_opts opts; + + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + opts.qprio = qprio; + + ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); + if (!ns_ctx->qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return 1; + } + + return 0; +} + +static void +cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); +} + +static void +cleanup(uint32_t task_count) +{ + struct ns_entry *entry = g_namespaces; + struct ns_entry *next_entry = NULL; + struct worker_thread *worker = g_workers; + struct worker_thread *next_worker = NULL; + + while (entry) { + next_entry = entry->next; + free(entry); + entry = next_entry; + }; + + while (worker) { + struct ns_worker_ctx *ns_ctx = worker->ns_ctx; + + /* ns_worker_ctx is a list in the worker */ + while (ns_ctx) { + struct ns_worker_ctx *next_ns_ctx = ns_ctx->next; + free(ns_ctx); + ns_ctx = next_ns_ctx; + } + + next_worker = worker->next; + free(worker); + worker = next_worker; + }; + + if (spdk_mempool_count(task_pool) != (size_t)task_count) { + fprintf(stderr, "task_pool count is %zu but should be %u\n", + 
spdk_mempool_count(task_pool), task_count); + } + spdk_mempool_free(task_pool); +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + + printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio)); + + /* Allocate a queue pair for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_arbitration.queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. 
+ */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + drain_io(ns_ctx); + cleanup_ns_worker_ctx(ns_ctx); + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-s io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-l enable latency tracking, default: disabled]\n"); + printf("\t\t(0 - disabled; 1 - enabled)\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 0xf - 4 cores)]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-a arbitration mechanism, must be one of below]\n"); + printf("\t\t(0, 1, 2)]\n"); + printf("\t\t(0: default round robin mechanism)]\n"); + printf("\t\t(1: weighted round robin mechanism)]\n"); + printf("\t\t(2: vendor specific mechanism)]\n"); + printf("\t[-b enable arbitration user configuration, default: disabled]\n"); + printf("\t\t(0 - disabled; 1 - enabled)\n"); + printf("\t[-n subjected IOs for performance comparison]\n"); + printf("\t[-i shared memory group ID]\n"); +} + +static const char * +print_qprio(enum spdk_nvme_qprio qprio) +{ + switch (qprio) { + case SPDK_NVME_QPRIO_URGENT: + return "urgent priority queue"; + case SPDK_NVME_QPRIO_HIGH: + return "high priority queue"; + case SPDK_NVME_QPRIO_MEDIUM: + return "medium priority queue"; + case SPDK_NVME_QPRIO_LOW: + return "low priority queue"; + default: + return "invalid priority queue"; + } +} + + +static void +print_configuration(char *program_name) +{ + printf("%s run with 
configuration:\n", program_name); + printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n", + program_name, + g_arbitration.queue_depth, + g_arbitration.io_size_bytes, + g_arbitration.workload_type, + g_arbitration.rw_percentage, + g_arbitration.latency_tracking_enable, + g_arbitration.time_in_sec, + g_arbitration.core_mask, + g_arbitration.max_completions, + g_arbitration.arbitration_mechanism, + g_arbitration.arbitration_config, + g_arbitration.io_count, + g_arbitration.shm_id); +} + + +static void +print_performance(void) +{ + float io_per_second, sent_all_io_in_secs; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec; + sent_all_io_in_secs = g_arbitration.io_count / io_per_second; + printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n", + ns_ctx->entry->name, worker->lcore, + io_per_second, sent_all_io_in_secs, g_arbitration.io_count); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + printf("========================================================\n"); + + printf("\n"); +} + +static void +print_latency_page(struct ctrlr_entry *entry) +{ + int i; + + printf("\n"); + printf("%s\n", entry->name); + printf("--------------------------------------------------------\n"); + + for (i = 0; i < 32; i++) { + if (entry->latency_page.buckets_32us[i]) + printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, + entry->latency_page.buckets_32us[i]); + } + for (i = 0; i < 31; i++) { + if (entry->latency_page.buckets_1ms[i]) + printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, + entry->latency_page.buckets_1ms[i]); + } + for (i = 0; i < 31; i++) { + if (entry->latency_page.buckets_32ms[i]) + printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, + entry->latency_page.buckets_32ms[i]); + } +} + +static void +print_latency_statistics(const char *op_name, 
enum spdk_nvme_intel_log_page log_page) +{ + struct ctrlr_entry *ctrlr; + + printf("%s Latency Statistics:\n", op_name); + printf("========================================================\n"); + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + if (spdk_nvme_ctrlr_cmd_get_log_page( + ctrlr->ctrlr, log_page, + SPDK_NVME_GLOBAL_NS_TAG, + &ctrlr->latency_page, + sizeof(struct spdk_nvme_intel_rw_latency_page), + 0, + enable_latency_tracking_complete, + NULL)) { + printf("nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + g_arbitration.outstanding_commands++; + } else { + printf("Controller %s: %s latency statistics not supported\n", + ctrlr->name, op_name); + } + ctrlr = ctrlr->next; + } + + while (g_arbitration.outstanding_commands) { + ctrlr = g_controllers; + while (ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); + ctrlr = ctrlr->next; + } + } + + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + print_latency_page(ctrlr); + } + ctrlr = ctrlr->next; + } + printf("\n"); +} + +static void +print_stats(void) +{ + print_performance(); + if (g_arbitration.latency_tracking_enable) { + if (g_arbitration.rw_percentage != 0) { + print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); + } + if (g_arbitration.rw_percentage != 100) { + print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); + } + } +} + +static int +parse_args(int argc, char **argv) +{ + const char *workload_type = NULL; + int op = 0; + bool mix_specified = false; + long int val; + + while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) { + switch (op) { + case 'c': + g_arbitration.core_mask = optarg; + break; + case 'w': + g_arbitration.workload_type = optarg; + break; + case 'h': + case '?': + usage(argv[0]); + return 1; + default: + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, 
"Converting a string to integer failed\n"); + return val; + } + switch (op) { + case 'i': + g_arbitration.shm_id = val; + break; + case 'l': + g_arbitration.latency_tracking_enable = val; + break; + case 'm': + g_arbitration.max_completions = val; + break; + case 'q': + g_arbitration.queue_depth = val; + break; + case 's': + g_arbitration.io_size_bytes = val; + break; + case 't': + g_arbitration.time_in_sec = val; + break; + case 'M': + g_arbitration.rw_percentage = val; + mix_specified = true; + break; + case 'a': + g_arbitration.arbitration_mechanism = val; + break; + case 'b': + g_arbitration.arbitration_config = val; + break; + case 'n': + g_arbitration.io_count = val; + break; + default: + usage(argv[0]); + return -EINVAL; + } + } + } + + workload_type = g_arbitration.workload_type; + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_arbitration.rw_percentage = 100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_arbitration.rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... 
Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_arbitration.is_random = 0; + } else { + g_arbitration.is_random = 1; + } + + if (g_arbitration.latency_tracking_enable != 0 && + g_arbitration.latency_tracking_enable != 1) { + fprintf(stderr, + "-l must be specified to value 0 or 1.\n"); + return 1; + } + + switch (g_arbitration.arbitration_mechanism) { + case SPDK_NVME_CC_AMS_RR: + case SPDK_NVME_CC_AMS_WRR: + case SPDK_NVME_CC_AMS_VS: + break; + default: + fprintf(stderr, + "-a must be specified to value 0, 1, or 7.\n"); + return 1; + } + + if (g_arbitration.arbitration_config != 0 && + g_arbitration.arbitration_config != 1) { + fprintf(stderr, + "-b must be specified to value 0 or 1.\n"); + return 1; + } else if (g_arbitration.arbitration_config == 1 && + g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { + fprintf(stderr, + "-a must be specified to 1 (WRR) together.\n"); + return 1; + } + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; + + g_workers = NULL; + g_arbitration.num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_arbitration.num_workers++; + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { + qprio++; + } + + worker->qprio = qprio & SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK; + } + + return 0; +} + +static bool 
+probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* Update with user specified arbitration configuration */ + opts->arb_mechanism = g_arbitration.arbitration_mechanism; + + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attached to %s\n", trid->traddr); + + /* Update with actual arbitration configuration in use */ + g_arbitration.arbitration_mechanism = opts->arb_mechanism; + + register_ctrlr(ctrlr); +} + +static int +register_controllers(void) +{ + printf("Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_arbitration.num_namespaces == 0) { + fprintf(stderr, "No valid namespaces to continue IO testing\n"); + return 1; + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + if (g_arbitration.latency_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(entry->ctrlr, false); + } + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = g_arbitration.num_namespaces > g_arbitration.num_workers ? 
+ g_arbitration.num_namespaces : g_arbitration.num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = malloc(sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return 1; + } + memset(ns_ctx, 0, sizeof(*ns_ctx)); + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + + } + + return 0; +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + + g_arbitration.outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + struct feature *feature = &features[fid]; + + feature->valid = false; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10_bits.get_features.fid = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, feature); +} + +static void +get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION); + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + union spdk_nvme_cmd_cdw11 arb; + arb.feat_arbitration.raw = features[SPDK_NVME_FEAT_ARBITRATION].result; + + printf("Current Arbitration Configuration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (arb.feat_arbitration.bits.ab == SPDK_NVME_ARBITRATION_BURST_UNLIMITED) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << 
arb.feat_arbitration.bits.ab); + } + + printf("Low Priority Weight: %u\n", arb.feat_arbitration.bits.lpw + 1); + printf("Medium Priority Weight: %u\n", arb.feat_arbitration.bits.mpw + 1); + printf("High Priority Weight: %u\n", arb.feat_arbitration.bits.hpw + 1); + printf("\n"); + } +} + +static void +set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("set_feature(0x%02X) failed\n", fid); + feature->valid = false; + } else { + printf("Set Arbitration Feature Successfully\n"); + } + + g_arbitration.outstanding_commands--; +} + +static int +set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + struct spdk_nvme_cmd cmd = {}; + + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_ARBITRATION; + + g_arbitration.outstanding_commands = 0; + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + cmd.cdw11_bits.feat_arbitration.bits.ab = SPDK_NVME_ARBITRATION_BURST_UNLIMITED; + cmd.cdw11_bits.feat_arbitration.bits.lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT; + cmd.cdw11_bits.feat_arbitration.bits.mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT; + cmd.cdw11_bits.feat_arbitration.bits.hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT; + } + + ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, + set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]); + if (ret) { + printf("Set Arbitration Feature: Failed 0x%x\n", ret); + return 1; + } + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { + printf("Set Arbitration Feature failed and use default configuration\n"); + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + char task_pool_name[30]; + uint32_t task_count; + 
struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "arb"; + opts.core_mask = g_arbitration.core_mask; + opts.shm_id = g_arbitration.shm_id; + if (spdk_env_init(&opts) < 0) { + return 1; + } + + g_arbitration.tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + return 1; + } + + if (register_controllers() != 0) { + return 1; + } + + if (associate_workers_with_ns() != 0) { + return 1; + } + + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); + + /* + * The task_count will be dynamically calculated based on the + * number of attached active namespaces, queue depth and number + * of cores (workers) involved in the IO perations. + */ + task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? + g_arbitration.num_namespaces : g_arbitration.num_workers; + task_count *= g_arbitration.queue_depth; + + task_pool = spdk_mempool_create(task_pool_name, task_count, + sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY); + if (task_pool == NULL) { + fprintf(stderr, "could not initialize task pool\n"); + return 1; + } + + print_configuration(argv[0]); + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + + print_stats(); + + unregister_controllers(); + + cleanup(task_count); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + } + + return rc; +} diff --git a/src/spdk/examples/nvme/cmb_copy/.gitignore b/src/spdk/examples/nvme/cmb_copy/.gitignore new file mode 100644 index 000000000..fce738032 --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/.gitignore @@ -0,0 +1 @@ +cmb_copy diff --git a/src/spdk/examples/nvme/cmb_copy/Makefile b/src/spdk/examples/nvme/cmb_copy/Makefile new file mode 100644 index 000000000..77a143abb --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Eideticom Inc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Eideticom Inc nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = cmb_copy + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/cmb_copy/cmb_copy.c b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c new file mode 100644 index 000000000..50eedcbba --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c @@ -0,0 +1,412 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Eideticom Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Eideticom Inc, nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/nvme.h" +#include "spdk/string.h" + +#define CMB_COPY_DELIM "-" +#define CMB_COPY_READ 0 +#define CMB_COPY_WRITE 1 + +struct nvme_io { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id trid; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + unsigned nsid; + unsigned slba; + unsigned nlbas; + uint32_t lba_size; + unsigned done; +}; + +struct cmb_t { + struct spdk_nvme_transport_id trid; + struct spdk_nvme_ctrlr *ctrlr; +}; + +struct config { + struct nvme_io read; + struct nvme_io write; + struct cmb_t cmb; + size_t copy_size; +}; + +static struct config g_config; + +/* Namespaces index from 1. 
Return 0 to invoke an error */ +static unsigned +get_nsid(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return g_config.read.nsid; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return g_config.write.nsid; + } + return 0; +} + +static int +get_rw(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return CMB_COPY_READ; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return CMB_COPY_WRITE; + } + return -1; +} + +static void +check_io(void *arg, const struct spdk_nvme_cpl *completion) +{ + int *rw = (unsigned *)arg; + + if (*rw == CMB_COPY_READ) { + g_config.read.done = 1; + } else { + g_config.write.done = 1; + } +} + +static int +cmb_copy(void) +{ + int rc = 0, rw; + void *buf; + size_t sz; + + /* Allocate QPs for the read and write controllers */ + g_config.read.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.read.ctrlr, NULL, 0); + g_config.write.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.write.ctrlr, NULL, 0); + if (g_config.read.qpair == NULL || g_config.read.qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return -ENOMEM; + } + + /* Allocate a buffer from our CMB */ + buf = spdk_nvme_ctrlr_map_cmb(g_config.cmb.ctrlr, &sz); + if (buf == NULL || sz < g_config.copy_size) { + printf("ERROR: buffer allocation failed\n"); + printf("Are you sure %s has a valid CMB?\n", + g_config.cmb.trid.traddr); + return -ENOMEM; + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + rw = CMB_COPY_READ; + /* Do the read to the CMB IO buffer */ + rc = spdk_nvme_ns_cmd_read(g_config.read.ns, g_config.read.qpair, buf, + g_config.read.slba, g_config.read.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + return -EIO; + } + while (!g_config.read.done) { + spdk_nvme_qpair_process_completions(g_config.read.qpair, 0); + } + + 
/* Do the write from the CMB IO buffer */ + rw = CMB_COPY_WRITE; + rc = spdk_nvme_ns_cmd_write(g_config.write.ns, g_config.write.qpair, buf, + g_config.write.slba, g_config.write.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + return -EIO; + } + while (!g_config.write.done) { + spdk_nvme_qpair_process_completions(g_config.write.qpair, 0); + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + /* Free CMB buffer */ + spdk_nvme_ctrlr_unmap_cmb(g_config.cmb.ctrlr); + + /* Free the queues */ + spdk_nvme_ctrlr_free_io_qpair(g_config.read.qpair); + spdk_nvme_ctrlr_free_io_qpair(g_config.write.qpair); + + return rc; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* We will only attach to the read or write controller */ + if (strcmp(trid->traddr, g_config.read.trid.traddr) && + strcmp(trid->traddr, g_config.write.trid.traddr)) { + printf("%s - not probed %s!\n", __func__, trid->traddr); + return 0; + } + + opts->use_cmb_sqs = false; + + printf("%s - probed %s!\n", __func__, trid->traddr); + return 1; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, get_nsid(trid)); + if (ns == NULL) { + fprintf(stderr, "Could not locate namespace %d on controller %s.\n", + get_nsid(trid), trid->traddr); + exit(-1); + } + if (get_rw(trid) == CMB_COPY_READ) { + g_config.read.ctrlr = ctrlr; + g_config.read.ns = ns; + g_config.read.lba_size = spdk_nvme_ns_get_sector_size(ns); + } else { + g_config.write.ctrlr = ctrlr; + g_config.write.ns = ns; + g_config.write.lba_size = spdk_nvme_ns_get_sector_size(ns); + } + printf("%s - attached %s!\n", __func__, trid->traddr); + + return; +} + +static void +usage(char *program_name) +{ + printf("%s options (all 
mandatory)", program_name); + printf("\n"); + printf("\t[-r NVMe read parameters]\n"); + printf("\t[-w NVMe write parameters]\n"); + printf("\t[-c CMB to use for data buffers]\n"); + printf("\n"); + printf("Read/Write params:\n"); + printf(" <pci id>-<namespace>-<start LBA>-<number of LBAs>\n"); +} + +static void +parse(char *in, struct nvme_io *io) +{ + char *tok = NULL; + long int val; + + tok = strtok(in, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + snprintf(&io->trid.traddr[0], SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + val = spdk_strtol(tok, 10); + if (val < 0) { + goto err; + } + io->nsid = (unsigned)val; + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + val = spdk_strtol(tok, 10); + if (val < 0) { + goto err; + } + io->slba = (unsigned)val; + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + val = spdk_strtol(tok, 10); + if (val < 0) { + goto err; + } + io->nlbas = (unsigned)val; + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok != NULL) { + goto err; + } + return; + +err: + fprintf(stderr, "%s: error parsing %s\n", __func__, in); + exit(-1); + +} + +static int +parse_args(int argc, char **argv) +{ + int op; + unsigned read = 0, write = 0, cmb = 0; + + while ((op = getopt(argc, argv, "r:w:c:")) != -1) { + switch (op) { + case 'r': + parse(optarg, &g_config.read); + read = 1; + break; + case 'w': + parse(optarg, &g_config.write); + write = 1; + break; + case 'c': + snprintf(g_config.cmb.trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", optarg); + cmb = 1; + break; + default: + usage(argv[0]); + return 1; + } + } + + if ((!read || !write || !cmb)) { + usage(argv[0]); + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc = 0; + struct spdk_env_opts opts; + + /* + * Parse the input arguments. 
For now we use the following + * format list: + * + * <pci id>-<namespace>-<start LBA>-<number of LBAs> + * + */ + rc = parse_args(argc, argv); + if (rc) { + fprintf(stderr, "Error in parse_args(): %d\n", + rc); + return -1; + } + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "cmb_copy"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + /* + * CMBs only apply to PCIe attached NVMe controllers so we + * only probe the PCIe bus. This is the default when we pass + * in NULL for the first argument. + */ + + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc) { + fprintf(stderr, "Error in spdk_nvme_probe(): %d\n", + rc); + return -1; + } + + /* + * For now enforce that the read and write controller are not + * the same. This avoids an internal only DMA. + */ + if (!strcmp(g_config.write.trid.traddr, g_config.read.trid.traddr)) { + fprintf(stderr, "Read and Write controllers must differ!\n"); + return -1; + } + + /* + * Perform a few sanity checks and set the buffer size for the + * CMB. + */ + if (g_config.read.nlbas * g_config.read.lba_size != + g_config.write.nlbas * g_config.write.lba_size) { + fprintf(stderr, "Read and write sizes do not match!\n"); + return -1; + } + g_config.copy_size = g_config.read.nlbas * g_config.read.lba_size; + + /* + * Get the ctrlr pointer for the CMB. For now we assume this + * is either the read or write NVMe controller though in + * theory that is not a necessary condition. 
+ */ + + if (!strcmp(g_config.cmb.trid.traddr, g_config.read.trid.traddr)) { + g_config.cmb.ctrlr = g_config.read.ctrlr; + } + if (!strcmp(g_config.cmb.trid.traddr, g_config.write.trid.traddr)) { + g_config.cmb.ctrlr = g_config.write.ctrlr; + } + + /* + * Call the cmb_copy() function which performs the CMB + * based copy or returns an error code if it fails. + */ + rc = cmb_copy(); + if (rc) { + fprintf(stderr, "Error in spdk_cmb_copy(): %d\n", + rc); + return -1; + } + + return rc; +} diff --git a/src/spdk/examples/nvme/fio_plugin/.gitignore b/src/spdk/examples/nvme/fio_plugin/.gitignore new file mode 100644 index 000000000..1b0b36ac4 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/src/spdk/examples/nvme/fio_plugin/Makefile b/src/spdk/examples/nvme/fio_plugin/Makefile new file mode 100644 index 000000000..1f71802df --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/Makefile @@ -0,0 +1,51 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +FIO_PLUGIN := spdk_nvme + +C_SRCS = fio_plugin.c + +# Unable to combine the FIO plugin and the VPP socket abstraction (license incompatibility) +SPDK_LIB_LIST = $(filter-out sock_vpp,$(SOCK_MODULES_LIST)) +SPDK_LIB_LIST += nvme thread util log sock vmd jsonrpc json rpc + +ifeq ($(CONFIG_RDMA),y) +SPDK_LIB_LIST += rdma +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.fio.mk diff --git a/src/spdk/examples/nvme/fio_plugin/README.md b/src/spdk/examples/nvme/fio_plugin/README.md new file mode 100644 index 000000000..e7a8b7c01 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/README.md @@ -0,0 +1,107 @@ +# Compiling fio + +First, clone the fio source repository from https://github.com/axboe/fio + + git clone https://github.com/axboe/fio + +Then check out the latest fio version and compile the code: + + make + +# Compiling SPDK + +First, clone the SPDK source repository from https://github.com/spdk/spdk + + git clone https://github.com/spdk/spdk + git submodule update --init + +Then, run the SPDK configure script to enable fio (point it to the root of the 
fio repository): + + cd spdk + ./configure --with-fio=/path/to/fio/repo <other configuration options> + +Finally, build SPDK: + + make + +**Note to advanced users**: These steps assume you're using the DPDK submodule. If you are using your +own version of DPDK, the fio plugin requires that DPDK be compiled with -fPIC. You can compile DPDK +with -fPIC by modifying your DPDK configuration file and adding the line: + + EXTRA_CFLAGS=-fPIC + +# Usage + +To use the SPDK fio plugin with fio, specify the plugin binary using LD_PRELOAD when running +fio and set ioengine=spdk in the fio configuration file (see example_config.fio in the same +directory as this README). + + LD_PRELOAD=<path to spdk repo>/build/fio/spdk_nvme fio + +To select NVMe devices, you pass an SPDK Transport Identifier string as the filename. These are in the +form: + + filename=key=value [key=value] ... ns=value + +Specifically, for local PCIe NVMe devices it will look like this: + + filename=trtype=PCIe traddr=0000.04.00.0 ns=1 + +And remote devices accessed via NVMe over Fabrics will look like this: + + filename=trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1 + +**Note**: The specification of the PCIe address should not use the normal ':' +and instead only use '.'. This is a limitation in fio - it splits filenames on +':'. Also, the NVMe namespaces start at 1, not 0, and the namespace must be +specified at the end of the string. + +Currently the SPDK fio plugin is limited to the thread usage model, so fio jobs must also specify thread=1 +when using the SPDK fio plugin. + +fio also currently has a race condition on shutdown if dynamically loading the ioengine by specifying the +engine's full path via the ioengine parameter - LD_PRELOAD is recommended to avoid this race condition. + +When testing random workloads, it is recommended to set norandommap=1. 
fio's random map +processing consumes extra CPU cycles which will degrade performance over time with +the fio_plugin since all I/O is submitted and completed on a single CPU core. + +When testing FIO on multiple NVMe SSDs with the SPDK plugin, it is recommended to use multiple jobs in the FIO configuration. +It has been observed that there is a performance gap between FIO (with the SPDK plugin enabled) and SPDK perf +(examples/nvme/perf/perf) when testing multiple NVMe SSDs. If you use one job (i.e., one CPU core) for the +FIO test, the performance is worse than SPDK perf (also using one CPU core) against many NVMe SSDs. But if you use +multiple jobs for the FIO test, the performance of FIO is similar to that of SPDK perf. After analyzing this phenomenon, we +think this is caused by the FIO architecture. FIO scales well with multiple threads (i.e., multiple CPU cores), +but it does not perform well when one thread drives many I/O devices. + +# End-to-end Data Protection (Optional) + +To run with PI enabled, the following setup steps are required. +First, format the device namespace with the proper PI setting. For example: + + nvme format /dev/nvme0n1 -l 1 -i 1 -p 0 -m 1 + +In the fio configuration file, add PRACT and set the PRCHK flags (GUARD|REFTAG|APPTAG) as appropriate. For example: + + pi_act=0 + pi_chk=GUARD + +Blocksize should be set to the sum of data and metadata. For example, if the data blocksize is 512 bytes and the host-generated +PI metadata is 8 bytes, then the blocksize in the fio configuration file should be 520 bytes: + + bs=520 + +The storage device may use a block format that requires separate metadata (DIX). In this scenario, the fio_plugin +will automatically allocate an extra 4KiB buffer per I/O to hold this metadata. For some cases, such as 512 byte +blocks with 32 metadata bytes per block and a 128KiB I/O size, 4KiB isn't large enough. In this case, the +`md_per_io_size` option may be specified to increase the size of the metadata buffer. 
+ +Two options, 'apptag' and 'apptag_mask', are exposed; users can change them in the configuration file when using the +application tag and application tag mask in end-to-end data protection. The application tag and application +tag mask are set to 0x1234 and 0xFFFF, respectively, by default. + +# VMD (Optional) + +To enable VMD enumeration, add the enable_vmd flag to the fio configuration file: + + enable_vmd=1 diff --git a/src/spdk/examples/nvme/fio_plugin/example_config.fio b/src/spdk/examples/nvme/fio_plugin/example_config.fio new file mode 100644 index 000000000..a8e62ccb9 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/example_config.fio @@ -0,0 +1,15 @@ +[global] +ioengine=spdk +thread=1 +group_reporting=1 +direct=1 +verify=0 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=4k + +[test] +numjobs=1 diff --git a/src/spdk/examples/nvme/fio_plugin/fio_plugin.c b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c new file mode 100644 index 000000000..7aabeb8cb --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c @@ -0,0 +1,1267 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/vmd.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/dif.h" +#include "spdk/util.h" + +#include "config-host.h" +#include "fio.h" +#include "optgroup.h" + +/* FreeBSD is missing CLOCK_MONOTONIC_RAW, + * so alternative is provided. 
*/ +#ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC +#endif + +#define NVME_IO_ALIGN 4096 + +static bool g_spdk_env_initialized; +static int g_spdk_enable_sgl = 0; +static uint32_t g_spdk_sge_size = 4096; +static uint32_t g_spdk_bit_bucket_data_len = 0; +static uint32_t g_spdk_pract_flag; +static uint32_t g_spdk_prchk_flags; +static uint32_t g_spdk_md_per_io_size = 4096; +static uint16_t g_spdk_apptag; +static uint16_t g_spdk_apptag_mask; + +struct spdk_fio_options { + void *pad; /* off1 used in option descriptions may not be 0 */ + int enable_wrr; + int arbitration_burst; + int low_weight; + int medium_weight; + int high_weight; + int wrr_priority; + int mem_size; + int shm_id; + int enable_sgl; + int sge_size; + int bit_bucket_data_len; + char *hostnqn; + int pi_act; + char *pi_chk; + int md_per_io_size; + int apptag; + int apptag_mask; + char *digest_enable; + int enable_vmd; +}; + +struct spdk_fio_request { + struct io_u *io; + /** Offset in current iovec, fio only uses 1 vector */ + uint32_t iov_offset; + + /** Amount of data used for Bit Bucket SGL */ + uint32_t bit_bucket_data_len; + + /** Context for NVMe PI */ + struct spdk_dif_ctx dif_ctx; + /** Separate metadata buffer pointer */ + void *md_buf; + + struct spdk_fio_thread *fio_thread; + struct spdk_fio_qpair *fio_qpair; +}; + +struct spdk_fio_ctrlr { + struct spdk_nvme_transport_id tr_id; + struct spdk_nvme_ctrlr_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_fio_ctrlr *next; +}; + +static struct spdk_fio_ctrlr *g_ctrlr; +static int g_td_count; +static pthread_t g_ctrlr_thread_id = 0; +static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER; +static bool g_error; + +struct spdk_fio_qpair { + struct fio_file *f; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + uint32_t io_flags; + bool nvme_pi_enabled; + /* True for DIF and false for DIX, and this is valid only if nvme_pi_enabled is true. 
*/ + bool extended_lba; + /* True for protection info transferred at start of metadata, + * false for protection info transferred at end of metadata, and + * this is valid only if nvme_pi_enabled is true. + */ + bool md_start; + struct spdk_fio_qpair *next; + struct spdk_fio_ctrlr *fio_ctrlr; +}; + +struct spdk_fio_thread { + struct thread_data *td; + + struct spdk_fio_qpair *fio_qpair; + struct spdk_fio_qpair *fio_qpair_current; /* the current fio_qpair to be handled. */ + + struct io_u **iocq; /* io completion queue */ + unsigned int iocq_count; /* number of iocq entries filled by last getevents */ + unsigned int iocq_size; /* number of iocq entries allocated */ + struct fio_file *current_f; /* fio_file given by user */ + +}; + +static void * +spdk_fio_poll_ctrlrs(void *arg) +{ + struct spdk_fio_ctrlr *fio_ctrlr; + int oldstate; + int rc; + + /* Loop until the thread is cancelled */ + while (true) { + rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + pthread_mutex_lock(&g_mutex); + fio_ctrlr = g_ctrlr; + + while (fio_ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr); + fio_ctrlr = fio_ctrlr->next; + } + + pthread_mutex_unlock(&g_mutex); + + rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + /* This is a pthread cancellation point and cannot be removed. 
*/ + sleep(1); + } + + return NULL; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_options *fio_options = td->eo; + + if (fio_options->hostnqn) { + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn); + } + + if (fio_options->enable_wrr) { + opts->arb_mechanism = SPDK_NVME_CC_AMS_WRR; + opts->arbitration_burst = fio_options->arbitration_burst; + opts->low_priority_weight = fio_options->low_weight; + opts->medium_priority_weight = fio_options->medium_weight; + opts->high_priority_weight = fio_options->high_weight; + } + + if (fio_options->digest_enable) { + if (strcasecmp(fio_options->digest_enable, "HEADER") == 0) { + opts->header_digest = true; + } else if (strcasecmp(fio_options->digest_enable, "DATA") == 0) { + opts->data_digest = true; + } else if (strcasecmp(fio_options->digest_enable, "BOTH") == 0) { + opts->header_digest = true; + opts->data_digest = true; + } + } + + return true; +} + +static struct spdk_fio_ctrlr * +get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_fio_ctrlr *fio_ctrlr = g_ctrlr; + while (fio_ctrlr) { + if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) { + return fio_ctrlr; + } + + fio_ctrlr = fio_ctrlr->next; + } + + return NULL; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_nvme_io_qpair_opts qpopts; + struct spdk_fio_ctrlr *fio_ctrlr; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ns_data *nsdata; + struct fio_file *f = fio_thread->current_f; + uint32_t ns_id; + char *p; + long int tmp; + struct spdk_fio_options *fio_options = td->eo; + + p = strstr(f->file_name, "ns="); + if (p != NULL) 
{ + tmp = spdk_strtol(p + 3, 10); + if (tmp <= 0) { + SPDK_ERRLOG("namespace id should be >=1, but was invalid: %ld\n", tmp); + g_error = true; + return; + } + ns_id = (uint32_t)tmp; + } else { + ns_id = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + if (ns_id == 0) { + /* The ctrlr has no active namespaces and we didn't specify any so nothing to do. */ + return; + } + } + + pthread_mutex_lock(&g_mutex); + fio_ctrlr = get_fio_ctrlr(trid); + /* it is a new ctrlr and needs to be added */ + if (!fio_ctrlr) { + /* Create an fio_ctrlr and add it to the list */ + fio_ctrlr = calloc(1, sizeof(*fio_ctrlr)); + if (!fio_ctrlr) { + SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n"); + g_error = true; + pthread_mutex_unlock(&g_mutex); + return; + } + fio_ctrlr->opts = *opts; + fio_ctrlr->ctrlr = ctrlr; + fio_ctrlr->tr_id = *trid; + fio_ctrlr->next = g_ctrlr; + g_ctrlr = fio_ctrlr; + } + pthread_mutex_unlock(&g_mutex); + + ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id); + if (ns == NULL) { + SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + + if (!spdk_nvme_ns_is_active(ns)) { + SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + nsdata = spdk_nvme_ns_get_data(ns); + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if ((fio_qpair->f == f) || + ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) && + (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) { + /* Not the error case. 
Avoid duplicated connection */ + return; + } + fio_qpair = fio_qpair->next; + } + + /* create a new qpair */ + fio_qpair = calloc(1, sizeof(*fio_qpair)); + if (!fio_qpair) { + g_error = true; + SPDK_ERRLOG("Cannot allocate space for fio_qpair\n"); + return; + } + + spdk_nvme_ctrlr_get_default_io_qpair_opts(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts)); + qpopts.delay_cmd_submit = true; + if (fio_options->enable_wrr) { + qpopts.qprio = fio_options->wrr_priority; + } + + fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts)); + if (!fio_qpair->qpair) { + SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n"); + g_error = true; + free(fio_qpair); + return; + } + + fio_qpair->ns = ns; + fio_qpair->f = f; + fio_qpair->fio_ctrlr = fio_ctrlr; + fio_qpair->next = fio_thread->fio_qpair; + fio_thread->fio_qpair = fio_qpair; + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + assert(spdk_nvme_ns_get_pi_type(ns) != SPDK_NVME_FMT_NVM_PROTECTION_DISABLE); + fio_qpair->io_flags = g_spdk_pract_flag | g_spdk_prchk_flags; + fio_qpair->nvme_pi_enabled = true; + fio_qpair->md_start = nsdata->dps.md_start; + fio_qpair->extended_lba = spdk_nvme_ns_supports_extended_lba(ns); + fprintf(stdout, "PI type%u enabled with %s\n", spdk_nvme_ns_get_pi_type(ns), + fio_qpair->extended_lba ? 
"extended lba" : "separate metadata"); + } + + f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); + if (f->real_file_size <= 0) { + g_error = true; + SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns); + return; + } + + f->filetype = FIO_TYPE_BLOCK; + fio_file_set_size_known(f); +} + +static void parse_prchk_flags(const char *prchk_str) +{ + if (!prchk_str) { + return; + } + + if (strstr(prchk_str, "GUARD") != NULL) { + g_spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(prchk_str, "REFTAG") != NULL) { + g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(prchk_str, "APPTAG") != NULL) { + g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } +} + +static void parse_pract_flag(int pract) +{ + if (pract == 1) { + g_spdk_pract_flag = SPDK_NVME_IO_FLAGS_PRACT; + } else { + g_spdk_pract_flag = 0; + } +} + +/* Called once at initialization. This is responsible for gathering the size of + * each "file", which in our case are in the form + * 'key=value [key=value] ... ns=value' + * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1' + * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */ +static int spdk_fio_setup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + struct spdk_fio_options *fio_options = td->eo; + struct spdk_env_opts opts; + struct fio_file *f; + char *p; + int rc = 0; + struct spdk_nvme_transport_id trid; + struct spdk_fio_ctrlr *fio_ctrlr; + char *trid_info; + unsigned int i; + + /* we might be running in a daemonized FIO instance where standard + * input and output were closed and fds 0, 1, and 2 are reused + * for something important by FIO. We can't ensure we won't print + * anything (and so will our dependencies, e.g. DPDK), so abort early. 
+ * (is_backend is an fio global variable) + */ + if (is_backend) { + char buf[1024]; + snprintf(buf, sizeof(buf), + "SPDK FIO plugin won't work with daemonized FIO server."); + fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf)); + return -1; + } + + if (!td->o.use_thread) { + log_err("spdk: must set thread=1 when using spdk plugin\n"); + return 1; + } + + pthread_mutex_lock(&g_mutex); + + fio_thread = calloc(1, sizeof(*fio_thread)); + assert(fio_thread != NULL); + + td->io_ops_data = fio_thread; + fio_thread->td = td; + + fio_thread->iocq_size = td->o.iodepth; + fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); + assert(fio_thread->iocq != NULL); + + if (!g_spdk_env_initialized) { + spdk_env_opts_init(&opts); + opts.name = "fio"; + opts.mem_size = fio_options->mem_size; + opts.shm_id = fio_options->shm_id; + g_spdk_enable_sgl = fio_options->enable_sgl; + g_spdk_sge_size = fio_options->sge_size; + g_spdk_bit_bucket_data_len = fio_options->bit_bucket_data_len; + parse_pract_flag(fio_options->pi_act); + g_spdk_md_per_io_size = spdk_max(fio_options->md_per_io_size, 4096); + g_spdk_apptag = (uint16_t)fio_options->apptag; + g_spdk_apptag_mask = (uint16_t)fio_options->apptag_mask; + parse_prchk_flags(fio_options->pi_chk); + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + free(fio_thread->iocq); + free(fio_thread); + fio_thread = NULL; + pthread_mutex_unlock(&g_mutex); + return 1; + } + g_spdk_env_initialized = true; + spdk_unaffinitize_thread(); + + /* Spawn a thread to continue polling the controllers */ + rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL); + if (rc != 0) { + SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n"); + } + + if (fio_options->enable_vmd && spdk_vmd_init()) { + SPDK_ERRLOG("Failed to initialize VMD. 
Some NVMe devices can be unavailable.\n"); + } + } + pthread_mutex_unlock(&g_mutex); + + for_each_file(td, f, i) { + memset(&trid, 0, sizeof(trid)); + + trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + + p = strstr(f->file_name, " ns="); + if (p != NULL) { + trid_info = strndup(f->file_name, p - f->file_name); + } else { + trid_info = strndup(f->file_name, strlen(f->file_name)); + } + + if (!trid_info) { + SPDK_ERRLOG("Failed to allocate space for trid_info\n"); + continue; + } + + rc = spdk_nvme_transport_id_parse(&trid, trid_info); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info); + free(trid_info); + continue; + } + free(trid_info); + + if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_addr pci_addr; + if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) { + SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr); + continue; + } + spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); + } else { + if (trid.subnqn[0] == '\0') { + snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", + SPDK_NVMF_DISCOVERY_NQN); + } + } + + fio_thread->current_f = f; + + pthread_mutex_lock(&g_mutex); + fio_ctrlr = get_fio_ctrlr(&trid); + pthread_mutex_unlock(&g_mutex); + if (fio_ctrlr) { + attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts); + } else { + /* Enumerate all of the controllers */ + if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) { + SPDK_ERRLOG("spdk_nvme_probe() failed\n"); + continue; + } + } + + if (g_error) { + log_err("Failed to initialize spdk fio plugin\n"); + rc = 1; + break; + } + } + + pthread_mutex_lock(&g_mutex); + g_td_count++; + pthread_mutex_unlock(&g_mutex); + + return rc; +} + +static int spdk_fio_open(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_close(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) +{ + td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL); 
+ return td->orig_buffer == NULL; +} + +static void spdk_fio_iomem_free(struct thread_data *td) +{ + spdk_dma_free(td->orig_buffer); +} + +static int spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req; + + io_u->engine_data = NULL; + + fio_req = calloc(1, sizeof(*fio_req)); + if (fio_req == NULL) { + return 1; + } + + fio_req->md_buf = spdk_dma_zmalloc(g_spdk_md_per_io_size, NVME_IO_ALIGN, NULL); + if (fio_req->md_buf == NULL) { + fprintf(stderr, "Allocate %u metadata failed\n", g_spdk_md_per_io_size); + free(fio_req); + return 1; + } + + fio_req->io = io_u; + fio_req->fio_thread = fio_thread; + + io_u->engine_data = fio_req; + + return 0; +} + +static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req = io_u->engine_data; + + if (fio_req) { + assert(fio_req->io == io_u); + spdk_dma_free(fio_req->md_buf); + free(fio_req); + io_u->engine_data = NULL; + } +} + +static int +fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = fio_qpair->ns; + struct spdk_fio_request *fio_req = io_u->engine_data; + uint32_t md_size, extended_lba_size, lba_count; + uint64_t lba; + struct iovec iov; + int rc; + + /* Set appmask and apptag when PRACT is enabled */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask; + fio_req->dif_ctx.app_tag = g_spdk_apptag; + return 0; + } + + extended_lba_size = spdk_nvme_ns_get_extended_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + rc = spdk_dif_ctx_init(&fio_req->dif_ctx, extended_lba_size, md_size, + true, fio_qpair->md_start, + (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns), + fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 0, 0); + if (rc != 0) { + 
fprintf(stderr, "Initialization of DIF context failed\n"); + return rc; + } + + if (io_u->ddir != DDIR_WRITE) { + return 0; + } + + iov.iov_base = io_u->buf; + iov.iov_len = io_u->xfer_buflen; + rc = spdk_dif_generate(&iov, 1, lba_count, &fio_req->dif_ctx); + if (rc != 0) { + fprintf(stderr, "Generation of DIF failed\n"); + } + + return rc; +} + +static int +fio_separate_md_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = fio_qpair->ns; + struct spdk_fio_request *fio_req = io_u->engine_data; + uint32_t md_size, block_size, lba_count; + uint64_t lba; + struct iovec iov, md_iov; + int rc; + + /* Set appmask and apptag when PRACT is enabled */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask; + fio_req->dif_ctx.app_tag = g_spdk_apptag; + return 0; + } + + block_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + lba = io_u->offset / block_size; + lba_count = io_u->xfer_buflen / block_size; + + rc = spdk_dif_ctx_init(&fio_req->dif_ctx, block_size, md_size, + false, fio_qpair->md_start, + (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns), + fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 0, 0); + if (rc != 0) { + fprintf(stderr, "Initialization of DIF context failed\n"); + return rc; + } + + if (io_u->ddir != DDIR_WRITE) { + return 0; + } + + iov.iov_base = io_u->buf; + iov.iov_len = io_u->xfer_buflen; + md_iov.iov_base = fio_req->md_buf; + md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size); + rc = spdk_dix_generate(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx); + if (rc < 0) { + fprintf(stderr, "Generation of DIX failed\n"); + } + + return rc; +} + +static int +fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = fio_qpair->ns; + struct spdk_fio_request *fio_req = io_u->engine_data; + uint32_t lba_count; + struct iovec iov; + struct 
spdk_dif_error err_blk = {}; + int rc; + + /* Do nothing when PRACT is enabled */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return 0; + } + + iov.iov_base = io_u->buf; + iov.iov_len = io_u->xfer_buflen; + lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_extended_sector_size(ns); + + rc = spdk_dif_verify(&iov, 1, lba_count, &fio_req->dif_ctx, &err_blk); + if (rc != 0) { + fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + } + + return rc; +} + +static int +fio_separate_md_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = fio_qpair->ns; + struct spdk_fio_request *fio_req = io_u->engine_data; + uint32_t md_size, lba_count; + struct iovec iov, md_iov; + struct spdk_dif_error err_blk = {}; + int rc; + + /* Do nothing when PRACT is enabled */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return 0; + } + + iov.iov_base = io_u->buf; + iov.iov_len = io_u->xfer_buflen; + lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + md_iov.iov_base = fio_req->md_buf; + md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size); + + rc = spdk_dix_verify(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx, &err_blk); + if (rc != 0) { + fprintf(stderr, "DIX error detected. 
type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + } + + return rc; +} + +static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_fio_request *fio_req = ctx; + struct spdk_fio_thread *fio_thread = fio_req->fio_thread; + struct spdk_fio_qpair *fio_qpair = fio_req->fio_qpair; + int rc; + + if (fio_qpair->nvme_pi_enabled && fio_req->io->ddir == DDIR_READ) { + if (fio_qpair->extended_lba) { + rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io); + } else { + rc = fio_separate_md_verify_pi(fio_qpair, fio_req->io); + } + if (rc != 0) { + fio_req->io->error = abs(rc); + } + } + + assert(fio_thread->iocq_count < fio_thread->iocq_size); + fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; +} + +static void +spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + + fio_req->iov_offset = sgl_offset; + fio_req->bit_bucket_data_len = 0; +} + +static int +spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + struct io_u *io_u = fio_req->io; + uint32_t iov_len; + uint32_t bit_bucket_len; + + *address = io_u->buf; + + if (fio_req->iov_offset) { + assert(fio_req->iov_offset <= io_u->xfer_buflen); + *address += fio_req->iov_offset; + } + + iov_len = io_u->xfer_buflen - fio_req->iov_offset; + if (iov_len > g_spdk_sge_size) { + iov_len = g_spdk_sge_size; + } + + if ((fio_req->bit_bucket_data_len < g_spdk_bit_bucket_data_len) && (io_u->ddir == DDIR_READ)) { + assert(g_spdk_bit_bucket_data_len < io_u->xfer_buflen); + *address = (void *)UINT64_MAX; + bit_bucket_len = g_spdk_bit_bucket_data_len - fio_req->bit_bucket_data_len; + if (iov_len > bit_bucket_len) { + iov_len = bit_bucket_len; + } + fio_req->bit_bucket_data_len += iov_len; + } + + fio_req->iov_offset += iov_len; + *length = iov_len; + + return 0; +} + +#if FIO_IOOPS_VERSION >= 24 +typedef enum 
fio_q_status fio_q_status_t; +#else +typedef int fio_q_status_t; +#endif + +static fio_q_status_t +spdk_fio_queue(struct thread_data *td, struct io_u *io_u) +{ + int rc = 1; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns = NULL; + void *md_buf = NULL; + struct spdk_dif_ctx *dif_ctx = &fio_req->dif_ctx; + uint32_t block_size; + uint64_t lba; + uint32_t lba_count; + + /* Find the namespace that corresponds to the file in the io_u */ + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if (fio_qpair->f == io_u->file) { + ns = fio_qpair->ns; + break; + } + fio_qpair = fio_qpair->next; + } + if (fio_qpair == NULL || ns == NULL) { + return -ENXIO; + } + if (fio_qpair->nvme_pi_enabled && !fio_qpair->extended_lba) { + md_buf = fio_req->md_buf; + } + fio_req->fio_qpair = fio_qpair; + + block_size = spdk_nvme_ns_get_extended_sector_size(ns); + if ((fio_qpair->io_flags & g_spdk_pract_flag) && (spdk_nvme_ns_get_md_size(ns) == 8)) { + /* If metadata size = 8 bytes, PI is stripped (read) or inserted (write), and + * so reduce metadata size from block size. (If metadata size > 8 bytes, PI + * is passed (read) or replaced (write). So block size is not necessary to + * change.) 
+ */ + block_size = spdk_nvme_ns_get_sector_size(ns); + } + + lba = io_u->offset / block_size; + lba_count = io_u->xfer_buflen / block_size; + + /* TODO: considering situations that fio will randomize and verify io_u */ + if (fio_qpair->nvme_pi_enabled) { + if (fio_qpair->extended_lba) { + rc = fio_extended_lba_setup_pi(fio_qpair, io_u); + } else { + rc = fio_separate_md_setup_pi(fio_qpair, io_u); + } + if (rc < 0) { + io_u->error = -rc; + return FIO_Q_COMPLETED; + } + } + + switch (io_u->ddir) { + case DDIR_READ: + if (!g_spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag); + } else { + rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf, + dif_ctx->apptag_mask, dif_ctx->app_tag); + } + break; + case DDIR_WRITE: + if (!g_spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, + lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag); + } else { + rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf, + dif_ctx->apptag_mask, dif_ctx->app_tag); + } + break; + default: + assert(false); + break; + } + + /* NVMe read/write functions return -ENOMEM if there are no free requests. 
*/ + if (rc == -ENOMEM) { + return FIO_Q_BUSY; + } + + if (rc != 0) { + io_u->error = abs(rc); + return FIO_Q_COMPLETED; + } + + return FIO_Q_QUEUED; +} + +static struct io_u *spdk_fio_event(struct thread_data *td, int event) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(event >= 0); + assert((unsigned)event < fio_thread->iocq_count); + return fio_thread->iocq[event]; +} + +static int spdk_fio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, const struct timespec *t) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair = NULL; + struct timespec t0, t1; + uint64_t timeout = 0; + + if (t) { + timeout = t->tv_sec * 1000000000L + t->tv_nsec; + clock_gettime(CLOCK_MONOTONIC_RAW, &t0); + } + + fio_thread->iocq_count = 0; + + /* fetch the next qpair */ + if (fio_thread->fio_qpair_current) { + fio_qpair = fio_thread->fio_qpair_current->next; + } + + for (;;) { + if (fio_qpair == NULL) { + fio_qpair = fio_thread->fio_qpair; + } + + while (fio_qpair != NULL) { + spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count); + + if (fio_thread->iocq_count >= min) { + /* reset the currrent handling qpair */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; + } + + fio_qpair = fio_qpair->next; + } + + if (t) { + uint64_t elapse; + + clock_gettime(CLOCK_MONOTONIC_RAW, &t1); + elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) + + t1.tv_nsec - t0.tv_nsec; + if (elapse > timeout) { + break; + } + } + } + + /* reset the currrent handling qpair */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; +} + +static int spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) +{ + /* TODO: This should probably send a flush to the device, but for now just return successful. 
*/ + return 0; +} + +static void spdk_fio_cleanup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp; + struct spdk_fio_options *fio_options = td->eo; + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair); + fio_qpair_tmp = fio_qpair->next; + free(fio_qpair); + fio_qpair = fio_qpair_tmp; + } + + free(fio_thread->iocq); + free(fio_thread); + + pthread_mutex_lock(&g_mutex); + g_td_count--; + if (g_td_count == 0) { + struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp; + + fio_ctrlr = g_ctrlr; + while (fio_ctrlr != NULL) { + spdk_nvme_detach(fio_ctrlr->ctrlr); + fio_ctrlr_tmp = fio_ctrlr->next; + free(fio_ctrlr); + fio_ctrlr = fio_ctrlr_tmp; + } + g_ctrlr = NULL; + + if (fio_options->enable_vmd) { + spdk_vmd_fini(); + } + } + pthread_mutex_unlock(&g_mutex); + if (!g_ctrlr) { + if (pthread_cancel(g_ctrlr_thread_id) == 0) { + pthread_join(g_ctrlr_thread_id, NULL); + } + } +} + +/* This function enables addition of SPDK parameters to the fio config + * Adding new parameters by defining them here and defining a callback + * function to read the parameter value. 
*/ +static struct fio_option options[] = { + { + .name = "enable_wrr", + .lname = "Enable weighted round robin (WRR) for IO submission queues", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, enable_wrr), + .def = "0", + .help = "Enable weighted round robin (WRR) for IO submission queues", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "arbitration_burst", + .lname = "Arbitration Burst", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, arbitration_burst), + .def = "0", + .help = "Arbitration Burst used for WRR (valid range from 0 - 7)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "low_weight", + .lname = "low_weight for WRR", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, low_weight), + .def = "0", + .help = "low_weight used for WRR (valid range from 0 - 255)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "medium_weight", + .lname = "medium_weight for WRR", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, medium_weight), + .def = "0", + .help = "medium weight used for WRR (valid range from 0 - 255)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "high_weight", + .lname = "high_weight for WRR", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, high_weight), + .def = "0", + .help = "high weight used for WRR (valid range from 0 - 255)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "wrr_priority", + .lname = "priority used for WRR", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, wrr_priority), + .def = "0", + .help = "priority used for WRR (valid range from 0-3)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "mem_size_mb", + .lname = "Memory size in MB", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, mem_size), + .def = "0", + 
.help = "Memory Size for SPDK (MB)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "shm_id", + .lname = "shared memory ID", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, shm_id), + .def = "-1", + .help = "Shared Memory ID", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "enable_sgl", + .lname = "SGL used for I/O commands", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, enable_sgl), + .def = "0", + .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "sge_size", + .lname = "SGL size used for I/O commands", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, sge_size), + .def = "4096", + .help = "SGL size in bytes for I/O Commands (default 4096)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "bit_bucket_data_len", + .lname = "Amount of data used for Bit Bucket", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, bit_bucket_data_len), + .def = "0", + .help = "Bit Bucket Data Length for READ commands (disabled by default)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "hostnqn", + .lname = "Host NQN to use when connecting to controllers.", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, hostnqn), + .help = "Host NQN", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_act", + .lname = "Protection Information Action", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, pi_act), + .def = "1", + .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_chk", + .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, pi_chk), + 
.def = NULL, + .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "md_per_io_size", + .lname = "Separate Metadata Buffer Size per I/O", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, md_per_io_size), + .def = "4096", + .help = "Size of separate metadata buffer per I/O (Default: 4096)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "apptag", + .lname = "Application Tag used in Protection Information", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, apptag), + .def = "0x1234", + .help = "Application Tag used in Protection Information field (Default: 0x1234)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "apptag_mask", + .lname = "Application Tag Mask", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, apptag_mask), + .def = "0xffff", + .help = "Application Tag Mask used with Application Tag (Default: 0xffff)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "digest_enable", + .lname = "PDU digest choice for NVMe/TCP Transport(NONE|HEADER|DATA|BOTH)", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, digest_enable), + .def = NULL, + .help = "Control the NVMe/TCP control(digest_enable=NONE|HEADER|DATA|BOTH)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "enable_vmd", + .lname = "Enable VMD enumeration", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, enable_vmd), + .def = "0", + .help = "Enable VMD enumeration (enable_vmd=1 or enable_vmd=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = NULL, + }, +}; + +/* FIO imports this structure using dlsym */ +struct ioengine_ops ioengine = { + .name = "spdk", + .version = FIO_IOOPS_VERSION, + .queue = spdk_fio_queue, + .getevents = 
spdk_fio_getevents, + .event = spdk_fio_event, + .cleanup = spdk_fio_cleanup, + .open_file = spdk_fio_open, + .close_file = spdk_fio_close, + .invalidate = spdk_fio_invalidate, + .iomem_alloc = spdk_fio_iomem_alloc, + .iomem_free = spdk_fio_iomem_free, + .setup = spdk_fio_setup, + .io_u_init = spdk_fio_io_u_init, + .io_u_free = spdk_fio_io_u_free, + .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN, + .options = options, + .option_struct_size = sizeof(struct spdk_fio_options), +}; + +static void fio_init fio_spdk_register(void) +{ + register_ioengine(&ioengine); +} + +static void fio_exit fio_spdk_unregister(void) +{ + unregister_ioengine(&ioengine); +} diff --git a/src/spdk/examples/nvme/fio_plugin/full_bench.fio b/src/spdk/examples/nvme/fio_plugin/full_bench.fio new file mode 100644 index 000000000..4dea21d13 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/full_bench.fio @@ -0,0 +1,40 @@ +[global] +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +disable_slat=1 +disable_bw=1 +lat_percentiles=1 +clat_percentiles=0 +percentile_list=50:99:99.999 + +[precondition-sequential] +stonewall +description="Sequentially write to the device twice" +rw=write +iodepth=128 +bs=128k +loops=2 + +[4k_randwrite_qd1] +stonewall +description="4KiB Random Write QD=1" +bs=4k +rw=randwrite +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 + +[4k_randread_qd1] +stonewall +description="4KiB Random Read QD=1" +bs=4k +rw=randread +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 diff --git a/src/spdk/examples/nvme/fio_plugin/mock_sgl_config.fio b/src/spdk/examples/nvme/fio_plugin/mock_sgl_config.fio new file mode 100644 index 000000000..713fce0a2 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/mock_sgl_config.fio @@ -0,0 +1,17 @@ +[global] +ioengine=spdk +thread=1 +group_reporting=1 +direct=1 +enable_sgl=1 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=16k +verify=md5 +verify_backlog=32 + +[test] +numjobs=1 diff 
--git a/src/spdk/examples/nvme/hello_world/.gitignore b/src/spdk/examples/nvme/hello_world/.gitignore new file mode 100644 index 000000000..242c034c1 --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/.gitignore @@ -0,0 +1 @@ +hello_world diff --git a/src/spdk/examples/nvme/hello_world/Makefile b/src/spdk/examples/nvme/hello_world/Makefile new file mode 100644 index 000000000..bbb3527cb --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = hello_world + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hello_world/hello_world.c b/src/spdk/examples/nvme/hello_world/hello_world.c new file mode 100644 index 000000000..6e1d9d62a --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/hello_world.c @@ -0,0 +1,435 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/vmd.h" +#include "spdk/env.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct ns_entry *next; + struct spdk_nvme_qpair *qpair; +}; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; + +static bool g_vmd = false; + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + + if (!spdk_nvme_ns_is_active(ns)) { + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->next = g_namespaces; + g_namespaces = entry; + + printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns) / 1000000000); +} + +struct hello_world_sequence { + struct ns_entry *ns_entry; + char *buf; + unsigned using_cmb_io; + int is_completed; +}; + +static void +read_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + + /* Assume the I/O was successful */ + sequence->is_completed = 1; + /* See if an error occurred. If so, display information + * about it, and set completion value so that I/O + * caller is aware that an error occurred. 
+ */ + if (spdk_nvme_cpl_is_error(completion)) { + spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair, (struct spdk_nvme_cpl *)completion); + fprintf(stderr, "I/O error status: %s\n", spdk_nvme_cpl_get_status_string(&completion->status)); + fprintf(stderr, "Read I/O failed, aborting run\n"); + sequence->is_completed = 2; + } + + /* + * The read I/O has completed. Print the contents of the + * buffer, free the buffer, then mark the sequence as + * completed. This will trigger the hello_world() function + * to exit its polling loop. + */ + printf("%s", sequence->buf); + spdk_free(sequence->buf); +} + +static void +write_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + struct ns_entry *ns_entry = sequence->ns_entry; + int rc; + + /* See if an error occurred. If so, display information + * about it, and set completion value so that I/O + * caller is aware that an error occurred. + */ + if (spdk_nvme_cpl_is_error(completion)) { + spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair, (struct spdk_nvme_cpl *)completion); + fprintf(stderr, "I/O error status: %s\n", spdk_nvme_cpl_get_status_string(&completion->status)); + fprintf(stderr, "Write I/O failed, aborting run\n"); + sequence->is_completed = 2; + exit(1); + } + /* + * The write I/O has completed. Free the buffer associated with + * the write I/O and allocate a new zeroed buffer for reading + * the data back from the NVMe namespace. 
+ */ + if (sequence->using_cmb_io) { + spdk_nvme_ctrlr_unmap_cmb(ns_entry->ctrlr); + } else { + spdk_free(sequence->buf); + } + sequence->buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (sequence->buf == NULL) { + /* Without this check the read below would be submitted with a NULL data buffer. */ + fprintf(stderr, "ERROR: read buffer allocation failed\n"); + exit(1); + } + + rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + read_complete, (void *)sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + exit(1); + } +} + +static void +hello_world(void) +{ + struct ns_entry *ns_entry; + struct hello_world_sequence sequence; + int rc; + size_t sz; + + ns_entry = g_namespaces; + while (ns_entry != NULL) { + /* + * Allocate an I/O qpair that we can use to submit read/write requests + * to namespaces on the controller. NVMe controllers typically support + * many qpairs per controller. Any I/O qpair allocated for a controller + * can submit I/O to any namespace on that controller. + * + * The SPDK NVMe driver provides no synchronization for qpair accesses - + * the application must ensure only a single thread submits I/O to a + * qpair, and that same thread must also check for completions on that + * qpair. This enables extremely efficient I/O processing by making all + * I/O operations completely lockless. + */ + ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, NULL, 0); + if (ns_entry->qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return; + } + + /* + * Map the controller memory buffer if it is at least 4KB; otherwise fall + * back to spdk_zmalloc() to allocate a 4KB zeroed buffer. This memory + * will be pinned, which is required for data buffers used for SPDK NVMe + * I/O operations. 
+ */ + sequence.using_cmb_io = 1; + sequence.buf = spdk_nvme_ctrlr_map_cmb(ns_entry->ctrlr, &sz); + if (sequence.buf == NULL || sz < 0x1000) { + if (sequence.buf != NULL) { + /* The CMB was mapped but is smaller than 4KB; release the mapping before falling back. */ + spdk_nvme_ctrlr_unmap_cmb(ns_entry->ctrlr); + } + sequence.using_cmb_io = 0; + sequence.buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + } + if (sequence.buf == NULL) { + printf("ERROR: write buffer allocation failed\n"); + /* Release the I/O qpair allocated for this namespace so it is not leaked. */ + spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair); + ns_entry->qpair = NULL; + return; + } + if (sequence.using_cmb_io) { + printf("INFO: using controller memory buffer for IO\n"); + } else { + printf("INFO: using host memory buffer for IO\n"); + } + sequence.is_completed = 0; + sequence.ns_entry = ns_entry; + + /* + * Print "Hello world!" to sequence.buf. We will write this data to LBA + * 0 on the namespace, and then later read it back into a separate buffer + * to demonstrate the full I/O path. + */ + snprintf(sequence.buf, 0x1000, "%s", "Hello world!\n"); + + /* + * Write the data buffer to LBA 0 of this namespace. "write_complete" and + * "&sequence" are specified as the completion callback function and + * argument respectively. write_complete() will be called with the + * value of &sequence as a parameter when the write I/O is completed. + * This allows users to potentially specify different completion + * callback routines for each I/O, as well as pass a unique handle + * as an argument so the application knows which I/O has completed. + * + * Note that the SPDK NVMe driver will only check for completions + * when the application calls spdk_nvme_qpair_process_completions(). + * It is the responsibility of the application to trigger the polling + * process. + */ + rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + write_complete, &sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + exit(1); + } + + /* + * Poll for completions. 0 here means process all available completions. 
+ * In certain usage models, the caller may specify a positive integer + * instead of 0 to signify the maximum number of completions it should + * process. This function will never block - if there are no + * completions pending on the specified qpair, it will return immediately. + * + * When the write I/O completes, write_complete() will submit a new I/O + * to read LBA 0 into a separate buffer, specifying read_complete() as its + * completion routine. When the read I/O completes, read_complete() will + * print the buffer contents and set sequence.is_completed = 1. That will + * break this loop and then exit the program. + */ + while (!sequence.is_completed) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + /* + * Free the I/O qpair. This typically is done when an application exits. + * But SPDK does support freeing and then reallocating qpairs during + * operation. It is the responsibility of the caller to ensure all + * pending I/O are completed before trying to free the qpair. + */ + spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair); + ns_entry = ns_entry->next; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + int nsid, num_ns; + struct ctrlr_entry *entry; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata; + + entry = malloc(sizeof(struct ctrlr_entry)); + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + printf("Attached to %s\n", trid->traddr); + + /* + * spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe + * controller. 
During initialization, the IDENTIFY data for the + * controller is read using an NVMe admin command, and that data + * can be retrieved using spdk_nvme_ctrlr_get_data() to get + * detailed information on the controller. Refer to the NVMe + * specification for more details on IDENTIFY for NVMe controllers. + */ + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + /* + * Each controller has one or more namespaces. An NVMe namespace is basically + * equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how + * many namespaces exist on the controller. For Intel(R) P3X00 controllers, + * it will just be one namespace. + * + * Note that in NVMe, namespace IDs start at 1, not 0. + */ + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + printf("Using controller %s with %d namespaces.\n", entry->name, num_ns); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } +} + +static void +cleanup(void) +{ + struct ns_entry *ns_entry = g_namespaces; + struct ctrlr_entry *ctrlr_entry = g_controllers; + + while (ns_entry) { + struct ns_entry *next = ns_entry->next; + free(ns_entry); + ns_entry = next; + } + + while (ctrlr_entry) { + struct ctrlr_entry *next = ctrlr_entry->next; + + spdk_nvme_detach(ctrlr_entry->ctrlr); + free(ctrlr_entry); + ctrlr_entry = next; + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -V enumerate VMD\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "V")) != -1) { + switch (op) { + case 'V': + g_vmd = true; + break; + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + 
struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "hello_world"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Initializing NVMe Controllers\n"); + + if (g_vmd && spdk_vmd_init()) { + fprintf(stderr, "Failed to initialize VMD." + " Some NVMe devices can be unavailable.\n"); + } + + /* + * Start the SPDK NVMe enumeration process. probe_cb will be called + * for each NVMe controller found, giving our application a choice on + * whether to attach to each controller. attach_cb will then be + * called for each controller after the SPDK NVMe driver has completed + * initializing the controller we chose to attach. + */ + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + cleanup(); + return 1; + } + + if (g_controllers == NULL) { + fprintf(stderr, "no NVMe controllers found\n"); + cleanup(); + return 1; + } + + printf("Initialization complete.\n"); + hello_world(); + cleanup(); + if (g_vmd) { + spdk_vmd_fini(); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/hotplug/.gitignore b/src/spdk/examples/nvme/hotplug/.gitignore new file mode 100644 index 000000000..e6ff53805 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/.gitignore @@ -0,0 +1 @@ +hotplug diff --git a/src/spdk/examples/nvme/hotplug/Makefile b/src/spdk/examples/nvme/hotplug/Makefile new file mode 100644 index 000000000..c77c61227 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = hotplug + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hotplug/hotplug.c b/src/spdk/examples/nvme/hotplug/hotplug.c new file mode 100644 index 000000000..ff821c7ed --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/hotplug.c @@ -0,0 +1,525 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/util.h" + +struct dev_ctx { + TAILQ_ENTRY(dev_ctx) tailq; + bool is_new; + bool is_removed; + bool is_draining; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + uint32_t io_size_blocks; + uint64_t size_in_ios; + uint64_t io_completed; + uint64_t prev_io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + char name[1024]; +}; + +struct perf_task { + struct dev_ctx *dev; + void *buf; +}; + +static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs); + +static uint64_t g_tsc_rate; + +static uint32_t g_io_size_bytes = 4096; +static int g_queue_depth = 4; +static int g_time_in_sec; +static int g_expected_insert_times = -1; +static int g_expected_removal_times = -1; +static int g_insert_times; +static int g_removal_times; +static int g_shm_id = -1; +static uint64_t g_timeout_in_us = SPDK_SEC_TO_USEC; + +static void +task_complete(struct perf_task *task); + +static void +timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, uint16_t cid); + +static void +register_dev(struct spdk_nvme_ctrlr *ctrlr) +{ + struct dev_ctx *dev; + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) { + perror("dev_ctx malloc"); + exit(1); + } + + snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + dev->ctrlr = ctrlr; + dev->is_new = true; + dev->is_removed = false; + dev->is_draining = false; + + spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout_in_us, timeout_cb, NULL); + + dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1); + + if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) { + fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name); + goto skip; + } + + if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes || + 
spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) { + fprintf(stderr, "Controller %s: Invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + dev->name, + spdk_nvme_ns_get_size(dev->ns), + spdk_nvme_ns_get_sector_size(dev->ns), + g_io_size_bytes); + goto skip; + } + + dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes; + dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns); + + dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (!dev->qpair) { + fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + goto skip; + } + g_insert_times++; + TAILQ_INSERT_TAIL(&g_devs, dev, tailq); + return; + +skip: + free(dev); +} + +static void +unregister_dev(struct dev_ctx *dev) +{ + fprintf(stderr, "unregister_dev: %s\n", dev->name); + + spdk_nvme_ctrlr_free_io_qpair(dev->qpair); + spdk_nvme_detach(dev->ctrlr); + + TAILQ_REMOVE(&g_devs, dev, tailq); + free(dev); +} + +static struct perf_task * +alloc_task(struct dev_ctx *dev) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + return NULL; + } + + task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL); + if (task->buf == NULL) { + free(task); + return NULL; + } + + task->dev = dev; + + return task; +} + +static void +free_task(struct perf_task *task) +{ + spdk_dma_free(task->buf); + free(task); +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void +submit_single_io(struct perf_task *task) +{ + struct dev_ctx *dev = task->dev; + uint64_t offset_in_ios; + int rc; + + offset_in_ios = dev->offset_in_ios++; + if (dev->offset_in_ios == dev->size_in_ios) { + dev->offset_in_ios = 0; + } + + rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf, + offset_in_ios * dev->io_size_blocks, + dev->io_size_blocks, io_complete, task, 0); + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + free_task(task); + } else { + dev->current_queue_depth++; + } +} + 
+static void +task_complete(struct perf_task *task) +{ + struct dev_ctx *dev; + + dev = task->dev; + dev->current_queue_depth--; + dev->io_completed++; + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (!dev->is_draining && !dev->is_removed) { + submit_single_io(task); + } else { + free_task(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct perf_task *)ctx); +} + +static void +check_io(struct dev_ctx *dev) +{ + spdk_nvme_qpair_process_completions(dev->qpair, 0); +} + +static void +submit_io(struct dev_ctx *dev, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = alloc_task(dev); + if (task == NULL) { + fprintf(stderr, "task allocation failed\n"); + exit(1); + } + + submit_single_io(task); + } +} + +static void +drain_io(struct dev_ctx *dev) +{ + dev->is_draining = true; + while (dev->current_queue_depth > 0) { + check_io(dev); + } +} + +static void +print_stats(void) +{ + struct dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n", + dev->name, + dev->io_completed, + dev->io_completed - dev->prev_io_completed); + dev->prev_io_completed = dev->io_completed; + } + + fprintf(stderr, "\n"); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attached to %s\n", trid->traddr); + + register_dev(ctrlr); +} + +static void +remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) +{ + struct 
dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->ctrlr == ctrlr) { + /* + * Mark the device as removed, but don't detach yet. + * + * The I/O handling code will detach once it sees that + * is_removed is true and all outstanding I/O have been completed. + */ + dev->is_removed = true; + fprintf(stderr, "Controller removed: %s\n", dev->name); + return; + } + } + + /* + * If we get here, this remove_cb is for a controller that we are not tracking + * in g_devs (for example, because we skipped it during register_dev), + * so immediately detach it. + */ + spdk_nvme_detach(ctrlr); +} + +static void +timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair, uint16_t cid) +{ + /* leave hotplug monitor loop, use the timeout_cb to monitor the hotplug */ + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + } +} + +static void +io_loop(void) +{ + struct dev_ctx *dev, *dev_tmp; + uint64_t tsc_end; + uint64_t next_stats_tsc; + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + next_stats_tsc = spdk_get_ticks(); + + while (1) { + uint64_t now; + + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->is_new) { + /* Submit initial I/O for this controller. */ + submit_io(dev, g_queue_depth); + dev->is_new = false; + } + + check_io(dev); + } + + /* + * Check for hotplug events. + */ + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + break; + } + + /* + * Check for devices which were hot-removed and have finished + * processing outstanding I/Os. + * + * unregister_dev() may remove devs from the list, so use the + * removal-safe iterator. 
+ */ + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + if (dev->is_removed && dev->current_queue_depth == 0) { + g_removal_times++; + unregister_dev(dev); + } + } + + now = spdk_get_ticks(); + if (now > tsc_end) { + break; + } + if (now > next_stats_tsc) { + print_stats(); + next_stats_tsc += g_tsc_rate; + } + + if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) { + break; + } + } + + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + drain_io(dev); + unregister_dev(dev); + } +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-c timeout for each command in second(default:1s)]\n"); + printf("\t[-i shm id (optional)]\n"); + printf("\t[-n expected hot insert times]\n"); + printf("\t[-r expected hot removal times]\n"); + printf("\t[-t time in seconds]\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + long int val; + + /* default value */ + g_time_in_sec = 0; + + while ((op = getopt(argc, argv, "c:i:n:r:t:")) != -1) { + if (op == '?') { + usage(argv[0]); + return 1; + } + + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Converting a string to integer failed\n"); + /* Return 1 like every other failure path; main() uses this value as the exit status. */ + return 1; + } + switch (op) { + case 'c': + g_timeout_in_us = val * SPDK_SEC_TO_USEC; + break; + case 'i': + g_shm_id = val; + break; + case 'n': + g_expected_insert_times = val; + break; + case 'r': + g_expected_removal_times = val; + break; + case 't': + g_time_in_sec = val; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + return 0; +} + + +static int +register_controllers(void) +{ + fprintf(stderr, "Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + /* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. 
*/ + g_insert_times = 0; + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "hotplug"; + opts.core_mask = "0x1"; + if (g_shm_id > -1) { + opts.shm_id = g_shm_id; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + /* Detect the controllers that are plugged in at startup. */ + if (register_controllers() != 0) { + return 1; + } + + fprintf(stderr, "Initialization complete. Starting I/O...\n"); + io_loop(); + + if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) { + fprintf(stderr, "Expected inserts %d != actual inserts %d\n", + g_expected_insert_times, g_insert_times); + return 1; + } + + if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) { + fprintf(stderr, "Expected removals %d != actual removals %d\n", + g_expected_removal_times, g_removal_times); + return 1; + } + + return 0; +} diff --git a/src/spdk/examples/nvme/identify/.gitignore b/src/spdk/examples/nvme/identify/.gitignore new file mode 100644 index 000000000..5c5444c1e --- /dev/null +++ b/src/spdk/examples/nvme/identify/.gitignore @@ -0,0 +1 @@ +identify diff --git a/src/spdk/examples/nvme/identify/Makefile b/src/spdk/examples/nvme/identify/Makefile new file mode 100644 index 000000000..ed7aa60a8 --- /dev/null +++ b/src/spdk/examples/nvme/identify/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = identify + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk + +install: $(APP) + $(INSTALL_EXAMPLE) + +uninstall: + $(UNINSTALL_EXAMPLE) diff --git a/src/spdk/examples/nvme/identify/identify.c b/src/spdk/examples/nvme/identify/identify.c new file mode 100644 index 000000000..722f8d3ee --- /dev/null +++ b/src/spdk/examples/nvme/identify/identify.c @@ -0,0 +1,1827 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/vmd.h" +#include "spdk/nvme_ocssd.h" +#include "spdk/env.h" +#include "spdk/nvme_intel.h" +#include "spdk/nvmf_spec.h" +#include "spdk/pci_ids.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/uuid.h" + +#define MAX_DISCOVERY_LOG_ENTRIES ((uint64_t)1000) + +#define NUM_CHUNK_INFO_ENTRIES 8 + +static int outstanding_commands; + +struct feature { + uint32_t result; + bool valid; +}; + +static struct feature features[256] = {}; + +static struct spdk_nvme_error_information_entry error_page[256]; + +static struct spdk_nvme_health_information_page health_page; + +static struct spdk_nvme_firmware_page firmware_page; + +static struct spdk_nvme_cmds_and_effect_log_page cmd_effects_log_page; + +static struct spdk_nvme_intel_smart_information_page intel_smart_page; + +static struct spdk_nvme_intel_temperature_page intel_temperature_page; + +static struct spdk_nvme_intel_marketing_description_page intel_md_page; + +static struct spdk_nvmf_discovery_log_page *g_discovery_page; +static size_t g_discovery_page_size; +static uint64_t g_discovery_page_numrec; + +static struct spdk_ocssd_geometry_data geometry_data; + +static struct spdk_ocssd_chunk_information_entry g_ocssd_chunk_info_page[NUM_CHUNK_INFO_ENTRIES ]; + +static bool g_hex_dump = false; + +static int g_shm_id = -1; + +static int g_dpdk_mem = 0; + +static int g_master_core = 0; + +static char g_core_mask[16] = "0x1"; + +static struct spdk_nvme_transport_id g_trid; + +static int g_controllers_found = 0; + +static bool g_vmd = false; + +static void +hex_dump(const void *data, size_t size) +{ + size_t offset = 0, i; + const uint8_t *bytes = data; + + while (size) { + printf("%08zX:", offset); + + for (i = 0; i < 16; i++) { + if (i == 8) { + printf("-"); + } else { + printf(" "); + } + + if (i < size) { + printf("%02X", bytes[offset + i]); + } else { + printf(" "); + } + } + + 
printf(" "); + + for (i = 0; i < 16; i++) { + if (i < size) { + if (bytes[offset + i] > 0x20 && bytes[offset + i] < 0x7F) { + printf("%c", bytes[offset + i]); + } else { + printf("."); + } + } + } + + printf("\n"); + + offset += 16; + if (size > 16) { + size -= 16; + } else { + break; + } + } +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + outstanding_commands--; +} + +static void +get_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get log page failed\n"); + } + outstanding_commands--; +} + +static void +get_ocssd_geometry_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get ocssd geometry failed\n"); + } + outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + struct feature *feature = &features[fid]; + + feature->valid = false; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10_bits.get_features.fid = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, feature); +} + +static void +get_features(struct spdk_nvme_ctrlr *ctrlr) +{ + size_t i; + + uint8_t features_to_get[] = { + SPDK_NVME_FEAT_ARBITRATION, + SPDK_NVME_FEAT_POWER_MANAGEMENT, + SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD, + SPDK_NVME_FEAT_ERROR_RECOVERY, + SPDK_NVME_FEAT_NUMBER_OF_QUEUES, + SPDK_OCSSD_FEAT_MEDIA_FEEDBACK, + }; + + /* Submit several GET FEATURES commands and wait for them to complete */ + outstanding_commands = 0; + for (i = 0; i < SPDK_COUNTOF(features_to_get); i++) { + if (!spdk_nvme_ctrlr_is_ocssd_supported(ctrlr) && + features_to_get[i] == SPDK_OCSSD_FEAT_MEDIA_FEEDBACK) { + 
continue; + } + if (get_feature(ctrlr, features_to_get[i]) == 0) { + outstanding_commands++; + } else { + printf("get_feature(0x%02X) failed to submit command\n", features_to_get[i]); + } + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_error_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ERROR, + SPDK_NVME_GLOBAL_NS_TAG, error_page, + sizeof(*error_page) * (cdata->elpe + 1), + 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_health_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, + SPDK_NVME_GLOBAL_NS_TAG, &health_page, sizeof(health_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_firmware_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_FIRMWARE_SLOT, + SPDK_NVME_GLOBAL_NS_TAG, &firmware_page, sizeof(firmware_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_cmd_effects_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, + SPDK_NVME_GLOBAL_NS_TAG, &cmd_effects_log_page, sizeof(cmd_effects_log_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_intel_smart_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_SMART, SPDK_NVME_GLOBAL_NS_TAG, + &intel_smart_page, sizeof(intel_smart_page), 0, 
get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_intel_temperature_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE, + SPDK_NVME_GLOBAL_NS_TAG, &intel_temperature_page, sizeof(intel_temperature_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +static int +get_intel_md_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION, + SPDK_NVME_GLOBAL_NS_TAG, &intel_md_page, sizeof(intel_md_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +static void +get_discovery_log_page_header_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvmf_discovery_log_page *new_discovery_page; + struct spdk_nvme_ctrlr *ctrlr = cb_arg; + uint16_t recfmt; + uint64_t remaining; + uint64_t offset; + + outstanding_commands--; + if (spdk_nvme_cpl_is_error(cpl)) { + /* Return without printing anything - this may not be a discovery controller */ + free(g_discovery_page); + g_discovery_page = NULL; + return; + } + + /* Got the first 4K of the discovery log page */ + recfmt = from_le16(&g_discovery_page->recfmt); + if (recfmt != 0) { + printf("Unrecognized discovery log record format %" PRIu16 "\n", recfmt); + return; + } + + g_discovery_page_numrec = from_le64(&g_discovery_page->numrec); + + /* Pick an arbitrary limit to avoid ridiculously large buffer size. 
*/ + if (g_discovery_page_numrec > MAX_DISCOVERY_LOG_ENTRIES) { + printf("Discovery log has %" PRIu64 " entries - limiting to %" PRIu64 ".\n", + g_discovery_page_numrec, MAX_DISCOVERY_LOG_ENTRIES); + g_discovery_page_numrec = MAX_DISCOVERY_LOG_ENTRIES; + } + + /* + * Now that we now how many entries should be in the log page, we can allocate + * the full log page buffer. + */ + g_discovery_page_size += g_discovery_page_numrec * sizeof(struct + spdk_nvmf_discovery_log_page_entry); + new_discovery_page = realloc(g_discovery_page, g_discovery_page_size); + if (new_discovery_page == NULL) { + free(g_discovery_page); + printf("Discovery page allocation failed!\n"); + return; + } + + g_discovery_page = new_discovery_page; + + /* Retrieve the rest of the discovery log page */ + offset = offsetof(struct spdk_nvmf_discovery_log_page, entries); + remaining = g_discovery_page_size - offset; + while (remaining) { + uint32_t size; + + /* Retrieve up to 4 KB at a time */ + size = spdk_min(remaining, 4096); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, (char *)g_discovery_page + offset, size, offset, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + offset += size; + remaining -= size; + outstanding_commands++; + } +} + +static int +get_discovery_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + /* Allocate the initial discovery log page buffer - this will be resized later. 
*/ + g_discovery_page_size = sizeof(*g_discovery_page); + g_discovery_page = calloc(1, g_discovery_page_size); + if (g_discovery_page == NULL) { + printf("Discovery log page allocation failed!\n"); + exit(1); + } + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, g_discovery_page, g_discovery_page_size, 0, + get_discovery_log_page_header_completion, ctrlr)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static void +get_log_pages(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + outstanding_commands = 0; + bool is_discovery = spdk_nvme_ctrlr_is_discovery(ctrlr); + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!is_discovery) { + /* + * Only attempt to retrieve the following log pages + * when the NVM subsystem that's being targeted is + * NOT the Discovery Controller which only fields + * a Discovery Log Page. + */ + if (get_error_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Error Log Page failed\n"); + } + + if (get_health_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (SMART/health) failed\n"); + } + + if (get_firmware_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Firmware Slot Information) failed\n"); + } + } + + if (cdata->lpa.celp) { + if (get_cmd_effects_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Commands Supported and Effects) failed\n"); + } + } + + if (cdata->vid == SPDK_PCI_VID_INTEL) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + if (get_intel_smart_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel SMART/health) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + if (get_intel_temperature_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log 
Page (Intel temperature) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + if (get_intel_md_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel Marketing Description) failed\n"); + } + } + + } + + if (is_discovery && (get_discovery_log_page(ctrlr) == 0)) { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_ocssd_chunk_info_log_page(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, + nsid, &g_ocssd_chunk_info_page, sizeof(g_ocssd_chunk_info_page), 0, + get_log_page_completion, NULL) == 0) { + outstanding_commands++; + } else { + printf("get_ocssd_chunk_info_log_page() failed\n"); + return -1; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + return 0; +} + +static void +get_ocssd_geometry(struct spdk_nvme_ns *ns, struct spdk_ocssd_geometry_data *geometry_data) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ocssd_ctrlr_cmd_geometry(ctrlr, nsid, geometry_data, + sizeof(*geometry_data), get_ocssd_geometry_completion, NULL)) { + printf("Get OpenChannel SSD geometry failed\n"); + exit(1); + } else { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +print_hex_be(const void *v, size_t size) +{ + const uint8_t *buf = v; + + while (size--) { + printf("%02X", *buf++); + } +} + +static void +print_uint128_hex(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + printf("0x%llX%016llX", hi, lo); + } else { + printf("0x%llX", lo); + } +} + +static void 
+print_uint128_dec(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + /* can't handle large (>64-bit) decimal values for now, so fall back to hex */ + print_uint128_hex(v); + } else { + printf("%llu", (unsigned long long)lo); + } +} + +/* The len should be <= 8. */ +static void +print_uint_var_dec(uint8_t *array, unsigned int len) +{ + uint64_t result = 0; + int i = len; + + while (i > 0) { + result += (uint64_t)array[i - 1] << (8 * (i - 1)); + i--; + } + printf("%lu", result); +} + +/* Print ASCII string as defined by the NVMe spec */ +static void +print_ascii_string(const void *buf, size_t size) +{ + const uint8_t *str = buf; + + /* Trim trailing spaces */ + while (size > 0 && str[size - 1] == ' ') { + size--; + } + + while (size--) { + if (*str >= 0x20 && *str <= 0x7E) { + printf("%c", *str); + } else { + printf("."); + } + str++; + } +} + +static void +print_ocssd_chunk_info(struct spdk_ocssd_chunk_information_entry *chk_info, int chk_num) +{ + int i; + char *cs_str, *ct_str; + + printf("OCSSD Chunk Info Glance\n"); + printf("======================\n"); + + for (i = 0; i < chk_num; i++) { + cs_str = chk_info[i].cs.free ? "Free" : + chk_info[i].cs.closed ? "Closed" : + chk_info[i].cs.open ? "Open" : + chk_info[i].cs.offline ? "Offline" : "Unknown"; + ct_str = chk_info[i].ct.seq_write ? "Sequential Write" : + chk_info[i].ct.rnd_write ? "Random Write" : "Unknown"; + + printf("------------\n"); + printf("Chunk index: %d\n", i); + printf("Chunk state: %s(0x%x)\n", cs_str, *(uint8_t *) & (chk_info[i].cs)); + printf("Chunk type (write mode): %s\n", ct_str); + printf("Chunk type (size_deviate): %s\n", chk_info[i].ct.size_deviate ? 
"Yes" : "No"); + printf("Wear-level Index: %d\n", chk_info[i].wli); + printf("Starting LBA: %ld\n", chk_info[i].slba); + printf("Number of blocks in chunk: %ld\n", chk_info[i].cnlb); + printf("Write Pointer: %ld\n", chk_info[i].wp); + } +} + +static void +print_ocssd_geometry(struct spdk_ocssd_geometry_data *geometry_data) +{ + printf("Namespace OCSSD Geometry\n"); + printf("=======================\n"); + + if (geometry_data->mjr < 2) { + printf("Open-Channel Spec version is less than 2.0\n"); + printf("OC version: maj:%d\n", geometry_data->mjr); + return; + } + + printf("OC version: maj:%d min:%d\n", geometry_data->mjr, geometry_data->mnr); + printf("LBA format:\n"); + printf(" Group bits: %d\n", geometry_data->lbaf.grp_len); + printf(" PU bits: %d\n", geometry_data->lbaf.pu_len); + printf(" Chunk bits: %d\n", geometry_data->lbaf.chk_len); + printf(" Logical block bits: %d\n", geometry_data->lbaf.lbk_len); + + printf("Media and Controller Capabilities:\n"); + printf(" Namespace supports Vector Chunk Copy: %s\n", + geometry_data->mccap.vec_chk_cpy ? "Supported" : "Not Supported"); + printf(" Namespace supports multiple resets a free chunk: %s\n", + geometry_data->mccap.multi_reset ? 
"Supported" : "Not Supported"); + + printf("Wear-level Index Delta Threshold: %d\n", geometry_data->wit); + printf("Groups (channels): %d\n", geometry_data->num_grp); + printf("PUs (LUNs) per group: %d\n", geometry_data->num_pu); + printf("Chunks per LUN: %d\n", geometry_data->num_chk); + printf("Logical blks per chunk: %d\n", geometry_data->clba); + printf("MIN write size: %d\n", geometry_data->ws_min); + printf("OPT write size: %d\n", geometry_data->ws_opt); + printf("Cache min write size: %d\n", geometry_data->mw_cunits); + printf("Max open chunks: %d\n", geometry_data->maxoc); + printf("Max open chunks per PU: %d\n", geometry_data->maxocpu); + printf("\n"); +} + +static void +print_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + const struct spdk_uuid *uuid; + uint32_t i; + uint32_t flags; + char uuid_str[SPDK_UUID_STRING_LEN]; + uint32_t blocksize; + + nsdata = spdk_nvme_ns_get_data(ns); + flags = spdk_nvme_ns_get_flags(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + if (g_hex_dump) { + hex_dump(nsdata, sizeof(*nsdata)); + printf("\n"); + } + + /* This function is only called for active namespaces. */ + assert(spdk_nvme_ns_is_active(ns)); + + printf("Deallocate: %s\n", + (flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? "Supported" : "Not Supported"); + printf("Deallocated/Unwritten Error: %s\n", + nsdata->nsfeat.dealloc_or_unwritten_error ? "Supported" : "Not Supported"); + printf("Deallocated Read Value: %s\n", + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_00 ? "All 0x00" : + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_FF ? "All 0xFF" : + "Unknown"); + printf("Deallocate in Write Zeroes: %s\n", + nsdata->dlfeat.bits.write_zero_deallocate ? "Supported" : "Not Supported"); + printf("Deallocated Guard Field: %s\n", + nsdata->dlfeat.bits.guard_value ? "CRC for Read Value" : "0xFFFF"); + printf("Flush: %s\n", + (flags & SPDK_NVME_NS_FLUSH_SUPPORTED) ? 
"Supported" : "Not Supported"); + printf("Reservation: %s\n", + (flags & SPDK_NVME_NS_RESERVATION_SUPPORTED) ? "Supported" : "Not Supported"); + if (flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + printf("End-to-End Data Protection: Supported\n"); + printf("Protection Type: Type%d\n", nsdata->dps.pit); + printf("Protection Information Transferred as: %s\n", + nsdata->dps.md_start ? "First 8 Bytes" : "Last 8 Bytes"); + } + if (nsdata->lbaf[nsdata->flbas.format].ms > 0) { + printf("Metadata Transferred as: %s\n", + nsdata->flbas.extended ? "Extended Data LBA" : "Separate Metadata Buffer"); + } + printf("Namespace Sharing Capabilities: %s\n", + nsdata->nmic.can_share ? "Multiple Controllers" : "Private"); + blocksize = 1 << nsdata->lbaf[nsdata->flbas.format].lbads; + printf("Size (in LBAs): %lld (%lldGiB)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze * blocksize / 1024 / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldGiB)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap * blocksize / 1024 / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldGiB)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse * blocksize / 1024 / 1024 / 1024); + if (nsdata->noiob) { + printf("Optimal I/O Boundary: %u blocks\n", nsdata->noiob); + } + if (!spdk_mem_all_zero(nsdata->nguid, sizeof(nsdata->nguid))) { + printf("NGUID: "); + print_hex_be(nsdata->nguid, sizeof(nsdata->nguid)); + printf("\n"); + } + if (!spdk_mem_all_zero(&nsdata->eui64, sizeof(nsdata->eui64))) { + printf("EUI64: "); + print_hex_be(&nsdata->eui64, sizeof(nsdata->eui64)); + printf("\n"); + } + uuid = spdk_nvme_ns_get_uuid(ns); + if (uuid) { + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), uuid); + printf("UUID: %s\n", uuid_str); + } + printf("Thin Provisioning: %s\n", + nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + printf("Per-NS Atomic Units: %s\n", + nsdata->nsfeat.ns_atomic_write_unit ? 
"Yes" : "No"); + if (nsdata->nsfeat.ns_atomic_write_unit) { + if (nsdata->nawun) { + printf(" Atomic Write Unit (Normal): %d\n", nsdata->nawun + 1); + } + + if (nsdata->nawupf) { + printf(" Atomic Write Unit (PFail): %d\n", nsdata->nawupf + 1); + } + + if (nsdata->nacwu) { + printf(" Atomic Compare & Write Unit: %d\n", nsdata->nacwu + 1); + } + + printf(" Atomic Boundary Size (Normal): %d\n", nsdata->nabsn); + printf(" Atomic Boundary Size (PFail): %d\n", nsdata->nabspf); + printf(" Atomic Boundary Offset: %d\n", nsdata->nabo); + } + + printf("NGUID/EUI64 Never Reused: %s\n", + nsdata->nsfeat.guid_never_reused ? "Yes" : "No"); + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("\n"); + + if (spdk_nvme_ctrlr_is_ocssd_supported(spdk_nvme_ns_get_ctrlr(ns))) { + get_ocssd_geometry(ns, &geometry_data); + print_ocssd_geometry(&geometry_data); + get_ocssd_chunk_info_log_page(ns); + print_ocssd_chunk_info(g_ocssd_chunk_info_page, NUM_CHUNK_INFO_ENTRIES); + } +} + +static const char * +admin_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_DELETE_IO_SQ: + return "Delete I/O Submission Queue"; + case SPDK_NVME_OPC_CREATE_IO_SQ: + return "Create I/O Submission Queue"; + case SPDK_NVME_OPC_GET_LOG_PAGE: + return "Get Log Page"; + case SPDK_NVME_OPC_DELETE_IO_CQ: + return "Delete I/O Completion Queue"; + case SPDK_NVME_OPC_CREATE_IO_CQ: + return "Create I/O Completion Queue"; + case SPDK_NVME_OPC_IDENTIFY: + return "Identify"; + case SPDK_NVME_OPC_ABORT: + return "Abort"; + case SPDK_NVME_OPC_SET_FEATURES: + return "Set Features"; + case SPDK_NVME_OPC_GET_FEATURES: + return "Get Features"; + case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST: + return "Asynchronous Event Request"; + case SPDK_NVME_OPC_NS_MANAGEMENT: + return 
"Namespace Management"; + case SPDK_NVME_OPC_FIRMWARE_COMMIT: + return "Firmware Commit"; + case SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD: + return "Firmware Image Download"; + case SPDK_NVME_OPC_DEVICE_SELF_TEST: + return "Device Self-test"; + case SPDK_NVME_OPC_NS_ATTACHMENT: + return "Namespace Attachment"; + case SPDK_NVME_OPC_KEEP_ALIVE: + return "Keep Alive"; + case SPDK_NVME_OPC_DIRECTIVE_SEND: + return "Directive Send"; + case SPDK_NVME_OPC_DIRECTIVE_RECEIVE: + return "Directive Receive"; + case SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT: + return "Virtualization Management"; + case SPDK_NVME_OPC_NVME_MI_SEND: + return "NVMe-MI Send"; + case SPDK_NVME_OPC_NVME_MI_RECEIVE: + return "NVMe-MI Receive"; + case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG: + return "Doorbell Buffer Config"; + case SPDK_NVME_OPC_FORMAT_NVM: + return "Format NVM"; + case SPDK_NVME_OPC_SECURITY_SEND: + return "Security Send"; + case SPDK_NVME_OPC_SECURITY_RECEIVE: + return "Security Receive"; + case SPDK_NVME_OPC_SANITIZE: + return "Sanitize"; + default: + if (opc >= 0xC0) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static const char * +io_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_FLUSH: + return "Flush"; + case SPDK_NVME_OPC_WRITE: + return "Write"; + case SPDK_NVME_OPC_READ: + return "Read"; + case SPDK_NVME_OPC_WRITE_UNCORRECTABLE: + return "Write Uncorrectable"; + case SPDK_NVME_OPC_COMPARE: + return "Compare"; + case SPDK_NVME_OPC_WRITE_ZEROES: + return "Write Zeroes"; + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + return "Dataset Management"; + case SPDK_NVME_OPC_RESERVATION_REGISTER: + return "Reservation Register"; + case SPDK_NVME_OPC_RESERVATION_REPORT: + return "Reservation Report"; + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + return "Reservation Acquire"; + case SPDK_NVME_OPC_RESERVATION_RELEASE: + return "Reservation Release"; + default: + if (opc >= 0x80) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static void 
+print_controller(struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_transport_id *trid) +{ + const struct spdk_nvme_ctrlr_data *cdata; + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + union spdk_nvme_cmbsz_register cmbsz; + uint8_t str[512]; + uint32_t i; + struct spdk_nvme_error_information_entry *error_entry; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + uint32_t nsid; + + cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); + vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr); + cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr); + + if (!spdk_nvme_ctrlr_is_discovery(ctrlr)) { + /* + * Discovery Controller only supports the + * IDENTIFY and GET_LOG_PAGE cmd set, so only + * attempt GET_FEATURES when NOT targeting a + * Discovery Controller. + */ + get_features(ctrlr); + } + get_log_pages(ctrlr); + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + printf("=====================================================\n"); + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr) != 0) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("NVMe Controller at %04x:%02x:%02x.%x [%04x:%04x]\n", + pci_addr.domain, pci_addr.bus, + pci_addr.dev, pci_addr.func, + pci_id.vendor_id, pci_id.device_id); + } + printf("=====================================================\n"); + + if (g_hex_dump) { + hex_dump(cdata, sizeof(*cdata)); + printf("\n"); + } + + printf("Controller Capabilities/Features\n"); + printf("================================\n"); + printf("Vendor ID: %04x\n", cdata->vid); + printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); + printf("Serial Number: "); + print_ascii_string(cdata->sn, sizeof(cdata->sn)); + printf("\n"); + printf("Model Number: "); + 
print_ascii_string(cdata->mn, sizeof(cdata->mn)); + printf("\n"); + printf("Firmware Version: "); + print_ascii_string(cdata->fr, sizeof(cdata->fr)); + printf("\n"); + printf("Recommended Arb Burst: %d\n", cdata->rab); + printf("IEEE OUI Identifier: %02x %02x %02x\n", + cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); + printf("Multi-path I/O\n"); + printf(" May have multiple subsystem ports: %s\n", cdata->cmic.multi_port ? "Yes" : "No"); + printf(" May be connected to multiple hosts: %s\n", cdata->cmic.multi_host ? "Yes" : "No"); + printf(" Associated with SR-IOV VF: %s\n", cdata->cmic.sr_iov ? "Yes" : "No"); + printf("Max Data Transfer Size: "); + if (cdata->mdts == 0) { + printf("Unlimited\n"); + } else { + printf("%" PRIu64 "\n", (uint64_t)1 << (12 + cap.bits.mpsmin + cdata->mdts)); + } + printf("Max Number of Namespaces: %d\n", cdata->nn); + if (features[SPDK_NVME_FEAT_ERROR_RECOVERY].valid) { + unsigned tler = features[SPDK_NVME_FEAT_ERROR_RECOVERY].result & 0xFFFF; + printf("Error Recovery Timeout: "); + if (tler == 0) { + printf("Unlimited\n"); + } else { + printf("%u milliseconds\n", tler * 100); + } + } + printf("NVMe Specification Version (VS): %u.%u", vs.bits.mjr, vs.bits.mnr); + if (vs.bits.ter) { + printf(".%u", vs.bits.ter); + } + printf("\n"); + if (cdata->ver.raw != 0) { + printf("NVMe Specification Version (Identify): %u.%u", cdata->ver.bits.mjr, cdata->ver.bits.mnr); + if (cdata->ver.bits.ter) { + printf(".%u", cdata->ver.bits.ter); + } + printf("\n"); + } + + printf("Maximum Queue Entries: %u\n", cap.bits.mqes + 1); + printf("Contiguous Queues Required: %s\n", cap.bits.cqr ? "Yes" : "No"); + printf("Arbitration Mechanisms Supported\n"); + printf(" Weighted Round Robin: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_WRR ? "Supported" : "Not Supported"); + printf(" Vendor Specific: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_VS ? 
"Supported" : "Not Supported"); + printf("Reset Timeout: %" PRIu64 " ms\n", (uint64_t)500 * cap.bits.to); + printf("Doorbell Stride: %" PRIu64 " bytes\n", + (uint64_t)1 << (2 + cap.bits.dstrd)); + printf("NVM Subsystem Reset: %s\n", + cap.bits.nssrs ? "Supported" : "Not Supported"); + printf("Command Sets Supported\n"); + printf(" NVM Command Set: %s\n", + cap.bits.css & SPDK_NVME_CAP_CSS_NVM ? "Supported" : "Not Supported"); + printf("Boot Partition: %s\n", + cap.bits.bps ? "Supported" : "Not Supported"); + printf("Memory Page Size Minimum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmin)); + printf("Memory Page Size Maximum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmax)); + printf("Optional Asynchronous Events Supported\n"); + printf(" Namespace Attribute Notices: %s\n", + cdata->oaes.ns_attribute_notices ? "Supported" : "Not Supported"); + printf(" Firmware Activation Notices: %s\n", + cdata->oaes.fw_activation_notices ? "Supported" : "Not Supported"); + + printf("128-bit Host Identifier: %s\n", + cdata->ctratt.host_id_exhid_supported ? "Supported" : "Not Supported"); + printf("\n"); + + printf("Controller Memory Buffer Support\n"); + printf("================================\n"); + if (cmbsz.raw != 0) { + uint64_t size = cmbsz.bits.sz; + + /* Convert the size to bytes by multiplying by the granularity. + By spec, szu is at most 6 and sz is 20 bits, so size requires + at most 56 bits. */ + size *= (0x1000 << (cmbsz.bits.szu * 4)); + + printf("Supported: Yes\n"); + printf("Total Size: %lu bytes\n", size); + printf("Submission Queues in CMB: %s\n", + cmbsz.bits.sqs ? "Supported" : "Not Supported"); + printf("Completion Queues in CMB: %s\n", + cmbsz.bits.cqs ? "Supported" : "Not Supported"); + printf("Read data and metadata in CMB %s\n", + cmbsz.bits.rds ? "Supported" : "Not Supported"); + printf("Write data and metadata in CMB: %s\n", + cmbsz.bits.wds ? 
"Supported" : "Not Supported"); + } else { + printf("Supported: No\n"); + } + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Security Send/Receive: %s\n", + cdata->oacs.security ? "Supported" : "Not Supported"); + printf("Format NVM: %s\n", + cdata->oacs.format ? "Supported" : "Not Supported"); + printf("Firmware Activate/Download: %s\n", + cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Namespace Management: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Device Self-Test: %s\n", + cdata->oacs.device_self_test ? "Supported" : "Not Supported"); + printf("Directives: %s\n", + cdata->oacs.directives ? "Supported" : "Not Supported"); + printf("NVMe-MI: %s\n", + cdata->oacs.nvme_mi ? "Supported" : "Not Supported"); + printf("Virtualization Management: %s\n", + cdata->oacs.virtualization_management ? "Supported" : "Not Supported"); + printf("Doorbell Buffer Config: %s\n", + cdata->oacs.doorbell_buffer_config ? "Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl + 1); + printf("Async Event Request Limit: %d\n", cdata->aerl + 1); + printf("Number of Firmware Slots: "); + if (cdata->oacs.firmware != 0) { + printf("%d\n", cdata->frmw.num_slots); + } else { + printf("N/A\n"); + } + printf("Firmware Slot 1 Read-Only: "); + if (cdata->oacs.firmware != 0) { + printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + } else { + printf("N/A\n"); + } + if (cdata->fwug == 0x00) { + printf("Firmware Update Granularity: No Information Provided\n"); + } else if (cdata->fwug == 0xFF) { + printf("Firmware Update Granularity: No Restriction\n"); + } else { + printf("Firmware Update Granularity: %u KiB\n", + cdata->fwug * 4); + } + printf("Per-Namespace SMART Log: %s\n", + cdata->lpa.ns_smart ? "Yes" : "No"); + printf("Command Effects Log Page: %s\n", + cdata->lpa.celp ? 
"Supported" : "Not Supported"); + printf("Get Log Page Extended Data: %s\n", + cdata->lpa.edlp ? "Supported" : "Not Supported"); + printf("Telemetry Log Pages: %s\n", + cdata->lpa.telemetry ? "Supported" : "Not Supported"); + printf("Error Log Page Entries Supported: %d\n", cdata->elpe + 1); + if (cdata->kas == 0) { + printf("Keep Alive: Not Supported\n"); + } else { + printf("Keep Alive: Supported\n"); + printf("Keep Alive Granularity: %u ms\n", + cdata->kas * 100); + } + printf("\n"); + + printf("NVM Command Set Attributes\n"); + printf("==========================\n"); + printf("Submission Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->sqes.max); + printf(" Min: %d\n", 1 << cdata->sqes.min); + printf("Completion Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->cqes.max); + printf(" Min: %d\n", 1 << cdata->cqes.min); + printf("Number of Namespaces: %d\n", cdata->nn); + printf("Compare Command: %s\n", + cdata->oncs.compare ? "Supported" : "Not Supported"); + printf("Write Uncorrectable Command: %s\n", + cdata->oncs.write_unc ? "Supported" : "Not Supported"); + printf("Dataset Management Command: %s\n", + cdata->oncs.dsm ? "Supported" : "Not Supported"); + printf("Write Zeroes Command: %s\n", + cdata->oncs.write_zeroes ? "Supported" : "Not Supported"); + printf("Set Features Save Field: %s\n", + cdata->oncs.set_features_save ? "Supported" : "Not Supported"); + printf("Reservations: %s\n", + cdata->oncs.reservations ? "Supported" : "Not Supported"); + printf("Timestamp: %s\n", + cdata->oncs.timestamp ? "Supported" : "Not Supported"); + printf("Volatile Write Cache: %s\n", + cdata->vwc.present ? "Present" : "Not Present"); + printf("Atomic Write Unit (Normal): %d\n", cdata->awun + 1); + printf("Atomic Write Unit (PFail): %d\n", cdata->awupf + 1); + printf("Atomic Compare & Write Unit: %d\n", cdata->acwu + 1); + printf("Fused Compare & Write: %s\n", + cdata->fuses.compare_and_write ? 
"Supported" : "Not Supported"); + printf("Scatter-Gather List\n"); + printf(" SGL Command Set: %s\n", + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED ? "Supported" : + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED ? "Supported (Dword aligned)" : + "Not Supported"); + printf(" SGL Keyed: %s\n", + cdata->sgls.keyed_sgl ? "Supported" : "Not Supported"); + printf(" SGL Bit Bucket Descriptor: %s\n", + cdata->sgls.bit_bucket_descriptor ? "Supported" : "Not Supported"); + printf(" SGL Metadata Pointer: %s\n", + cdata->sgls.metadata_pointer ? "Supported" : "Not Supported"); + printf(" Oversized SGL: %s\n", + cdata->sgls.oversized_sgl ? "Supported" : "Not Supported"); + printf(" SGL Metadata Address: %s\n", + cdata->sgls.metadata_address ? "Supported" : "Not Supported"); + printf(" SGL Offset: %s\n", + cdata->sgls.sgl_offset ? "Supported" : "Not Supported"); + printf(" Transport SGL Data Block: %s\n", + cdata->sgls.transport_sgl ? "Supported" : "Not Supported"); + printf("Replay Protected Memory Block:"); + if (cdata->rpmbs.num_rpmb_units > 0) { + printf(" Supported\n"); + printf(" Number of RPMB Units: %d\n", cdata->rpmbs.num_rpmb_units); + printf(" Authentication Method: %s\n", cdata->rpmbs.auth_method == 0 ? 
"HMAC SHA-256" : "Unknown"); + printf(" Total Size (in 128KB units) = %d\n", cdata->rpmbs.total_size + 1); + printf(" Access Size (in 512B units) = %d\n", cdata->rpmbs.access_size + 1); + } else { + printf(" Not Supported\n"); + } + printf("\n"); + + printf("Firmware Slot Information\n"); + printf("=========================\n"); + if (g_hex_dump) { + hex_dump(&firmware_page, sizeof(firmware_page)); + printf("\n"); + } + printf("Active slot: %u\n", firmware_page.afi.active_slot); + if (firmware_page.afi.next_reset_slot) { + printf("Next controller reset slot: %u\n", firmware_page.afi.next_reset_slot); + } + for (i = 0; i < 7; i++) { + if (!spdk_mem_all_zero(firmware_page.revision[i], sizeof(firmware_page.revision[i]))) { + printf("Slot %u Firmware Revision: ", i + 1); + print_ascii_string(firmware_page.revision[i], sizeof(firmware_page.revision[i])); + printf("\n"); + } + } + printf("\n"); + + if (cdata->lpa.celp) { + printf("Commands Supported and Effects\n"); + printf("==============================\n"); + + if (g_hex_dump) { + hex_dump(&cmd_effects_log_page, sizeof(cmd_effects_log_page)); + printf("\n"); + } + + printf("Admin Commands\n"); + printf("--------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.admin_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.admin_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + admin_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? "Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? 
"All-NS-Exclusive" : ""); + } + } + + printf("I/O Commands\n"); + printf("------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.io_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.io_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + io_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? "Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? "All-NS-Exclusive" : ""); + } + } + printf("\n"); + } + + printf("Error Log\n"); + printf("=========\n"); + for (i = 0; i <= cdata->elpe; i++) { + error_entry = &error_page[i]; + if (error_entry->error_count == 0) { + continue; + } + if (i != 0) { + printf("-----------\n"); + } + + printf("Entry: %u\n", i); + printf("Error Count: 0x%"PRIx64"\n", error_entry->error_count); + printf("Submission Queue Id: 0x%x\n", error_entry->sqid); + printf("Command Id: 0x%x\n", error_entry->cid); + printf("Phase Bit: %x\n", error_entry->status.p); + printf("Status Code: 0x%x\n", error_entry->status.sc); + printf("Status Code Type: 0x%x\n", error_entry->status.sct); + printf("Do Not Retry: %x\n", error_entry->status.dnr); + printf("Error Location: 0x%x\n", error_entry->error_location); + printf("LBA: 0x%"PRIx64"\n", error_entry->lba); + printf("Namespace: 0x%x\n", error_entry->nsid); + printf("Vendor Log Page: 0x%x\n", error_entry->vendor_specific); + + } + printf("\n"); + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; + unsigned ab, lpw, mpw, hpw; + + ab = arb & 0x7; + lpw = ((arb >> 8) & 0xFF) + 1; + mpw = ((arb >> 16) & 0xFF) + 1; + hpw = ((arb >> 24) & 0xFF) + 1; + + printf("Arbitration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (ab == 0x7) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << ab); + } + + if 
(cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) { + printf("Low Priority Weight: %u\n", lpw); + printf("Medium Priority Weight: %u\n", mpw); + printf("High Priority Weight: %u\n", hpw); + } + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_POWER_MANAGEMENT].valid) { + unsigned ps = features[SPDK_NVME_FEAT_POWER_MANAGEMENT].result & 0x1F; + printf("Power Management\n"); + printf("================\n"); + printf("Number of Power States: %u\n", cdata->npss + 1); + printf("Current Power State: Power State #%u\n", ps); + for (i = 0; i <= cdata->npss; i++) { + const struct spdk_nvme_power_state *psd = &cdata->psd[i]; + printf("Power State #%u: ", i); + if (psd->mps) { + /* MP scale is 0.0001 W */ + printf("Max Power: %u.%04u W\n", + psd->mp / 10000, + psd->mp % 10000); + } else { + /* MP scale is 0.01 W */ + printf("Max Power: %3u.%02u W\n", + psd->mp / 100, + psd->mp % 100); + } + /* TODO: print other power state descriptor fields */ + } + printf("Non-Operational Permissive Mode: %s\n", + cdata->ctratt.non_operational_power_state_permissive_mode ? "Supported" : "Not Supported"); + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].valid) { + printf("Health Information\n"); + printf("==================\n"); + + if (g_hex_dump) { + hex_dump(&health_page, sizeof(health_page)); + printf("\n"); + } + + printf("Critical Warnings:\n"); + printf(" Available Spare Space: %s\n", + health_page.critical_warning.bits.available_spare ? "WARNING" : "OK"); + printf(" Temperature: %s\n", + health_page.critical_warning.bits.temperature ? "WARNING" : "OK"); + printf(" Device Reliability: %s\n", + health_page.critical_warning.bits.device_reliability ? "WARNING" : "OK"); + printf(" Read Only: %s\n", + health_page.critical_warning.bits.read_only ? "Yes" : "No"); + printf(" Volatile Memory Backup: %s\n", + health_page.critical_warning.bits.volatile_memory_backup ? 
"WARNING" : "OK"); + printf("Current Temperature: %u Kelvin (%d Celsius)\n", + health_page.temperature, + (int)health_page.temperature - 273); + printf("Temperature Threshold: %u Kelvin (%d Celsius)\n", + features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result, + (int)features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result - 273); + printf("Available Spare: %u%%\n", health_page.available_spare); + printf("Available Spare Threshold: %u%%\n", health_page.available_spare_threshold); + printf("Life Percentage Used: %u%%\n", health_page.percentage_used); + printf("Data Units Read: "); + print_uint128_dec(health_page.data_units_read); + printf("\n"); + printf("Data Units Written: "); + print_uint128_dec(health_page.data_units_written); + printf("\n"); + printf("Host Read Commands: "); + print_uint128_dec(health_page.host_read_commands); + printf("\n"); + printf("Host Write Commands: "); + print_uint128_dec(health_page.host_write_commands); + printf("\n"); + printf("Controller Busy Time: "); + print_uint128_dec(health_page.controller_busy_time); + printf(" minutes\n"); + printf("Power Cycles: "); + print_uint128_dec(health_page.power_cycles); + printf("\n"); + printf("Power On Hours: "); + print_uint128_dec(health_page.power_on_hours); + printf(" hours\n"); + printf("Unsafe Shutdowns: "); + print_uint128_dec(health_page.unsafe_shutdowns); + printf("\n"); + printf("Unrecoverable Media Errors: "); + print_uint128_dec(health_page.media_errors); + printf("\n"); + printf("Lifetime Error Log Entries: "); + print_uint128_dec(health_page.num_error_info_log_entries); + printf("\n"); + printf("Warning Temperature Time: %u minutes\n", health_page.warning_temp_time); + printf("Critical Temperature Time: %u minutes\n", health_page.critical_temp_time); + for (i = 0; i < 8; i++) { + if (health_page.temp_sensor[i] != 0) { + printf("Temperature Sensor %d: %u Kelvin (%d Celsius)\n", + i + 1, health_page.temp_sensor[i], + (int)health_page.temp_sensor[i] - 273); + } + } + printf("\n"); + } + + 
if (features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].valid) { + uint32_t result = features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].result; + + printf("Number of Queues\n"); + printf("================\n"); + printf("Number of I/O Submission Queues: %u\n", (result & 0xFFFF) + 1); + printf("Number of I/O Completion Queues: %u\n", (result & 0xFFFF0000 >> 16) + 1); + printf("\n"); + } + + if (features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].valid) { + uint32_t result = features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].result; + + printf("OCSSD Media Feedback\n"); + printf("=======================\n"); + printf("High ECC status: %u\n", (result & 0x1)); + printf("Vector High ECC status: %u\n", (result & 0x2 >> 1)); + printf("\n"); + } + + if (cdata->hctma.bits.supported) { + printf("Host Controlled Thermal Management\n"); + printf("==================================\n"); + printf("Minimum Thermal Management Temperature: "); + if (cdata->mntmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mntmt, (int)cdata->mntmt - 273); + } else { + printf("Not Reported\n"); + } + printf("Maximum Thermal Managment Temperature: "); + if (cdata->mxtmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mxtmt, (int)cdata->mxtmt - 273); + } else { + printf("Not Reported\n"); + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + size_t i = 0; + + printf("Intel Health Information\n"); + printf("==================\n"); + for (i = 0; + i < SPDK_COUNTOF(intel_smart_page.attributes); i++) { + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PROGRAM_FAIL_COUNT) { + printf("Program Fail Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_ERASE_FAIL_COUNT) { + printf("Erase Fail Count:\n"); + printf(" Normalized Value : %d\n", + 
intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_WEAR_LEVELING_COUNT) { + printf("Wear Leveling Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Min: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[0], 2); + printf("\n"); + printf(" Max: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[2], 2); + printf("\n"); + printf(" Avg: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[4], 2); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_E2E_ERROR_COUNT) { + printf("End to End Error Detection Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_CRC_ERROR_COUNT) { + printf("CRC Error Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_MEDIA_WEAR) { + printf("Timed Workload, Media Wear:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_HOST_READ_PERCENTAGE) { + printf("Timed Workload, Host Read/Write Ratio:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" 
Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("%%"); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_TIMER) { + printf("Timed Workload, Timer:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS) { + printf("Thermal Throttle Status:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Percentage: %d%%\n", intel_smart_page.attributes[i].raw_value[0]); + printf(" Throttling Event Count: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[1], 4); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER) { + printf("Retry Buffer Overflow Counter:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT) { + printf("PLL Lock Loss Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_NAND_BYTES_WRITTEN) { + printf("NAND Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == 
SPDK_NVME_INTEL_SMART_HOST_BYTES_WRITTEN) { + printf("Host Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + printf("Intel Temperature Information\n"); + printf("==================\n"); + printf("Current Temperature: %lu\n", intel_temperature_page.current_temperature); + printf("Overtemp shutdown Flag for last critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_last); + printf("Overtemp shutdown Flag for life critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_life); + printf("Highest temperature: %lu\n", intel_temperature_page.highest_temperature); + printf("Lowest temperature: %lu\n", intel_temperature_page.lowest_temperature); + printf("Specified Maximum Operating Temperature: %lu\n", + intel_temperature_page.specified_max_op_temperature); + printf("Specified Minimum Operating Temperature: %lu\n", + intel_temperature_page.specified_min_op_temperature); + printf("Estimated offset: %ld\n", intel_temperature_page.estimated_offset); + printf("\n"); + printf("\n"); + + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + printf("Intel Marketing Information\n"); + printf("==================\n"); + snprintf(str, sizeof(intel_md_page.marketing_product), "%s", intel_md_page.marketing_product); + printf("Marketing Product Information: %s\n", str); + printf("\n"); + printf("\n"); + } + + printf("Active Namespaces\n"); + printf("=================\n"); + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + print_namespace(spdk_nvme_ctrlr_get_ns(ctrlr, nsid)); + } + + if (g_discovery_page) { + printf("Discovery 
Log Page\n"); + printf("==================\n"); + + if (g_hex_dump) { + hex_dump(g_discovery_page, g_discovery_page_size); + printf("\n"); + } + + printf("Generation Counter: %" PRIu64 "\n", + from_le64(&g_discovery_page->genctr)); + printf("Number of Records: %" PRIu64 "\n", + from_le64(&g_discovery_page->numrec)); + printf("Record Format: %" PRIu16 "\n", + from_le16(&g_discovery_page->recfmt)); + printf("\n"); + + for (i = 0; i < g_discovery_page_numrec; i++) { + struct spdk_nvmf_discovery_log_page_entry *entry = &g_discovery_page->entries[i]; + + printf("Discovery Log Entry %u\n", i); + printf("----------------------\n"); + printf("Transport Type: %u (%s)\n", + entry->trtype, spdk_nvme_transport_id_trtype_str(entry->trtype)); + printf("Address Family: %u (%s)\n", + entry->adrfam, spdk_nvme_transport_id_adrfam_str(entry->adrfam)); + printf("Subsystem Type: %u (%s)\n", + entry->subtype, + entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY ? "Discovery Service" : + entry->subtype == SPDK_NVMF_SUBTYPE_NVME ? "NVM Subsystem" : + "Unknown"); + printf("Transport Requirements:\n"); + printf(" Secure Channel: %s\n", + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED ? "Not Specified" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED ? "Required" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED ? 
"Not Required" : + "Reserved"); + printf("Port ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->portid), from_le16(&entry->portid)); + printf("Controller ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->cntlid), from_le16(&entry->cntlid)); + printf("Admin Max SQ Size: %" PRIu16 "\n", + from_le16(&entry->asqsz)); + snprintf(str, sizeof(entry->trsvcid) + 1, "%s", entry->trsvcid); + printf("Transport Service Identifier: %s\n", str); + snprintf(str, sizeof(entry->subnqn) + 1, "%s", entry->subnqn); + printf("NVM Subsystem Qualified Name: %s\n", str); + snprintf(str, sizeof(entry->traddr) + 1, "%s", entry->traddr); + printf("Transport Address: %s\n", str); + + if (entry->trtype == SPDK_NVMF_TRTYPE_RDMA) { + printf("Transport Specific Address Subtype - RDMA\n"); + printf(" RDMA QP Service Type: %u (%s)\n", + entry->tsas.rdma.rdma_qptype, + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED ? "Reliable Connected" : + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM ? "Reliable Datagram" : + "Unknown"); + printf(" RDMA Provider Type: %u (%s)\n", + entry->tsas.rdma.rdma_prtype, + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_NONE ? "No provider specified" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IB ? "InfiniBand" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE ? "InfiniBand RoCE" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE2 ? "InfiniBand RoCE v2" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IWARP ? "iWARP" : + "Unknown"); + printf(" RDMA CM Service: %u (%s)\n", + entry->tsas.rdma.rdma_cms, + entry->tsas.rdma.rdma_cms == SPDK_NVMF_RDMA_CMS_RDMA_CM ? 
"RDMA_CM" : + "Unknown"); + if (entry->adrfam == SPDK_NVMF_ADRFAM_IB) { + printf(" RDMA Partition Key: %" PRIu32 "\n", + from_le32(&entry->tsas.rdma.rdma_pkey)); + } + } + } + free(g_discovery_page); + g_discovery_page = NULL; + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -r trid remote NVMe over Fabrics target address\n"); + printf(" Format: 'key:value [key:value] ...'\n"); + printf(" Keys:\n"); + printf(" trtype Transport type (e.g. RDMA)\n"); + printf(" adrfam Address family (e.g. IPv4, IPv6)\n"); + printf(" traddr Transport address (e.g. 192.168.100.8)\n"); + printf(" trsvcid Transport service identifier (e.g. 4420)\n"); + printf(" subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf(" Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n"); + + spdk_log_usage(stdout, "-L"); + + printf(" -i shared memory group ID\n"); + printf(" -p core number in decimal to run this application which started from 0\n"); + printf(" -d DPDK huge memory size in MB\n"); + printf(" -x print hex dump of raw data\n"); + printf(" -v verbose (enable warnings)\n"); + printf(" -V enumerate VMD\n"); + printf(" -H show this usage\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op, rc; + + spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE); + snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + while ((op = getopt(argc, argv, "d:i:p:r:xHL:V")) != -1) { + switch (op) { + case 'd': + g_dpdk_mem = spdk_strtol(optarg, 10); + if (g_dpdk_mem < 0) { + fprintf(stderr, "Invalid DPDK memory size\n"); + return g_dpdk_mem; + } + break; + case 'i': + g_shm_id = spdk_strtol(optarg, 10); + if (g_shm_id < 0) { + fprintf(stderr, "Invalid shared memory ID\n"); + return g_shm_id; + } + break; + case 'p': + g_master_core = spdk_strtol(optarg, 10); + if (g_master_core < 0) { + fprintf(stderr, "Invalid core 
number\n"); + return g_master_core; + } + snprintf(g_core_mask, sizeof(g_core_mask), "0x%llx", 1ULL << g_master_core); + break; + case 'r': + if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { + fprintf(stderr, "Error parsing transport address\n"); + return 1; + } + break; + case 'x': + g_hex_dump = true; + break; + case 'L': + rc = spdk_log_set_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -L flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + case 'H': + usage(argv[0]); + break; + case 'V': + g_vmd = true; + break; + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + g_controllers_found++; + print_controller(ctrlr, trid); + spdk_nvme_detach(ctrlr); +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "identify"; + opts.shm_id = g_shm_id; + opts.mem_size = g_dpdk_mem; + opts.mem_channel = 1; + opts.master_core = g_master_core; + opts.core_mask = g_core_mask; + if (g_trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + opts.no_pci = true; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (g_vmd && spdk_vmd_init()) { + fprintf(stderr, "Failed to initialize VMD." + " Some NVMe devices can be unavailable.\n"); + } + + /* A specific trid is required. 
*/ + if (strlen(g_trid.traddr) != 0) { + ctrlr = spdk_nvme_connect(&g_trid, NULL, 0); + if (!ctrlr) { + fprintf(stderr, "spdk_nvme_connect() failed\n"); + return 1; + } + + g_controllers_found++; + print_controller(ctrlr, &g_trid); + spdk_nvme_detach(ctrlr); + } else if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_controllers_found == 0) { + fprintf(stderr, "No NVMe controllers found.\n"); + } + + if (g_vmd) { + spdk_vmd_fini(); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/nvme_manage/.gitignore b/src/spdk/examples/nvme/nvme_manage/.gitignore new file mode 100644 index 000000000..cdc78a1a1 --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/.gitignore @@ -0,0 +1 @@ +nvme_manage diff --git a/src/spdk/examples/nvme/nvme_manage/Makefile b/src/spdk/examples/nvme/nvme_manage/Makefile new file mode 100644 index 000000000..ed467b884 --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = nvme_manage + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/nvme_manage/nvme_manage.c b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c new file mode 100644 index 000000000..c202dab42 --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c @@ -0,0 +1,1703 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/opal.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns_data *common_ns_data; + int outstanding_admin_cmds; + struct spdk_opal_dev *opal_dev; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; +static int g_shm_id = -1; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +enum controller_display_model { + CONTROLLER_DISPLAY_ALL = 0x0, + CONTROLLER_DISPLAY_SIMPLISTIC = 0x1, +}; + +static int +cmp_devs(const void *ap, const void *bp) +{ + const struct dev *a = ap, *b = bp; + + return spdk_pci_addr_compare(&a->pci_addr, &b->pci_addr); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +identify_common_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + if (cpl->status.sc != SPDK_NVME_SC_SUCCESS) { + /* Identify Namespace for 
NSID = FFFFFFFFh is optional, so failure is not fatal. */ + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + dev->outstanding_admin_cmds--; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + struct spdk_nvme_cmd cmd; + + /* add to dev list */ + dev = &devs[num_devs++]; + spdk_pci_addr_parse(&dev->pci_addr, trid->traddr); + dev->ctrlr = ctrlr; + + /* Retrieve controller data */ + dev->cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + dev->common_ns_data = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (dev->common_ns_data == NULL) { + fprintf(stderr, "common_ns_data allocation failure\n"); + return; + } + + /* Identify Namespace with NSID set to FFFFFFFFh to get common namespace capabilities. */ + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10_bits.identify.cns = 0; /* CNS = 0 (Identify Namespace) */ + cmd.nsid = SPDK_NVME_GLOBAL_NS_TAG; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, dev->common_ns_data, + sizeof(struct spdk_nvme_ns_data), identify_common_ns_cb, dev) != 0) { + dev->outstanding_admin_cmds--; + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void usage(void) +{ + printf("NVMe Management Options"); + printf("\n"); + printf("\t[1: list controllers]\n"); + printf("\t[2: create namespace]\n"); + printf("\t[3: delete namespace]\n"); + printf("\t[4: attach namespace to controller]\n"); + printf("\t[5: detach namespace from controller]\n"); + printf("\t[6: format namespace or controller]\n"); + printf("\t[7: firmware update]\n"); + printf("\t[8: opal]\n"); + printf("\t[9: quit]\n"); +} + +static void +display_namespace_dpc(const struct spdk_nvme_ns_data *nsdata) +{ + if 
(nsdata->dpc.pit1 || nsdata->dpc.pit2 || nsdata->dpc.pit3) { + if (nsdata->dpc.pit1) { + printf("PIT1 "); + } + + if (nsdata->dpc.pit2) { + printf("PIT2 "); + } + + if (nsdata->dpc.pit3) { + printf("PIT3 "); + } + } else { + printf("Not Supported\n"); + return; + } + + if (nsdata->dpc.md_start && nsdata->dpc.md_end) { + printf("Location: Head or Tail\n"); + } else if (nsdata->dpc.md_start) { + printf("Location: Head\n"); + } else if (nsdata->dpc.md_end) { + printf("Location: Tail\n"); + } else { + printf("Not Supported\n"); + } +} + +static void +display_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + uint32_t i; + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + printf("Format Progress Indicator: %s\n", + nsdata->fpi.fpi_supported ? "Supported" : "Not Supported"); + if (nsdata->fpi.fpi_supported && nsdata->fpi.percentage_remaining) { + printf("Formatted Percentage: %d%%\n", 100 - nsdata->fpi.percentage_remaining); + } + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("Data Protection Capabilities:"); + display_namespace_dpc(nsdata); + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE == nsdata->dps.pit) { + printf("Data Protection Setting: N/A\n"); + } else { + printf("Data Protection Setting: PIT%d Location: %s\n", + nsdata->dps.pit, nsdata->dps.md_start ? 
"Head" : "Tail"); + } + printf("Multipath IO and Sharing: %s\n", + nsdata->nmic.can_share ? "Supported" : "Not Supported"); + printf("\n"); +} + +static void +display_controller(struct dev *dev, int model) +{ + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata; + uint8_t str[128]; + uint32_t nsid; + + cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + if (model == CONTROLLER_DISPLAY_SIMPLISTIC) { + printf("%04x:%02x:%02x.%02x ", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("%-40.40s %-20.20s ", + cdata->mn, cdata->sn); + printf("%5d ", cdata->cntlid); + printf("\n"); + return; + } + + printf("=====================================================\n"); + printf("NVMe Controller: %04x:%02x:%02x.%02x\n", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("============================\n"); + printf("Controller Capabilities/Features\n"); + printf("Controller ID: %d\n", cdata->cntlid); + snprintf(str, sizeof(cdata->sn) + 1, "%s", cdata->sn); + printf("Serial Number: %s\n", str); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Namespace Manage And Attach: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Namespace Format: %s\n", + cdata->oacs.format ? 
"Supported" : "Not Supported"); + printf("\n"); + printf("NVM Command Set Attributes\n"); + printf("============================\n"); + if (cdata->fna.format_all_ns) { + printf("Namespace format operation applies to all namespaces\n"); + } else { + printf("Namespace format operation applies to per namespace\n"); + } + printf("\n"); + printf("Namespace Attributes\n"); + printf("============================\n"); + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(dev->ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(dev->ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, nsid); + assert(ns != NULL); + display_namespace(ns); + } +} + +static void +display_controller_list(void) +{ + struct dev *iter; + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_ALL); + } +} + +static char * +get_line(char *buf, int buf_size, FILE *f, bool secret) +{ + char *ch; + size_t len; + struct termios default_attr = {}, new_attr = {}; + int ret; + + if (secret) { + ret = tcgetattr(STDIN_FILENO, &default_attr); + if (ret) { + return NULL; + } + + new_attr = default_attr; + new_attr.c_lflag &= ~ECHO; /* disable echo */ + ret = tcsetattr(STDIN_FILENO, TCSAFLUSH, &new_attr); + if (ret) { + return NULL; + } + } + + ch = fgets(buf, buf_size, f); + if (ch == NULL) { + return NULL; + } + + if (secret) { + ret = tcsetattr(STDIN_FILENO, TCSAFLUSH, &default_attr); /* restore default confing */ + if (ret) { + return NULL; + } + } + + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') { + buf[len - 1] = '\0'; + } + return buf; +} + +static struct dev * +get_controller(void) +{ + struct spdk_pci_addr pci_addr; + char address[64]; + char *p; + int ch; + struct dev *iter; + + memset(address, 0, sizeof(address)); + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_SIMPLISTIC); + } + + printf("Please Input PCI Address(domain:bus:dev.func):\n"); + + while ((ch = getchar()) != '\n' && ch != EOF); + p = get_line(address, 64, stdin, false); + if (p 
== NULL) { + return NULL; + } + + while (isspace(*p)) { + p++; + } + + if (spdk_pci_addr_parse(&pci_addr, p) < 0) { + return NULL; + } + + foreach_dev(iter) { + if (spdk_pci_addr_compare(&pci_addr, &iter->pci_addr) == 0) { + return iter; + } + } + return NULL; +} + +static int +get_lba_format(const struct spdk_nvme_ns_data *ns_data) +{ + int lbaf, i; + + printf("\nSupported LBA formats:\n"); + for (i = 0; i <= ns_data->nlbaf; i++) { + printf("%2d: %d data bytes", i, 1 << ns_data->lbaf[i].lbads); + if (ns_data->lbaf[i].ms) { + printf(" + %d metadata bytes", ns_data->lbaf[i].ms); + } + printf("\n"); + } + + printf("Please input LBA format index (0 - %d):\n", ns_data->nlbaf); + if (scanf("%d", &lbaf) != 1 || lbaf > ns_data->nlbaf) { + return -1; + } + + return lbaf; +} + +static void +identify_allocated_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + dev->outstanding_admin_cmds--; +} + +static uint32_t +get_allocated_nsid(struct dev *dev) +{ + uint32_t nsid; + size_t i; + struct spdk_nvme_ns_list *ns_list; + struct spdk_nvme_cmd cmd = {0}; + + ns_list = spdk_dma_zmalloc(sizeof(*ns_list), 4096, NULL); + if (ns_list == NULL) { + printf("Allocation error\n"); + return 0; + } + + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10_bits.identify.cns = SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST; + cmd.nsid = 0; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, ns_list, sizeof(*ns_list), + identify_allocated_ns_cb, dev)) { + printf("Identify command failed\n"); + spdk_dma_free(ns_list); + return 0; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + + printf("Allocated Namespace IDs:\n"); + for (i = 0; i < SPDK_COUNTOF(ns_list->ns_list); i++) { + if (ns_list->ns_list[i] == 0) { + break; + } + printf("%u\n", ns_list->ns_list[i]); + } + + spdk_dma_free(ns_list); + + printf("Please Input Namespace ID:\n"); + if (!scanf("%u", &nsid)) { + printf("Invalid 
Namespace ID\n"); + nsid = 0; + } + + return nsid; +} + +static void +ns_attach(struct dev *device, int attachment_op, int ctrlr_id, int ns_id) +{ + int ret = 0; + struct spdk_nvme_ctrlr_list *ctrlr_list; + + ctrlr_list = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_list), + 4096, NULL); + if (ctrlr_list == NULL) { + printf("Allocation error (controller list)\n"); + exit(1); + } + + ctrlr_list->ctrlr_count = 1; + ctrlr_list->ctrlr_list[0] = ctrlr_id; + + if (attachment_op == SPDK_NVME_NS_CTRLR_ATTACH) { + ret = spdk_nvme_ctrlr_attach_ns(device->ctrlr, ns_id, ctrlr_list); + } else if (attachment_op == SPDK_NVME_NS_CTRLR_DETACH) { + ret = spdk_nvme_ctrlr_detach_ns(device->ctrlr, ns_id, ctrlr_list); + } + + if (ret) { + fprintf(stdout, "ns attach: Failed\n"); + } + + spdk_dma_free(ctrlr_list); +} + +static void +ns_manage_add(struct dev *device, uint64_t ns_size, uint64_t ns_capacity, int ns_lbasize, + uint8_t ns_dps_type, uint8_t ns_dps_location, uint8_t ns_nmic) +{ + uint32_t nsid; + struct spdk_nvme_ns_data *ndata; + + ndata = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (ndata == NULL) { + printf("Allocation error (namespace data)\n"); + exit(1); + } + + ndata->nsze = ns_size; + ndata->ncap = ns_capacity; + ndata->flbas.format = ns_lbasize; + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + ndata->dps.pit = ns_dps_type; + ndata->dps.md_start = ns_dps_location; + } + ndata->nmic.can_share = ns_nmic; + nsid = spdk_nvme_ctrlr_create_ns(device->ctrlr, ndata); + if (nsid == 0) { + fprintf(stdout, "ns manage: Failed\n"); + } else { + printf("Created namespace ID %u\n", nsid); + } + + spdk_dma_free(ndata); +} + +static void +ns_manage_delete(struct dev *device, int ns_id) +{ + int ret = 0; + + ret = spdk_nvme_ctrlr_delete_ns(device->ctrlr, ns_id); + if (ret) { + fprintf(stdout, "ns manage: Failed\n"); + return; + } +} + +static void +nvme_manage_format(struct dev *device, int ns_id, int ses, int pi, int pil, int ms, int lbaf) +{ + 
int ret = 0; + struct spdk_nvme_format format = {}; + + format.lbaf = lbaf; + format.ms = ms; + format.pi = pi; + format.pil = pil; + format.ses = ses; + ret = spdk_nvme_ctrlr_format(device->ctrlr, ns_id, &format); + if (ret) { + fprintf(stdout, "nvme format: Failed\n"); + return; + } +} + +static void +attach_and_detach_ns(int attachment_op) +{ + uint32_t nsid; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + nsid = get_allocated_nsid(ctrlr); + if (nsid == 0) { + printf("Invalid Namespace ID\n"); + return; + } + + ns_attach(ctrlr, attachment_op, ctrlr->cdata->cntlid, nsid); +} + +static void +add_ns(void) +{ + uint64_t ns_size = 0; + uint64_t ns_capacity = 0; + int ns_lbasize; + int ns_dps_type = 0; + int ns_dps_location = 0; + int ns_nmic = 0; + struct dev *ctrlr = NULL; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + if (!ctrlr->common_ns_data) { + printf("Controller did not return common namespace capabilities\n"); + return; + } + + ns_lbasize = get_lba_format(ctrlr->common_ns_data); + if (ns_lbasize < 0) { + printf("Invalid LBA format number\n"); + return; + } + + printf("Please Input Namespace Size (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_size)) { + printf("Invalid Namespace Size\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Namespace Capacity (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_capacity)) { + printf("Invalid Namespace Capacity\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Data Protection Type (0 - 3):\n"); + if (!scanf("%d", &ns_dps_type)) { + printf("Invalid Data Protection Type\n"); + while (getchar() != '\n'); + 
return; + } + + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + printf("Please Input Data Protection Location (1: Head; 0: Tail):\n"); + if (!scanf("%d", &ns_dps_location)) { + printf("Invalid Data Protection Location\n"); + while (getchar() != '\n'); + return; + } + } + + printf("Please Input Multi-path IO and Sharing Capabilities (1: Share; 0: Private):\n"); + if (!scanf("%d", &ns_nmic)) { + printf("Invalid Multi-path IO and Sharing Capabilities\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_add(ctrlr, ns_size, ns_capacity, ns_lbasize, + ns_dps_type, ns_dps_location, ns_nmic); +} + +static void +delete_ns(void) +{ + int ns_id; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + printf("Please Input Namespace ID:\n"); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_delete(ctrlr, ns_id); +} + +static void +format_nvm(void) +{ + int ns_id; + int ses; + int pil; + int pi; + int ms; + int lbaf; + char option; + struct dev *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ns_data *nsdata; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.format) { + printf("Controller does not support Format NVM command\n"); + return; + } + + if (cdata->fna.format_all_ns) { + ns_id = SPDK_NVME_GLOBAL_NS_TAG; + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, 1); + } else { + printf("Please Input Namespace ID (1 - %d):\n", cdata->nn); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, ns_id); + } + + if (ns == NULL) { + printf("Namespace 
ID %d not found\n", ns_id); + while (getchar() != '\n'); + return; + } + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Please Input Secure Erase Setting:\n"); + printf(" 0: No secure erase operation requested\n"); + printf(" 1: User data erase\n"); + if (cdata->fna.crypto_erase_supported) { + printf(" 2: Cryptographic erase\n"); + } + if (!scanf("%d", &ses)) { + printf("Invalid Secure Erase Setting\n"); + while (getchar() != '\n'); + return; + } + + lbaf = get_lba_format(nsdata); + if (lbaf < 0) { + printf("Invalid LBA format number\n"); + return; + } + + if (nsdata->lbaf[lbaf].ms) { + printf("Please Input Protection Information:\n"); + printf(" 0: Protection information is not enabled\n"); + printf(" 1: Protection information is enabled, Type 1\n"); + printf(" 2: Protection information is enabled, Type 2\n"); + printf(" 3: Protection information is enabled, Type 3\n"); + if (!scanf("%d", &pi)) { + printf("Invalid protection information\n"); + while (getchar() != '\n'); + return; + } + + if (pi) { + printf("Please Input Protection Information Location:\n"); + printf(" 0: Protection information transferred as the last eight bytes of metadata\n"); + printf(" 1: Protection information transferred as the first eight bytes of metadata\n"); + if (!scanf("%d", &pil)) { + printf("Invalid protection information location\n"); + while (getchar() != '\n'); + return; + } + } else { + pil = 0; + } + + printf("Please Input Metadata Setting:\n"); + printf(" 0: Metadata is transferred as part of a separate buffer\n"); + printf(" 1: Metadata is transferred as part of an extended data LBA\n"); + if (!scanf("%d", &ms)) { + printf("Invalid metadata setting\n"); + while (getchar() != '\n'); + return; + } + } else { + ms = 0; + pi = 0; + pil = 0; + } + + printf("Warning: use this utility at your own risk.\n" + "This command will format your namespace and all data will be lost.\n" + "This command may take several minutes to complete,\n" + "so do not interrupt the utility until it 
completes.\n" + "Press 'Y' to continue with the format operation.\n"); + + while (getchar() != '\n'); + if (!scanf("%c", &option)) { + printf("Invalid option\n"); + while (getchar() != '\n'); + return; + } + + if (option == 'y' || option == 'Y') { + nvme_manage_format(ctrlr, ns_id, ses, pi, pil, ms, lbaf); + } else { + printf("NVMe format abort\n"); + } +} + +static void +update_firmware_image(void) +{ + int rc; + int fd = -1; + int slot; + unsigned int size; + struct stat fw_stat; + char path[256]; + void *fw_image; + struct dev *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + enum spdk_nvme_fw_commit_action commit_action; + struct spdk_nvme_status status; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.firmware) { + printf("Controller does not support firmware download and commit command\n"); + return; + } + + printf("Please Input The Path Of Firmware Image\n"); + + if (get_line(path, sizeof(path), stdin, false) == NULL) { + printf("Invalid path setting\n"); + while (getchar() != '\n'); + return; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("Open file failed"); + return; + } + rc = fstat(fd, &fw_stat); + if (rc < 0) { + printf("Fstat failed\n"); + close(fd); + return; + } + + if (fw_stat.st_size % 4) { + printf("Firmware image size is not multiple of 4\n"); + close(fd); + return; + } + + size = fw_stat.st_size; + + fw_image = spdk_dma_zmalloc(size, 4096, NULL); + if (fw_image == NULL) { + printf("Allocation error\n"); + close(fd); + return; + } + + if (read(fd, fw_image, size) != ((ssize_t)(size))) { + printf("Read firmware image failed\n"); + close(fd); + spdk_dma_free(fw_image); + return; + } + close(fd); + + printf("Please Input Slot(0 - 7):\n"); + if (!scanf("%d", &slot)) { + printf("Invalid Slot\n"); + spdk_dma_free(fw_image); + while (getchar() != '\n'); + return; + } + + commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG; 
+ rc = spdk_nvme_ctrlr_update_firmware(ctrlr->ctrlr, fw_image, size, slot, commit_action, &status); + if (rc == -ENXIO && status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && + status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { + printf("conventional reset is needed to enable firmware !\n"); + } else if (rc) { + printf("spdk_nvme_ctrlr_update_firmware failed\n"); + } else { + printf("spdk_nvme_ctrlr_update_firmware success\n"); + } + spdk_dma_free(fw_image); +} + +static void +opal_dump_info(struct spdk_opal_d0_features_info *feat) +{ + if (feat->tper.hdr.code) { + printf("\nOpal TPer feature:\n"); + printf("ACKNACK = %s", (feat->tper.acknack ? "Y, " : "N, ")); + printf("ASYNC = %s", (feat->tper.async ? "Y, " : "N, ")); + printf("BufferManagement = %s\n", (feat->tper.buffer_management ? "Y, " : "N, ")); + printf("ComIDManagement = %s", (feat->tper.comid_management ? "Y, " : "N, ")); + printf("Streaming = %s", (feat->tper.streaming ? "Y, " : "N, ")); + printf("Sync = %s\n", (feat->tper.sync ? "Y" : "N")); + printf("\n"); + } + + if (feat->locking.hdr.code) { + printf("Opal Locking feature:\n"); + printf("Locked = %s", (feat->locking.locked ? "Y, " : "N, ")); + printf("Locking Enabled = %s", (feat->locking.locking_enabled ? "Y, " : "N, ")); + printf("Locking supported = %s\n", (feat->locking.locking_supported ? "Y" : "N")); + + printf("MBR done = %s", (feat->locking.mbr_done ? "Y, " : "N, ")); + printf("MBR enabled = %s", (feat->locking.mbr_enabled ? "Y, " : "N, ")); + printf("Media encrypt = %s\n", (feat->locking.media_encryption ? "Y" : "N")); + printf("\n"); + } + + if (feat->geo.hdr.code) { + printf("Opal Geometry feature:\n"); + printf("Align = %s", (feat->geo.alignment_granularity ? 
"Y, " : "N, ")); + printf("Logical block size = %d, ", from_be32(&feat->geo.logical_block_size)); + printf("Lowest aligned LBA = %ld\n", from_be64(&feat->geo.lowest_aligned_lba)); + printf("\n"); + } + + if (feat->single_user.hdr.code) { + printf("Opal Single User Mode feature:\n"); + printf("Any in SUM = %s", (feat->single_user.any ? "Y, " : "N, ")); + printf("All in SUM = %s", (feat->single_user.all ? "Y, " : "N, ")); + printf("Policy: %s Authority,\n", (feat->single_user.policy ? "Admin" : "Users")); + printf("Number of locking objects = %d\n ", from_be32(&feat->single_user.num_locking_objects)); + printf("\n"); + } + + if (feat->datastore.hdr.code) { + printf("Opal DataStore feature:\n"); + printf("Table alignment = %d, ", from_be32(&feat->datastore.alignment)); + printf("Max number of tables = %d, ", from_be16(&feat->datastore.max_tables)); + printf("Max size of tables = %d\n", from_be32(&feat->datastore.max_table_size)); + printf("\n"); + } + + if (feat->v100.hdr.code) { + printf("Opal V100 feature:\n"); + printf("Base comID = %d, ", from_be16(&feat->v100.base_comid)); + printf("Number of comIDs = %d, ", from_be16(&feat->v100.number_comids)); + printf("Range crossing = %s\n", (feat->v100.range_crossing ? 
"N" : "Y")); + printf("\n"); + } + + if (feat->v200.hdr.code) { + printf("Opal V200 feature:\n"); + printf("Base comID = %d, ", from_be16(&feat->v200.base_comid)); + printf("Number of comIDs = %d, ", from_be16(&feat->v200.num_comids)); + printf("Initial PIN = %d,\n", feat->v200.initial_pin); + printf("Reverted PIN = %d, ", feat->v200.reverted_pin); + printf("Number of admins = %d, ", from_be16(&feat->v200.num_locking_admin_auth)); + printf("Number of users = %d\n", from_be16(&feat->v200.num_locking_user_auth)); + printf("\n"); + } +} + +static void +opal_usage(void) +{ + printf("Opal General Usage:\n"); + printf("\n"); + printf("\t[1: scan device]\n"); + printf("\t[2: init - take ownership and activate locking]\n"); + printf("\t[3: revert tper]\n"); + printf("\t[4: setup locking range]\n"); + printf("\t[5: list locking ranges]\n"); + printf("\t[6: enable user]\n"); + printf("\t[7: set new password]\n"); + printf("\t[8: add user to locking range]\n"); + printf("\t[9: lock/unlock range]\n"); + printf("\t[10: erase locking range]\n"); + printf("\t[0: quit]\n"); +} + +static void +opal_scan(struct dev *iter) +{ + while (getchar() != '\n'); + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + + printf("\n\nOpal Supported:\n"); + display_controller(iter, CONTROLLER_DISPLAY_SIMPLISTIC); + opal_dump_info(spdk_opal_get_d0_features_info(iter->opal_dev)); + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + printf("%04x:%02x:%02x.%02x: Opal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_init(struct dev *iter) +{ + char new_passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char 
*passwd_p; + int ret; + int ch; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please input the new password for ownership:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(new_passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n...\n"); + if (passwd_p) { + ret = spdk_opal_cmd_take_ownership(iter->opal_dev, passwd_p); + if (ret) { + printf("Take ownership failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + ret = spdk_opal_cmd_activate_locking_sp(iter->opal_dev, passwd_p); + if (ret) { + printf("Locking SP activate failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("...\nOpal Init Success\n"); + } else { + printf("Input password invalid. Opal Init failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_locking_usage(void) +{ + printf("Choose Opal locking state:\n"); + printf("\n"); + printf("\t[1: read write lock]\n"); + printf("\t[2: read only]\n"); + printf("\t[3: read write unlock]\n"); +} + +static void +opal_setup_lockingrange(struct dev *iter) +{ + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ret; + int ch; + uint64_t range_start; + uint64_t range_length; + int locking_range_id; + struct spdk_opal_locking_range_info *info; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please input the password for setting up locking range:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p 
= get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (passwd_p) { + printf("Specify locking range id:\n"); + if (!scanf("%d", &locking_range_id)) { + printf("Invalid locking range id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("range length:\n"); + if (!scanf("%" SCNu64, &range_length)) { + printf("Invalid range length\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("range start:\n"); + if (!scanf("%" SCNu64, &range_start)) { + printf("Invalid range start address\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + while (getchar() != '\n'); + + ret = spdk_opal_cmd_setup_locking_range(iter->opal_dev, + OPAL_ADMIN1, locking_range_id, range_start, range_length, passwd_p); + if (ret) { + printf("Setup locking range failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + ret = spdk_opal_cmd_get_locking_range_info(iter->opal_dev, + passwd_p, OPAL_ADMIN1, locking_range_id); + if (ret) { + printf("Get locking range info failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + info = spdk_opal_get_locking_range_info(iter->opal_dev, locking_range_id); + + printf("\nlocking range ID: %d\n", info->locking_range_id); + printf("range start: %ld\n", info->range_start); + printf("range length: %ld\n", info->range_length); + printf("read lock enabled: %d\n", info->read_lock_enabled); + printf("write lock enabled: %d\n", info->write_lock_enabled); + printf("read locked: %d\n", info->read_locked); + printf("write locked: %d\n", info->write_locked); + + printf("...\n...\nOpal setup locking range success\n"); + } else { + printf("Input password invalid. 
Opal setup locking range failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_list_locking_ranges(struct dev *iter) +{ + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ret; + int ch; + int max_ranges; + int i; + struct spdk_opal_locking_range_info *info; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please input password:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (passwd_p) { + ret = spdk_opal_cmd_get_max_ranges(iter->opal_dev, passwd_p); + if (ret <= 0) { + printf("get max ranges failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + max_ranges = ret; + for (i = 0; i < max_ranges; i++) { + ret = spdk_opal_cmd_get_locking_range_info(iter->opal_dev, + passwd_p, OPAL_ADMIN1, i); + if (ret) { + printf("Get locking range info failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + info = spdk_opal_get_locking_range_info(iter->opal_dev, i); + if (info == NULL) { + continue; + } + + printf("===============================================\n"); + printf("locking range ID: %d\t", info->locking_range_id); + if (i == 0) { printf("(Global Range)"); } + printf("\n===============================================\n"); + printf("range start: %ld\t", info->range_start); + printf("range length: %ld\n", info->range_length); + printf("read lock enabled: %d\t", info->read_lock_enabled); + printf("write lock enabled: %d\t", info->write_lock_enabled); + printf("read locked: %d\t", info->read_locked); + printf("write 
locked: %d\n", info->write_locked); + printf("\n"); + } + } else { + printf("Input password invalid. List locking ranges failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_new_user_enable(struct dev *iter) +{ + int user_id; + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + char user_pw[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *user_pw_p; + int ret; + int ch; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please input admin password:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (passwd_p) { + printf("which user to enable: "); + if (!scanf("%d", &user_id)) { + printf("Invalid user id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + ret = spdk_opal_cmd_enable_user(iter->opal_dev, user_id, passwd_p); + if (ret) { + printf("Enable user failure error code: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("Please set a new password for this user:"); + while ((ch = getchar()) != '\n' && ch != EOF); + user_pw_p = get_line(user_pw, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + if (user_pw_p == NULL) { + printf("Input password invalid. 
Enable user failure\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + ret = spdk_opal_cmd_set_new_passwd(iter->opal_dev, user_id, user_pw_p, passwd_p, true); + if (ret) { + printf("Set new password failure error code: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("\n...\n...\nEnable User Success\n"); + } else { + printf("Input password invalid. Enable user failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_change_password(struct dev *iter) +{ + int user_id; + char old_passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *old_passwd_p; + char new_passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *new_passwd_p; + int ret; + int ch; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("user id: "); + if (!scanf("%d", &user_id)) { + printf("Invalid user id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("Password:"); + while ((ch = getchar()) != '\n' && ch != EOF); + old_passwd_p = get_line(old_passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (old_passwd_p) { + printf("Please input new password:\n"); + new_passwd_p = get_line(new_passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (new_passwd_p == NULL) { + printf("Input password invalid. 
Change password failure\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + ret = spdk_opal_cmd_set_new_passwd(iter->opal_dev, user_id, new_passwd_p, old_passwd_p, false); + if (ret) { + printf("Set new password failure error code: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("...\n...\nChange password Success\n"); + } else { + printf("Input password invalid. Change password failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_add_user_to_locking_range(struct dev *iter) +{ + int locking_range_id, user_id; + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ret; + int ch; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please input admin password:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (passwd_p) { + printf("Specify locking range id:\n"); + if (!scanf("%d", &locking_range_id)) { + printf("Invalid locking range id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("which user to enable:\n"); + if (!scanf("%d", &user_id)) { + printf("Invalid user id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + while (getchar() != '\n'); + + ret = spdk_opal_cmd_add_user_to_locking_range(iter->opal_dev, user_id, locking_range_id, + OPAL_READONLY, passwd_p); + ret += spdk_opal_cmd_add_user_to_locking_range(iter->opal_dev, user_id, locking_range_id, + OPAL_READWRITE, passwd_p); + if (ret) { + printf("Add user to locking range error: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); 
+ return; + } + + printf("...\n...\nAdd user to locking range Success\n"); + } else { + printf("Input password invalid. Add user to locking range failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_user_lock_unlock_range(struct dev *iter) +{ + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ch; + int ret; + int user_id; + int locking_range_id; + int state; + enum spdk_opal_lock_state state_flag; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("User id: "); + if (!scanf("%d", &user_id)) { + printf("Invalid user id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + printf("Please input password:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n"); + if (passwd_p) { + printf("Specify locking range id:\n"); + if (!scanf("%d", &locking_range_id)) { + printf("Invalid locking range id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + + opal_locking_usage(); + if (!scanf("%d", &state)) { + printf("Invalid option\n"); + } + switch (state) { + case 1: + state_flag = OPAL_RWLOCK; + break; + case 2: + state_flag = OPAL_READONLY; + break; + case 3: + state_flag = OPAL_READWRITE; + break; + default: + printf("Invalid options\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + while (getchar() != '\n'); + + ret = spdk_opal_cmd_lock_unlock(iter->opal_dev, user_id, state_flag, + locking_range_id, passwd_p); + if (ret) { + printf("lock/unlock range failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + 
printf("...\n...\nLock/unlock range Success\n"); + } else { + printf("Input password invalid. lock/unlock range failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_revert_tper(struct dev *iter) +{ + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ret; + int ch; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please be noted this operation will erase ALL DATA on this drive\n"); + printf("Please don't ternminate this excecution. Otherwise undefined error may occur\n"); + printf("Please input password for revert TPer:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + printf("\n...\n"); + if (passwd_p) { + ret = spdk_opal_cmd_revert_tper(iter->opal_dev, passwd_p); + if (ret) { + printf("Revert TPer failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("...\nRevert TPer Success\n"); + } else { + printf("Input password invalid. 
Revert TPer failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +static void +opal_erase_locking_range(struct dev *iter) +{ + char passwd[SPDK_OPAL_MAX_PASSWORD_SIZE] = {0}; + char *passwd_p; + int ret; + int ch; + int locking_range_id; + + if (spdk_nvme_ctrlr_get_flags(iter->ctrlr) & SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) { + iter->opal_dev = spdk_opal_dev_construct(iter->ctrlr); + if (iter->opal_dev == NULL) { + return; + } + printf("Please be noted this operation will erase ALL DATA on this range\n"); + printf("Please input password for erase locking range:"); + while ((ch = getchar()) != '\n' && ch != EOF); + passwd_p = get_line(passwd, SPDK_OPAL_MAX_PASSWORD_SIZE, stdin, true); + if (passwd_p) { + printf("\nSpecify locking range id:\n"); + if (!scanf("%d", &locking_range_id)) { + printf("Invalid locking range id\n"); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("\n...\n"); + ret = spdk_opal_cmd_secure_erase_locking_range(iter->opal_dev, OPAL_ADMIN1, locking_range_id, + passwd_p); + if (ret) { + printf("Erase locking range failure: %d\n", ret); + spdk_opal_dev_destruct(iter->opal_dev); + return; + } + printf("...\nErase locking range Success\n"); + } else { + printf("Input password invalid. 
Erase locking range failure\n"); + } + spdk_opal_dev_destruct(iter->opal_dev); + } else { + printf("%04x:%02x:%02x.%02x: NVMe Security Support/Receive Not supported.\nOpal Not Supported\n\n\n", + iter->pci_addr.domain, iter->pci_addr.bus, iter->pci_addr.dev, iter->pci_addr.func); + } +} + +/* + * Interactive Opal sub-menu: reads a command number from stdin and dispatches it to the + * matching opal_* handler for the controller returned by get_controller(). Command 0, or + * end of input on stdin, leaves the menu. + */ +static void +test_opal(void) +{ + int exit_flag = false; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + opal_usage(); + while (!exit_flag) { + int cmd; + int ch; + int nread; + + /* + * scanf() returns EOF, which is non-zero, once stdin is exhausted. Leave the menu + * in that case rather than dispatching on an unset cmd. + */ + nread = scanf("%d", &cmd); + if (nread == EOF) { + break; + } + if (nread != 1) { + printf("Invalid Command: command must be number 0-10\n"); + /* Discard the rest of the line, but never spin on EOF. */ + while ((ch = getchar()) != '\n' && ch != EOF); + opal_usage(); + continue; + } + + switch (cmd) { + case 0: + exit_flag = true; + continue; + case 1: + opal_scan(ctrlr); + break; + case 2: + opal_init(ctrlr); /* Take ownership, Activate Locking SP */ + break; + case 3: + opal_revert_tper(ctrlr); + break; + case 4: + opal_setup_lockingrange(ctrlr); + break; + case 5: + opal_list_locking_ranges(ctrlr); + break; + case 6: + opal_new_user_enable(ctrlr); + break; + case 7: + opal_change_password(ctrlr); + break; + case 8: + opal_add_user_to_locking_range(ctrlr); + break; + case 9: + opal_user_lock_unlock_range(ctrlr); + break; + case 10: + opal_erase_locking_range(ctrlr); + break; + + default: + printf("Invalid option\n"); + } + + printf("\npress Enter to display Opal cmd menu ...\n"); + /* Wait for Enter; stop waiting if stdin hits EOF. */ + while ((ch = getchar()) != '\n' && ch != EOF); + opal_usage(); + } +} + +static void +args_usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -i shared memory group ID\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "i:")) != -1) { + switch (op) { + case 'i': + g_shm_id = spdk_strtol(optarg, 10); + if (g_shm_id < 0) { + fprintf(stderr, "Invalid shared memory ID\n"); + return g_shm_id; + } + break; + default: + args_usage(argv[0]); + return 1; + } + } + + return 0; +} + +int main(int 
argc, char **argv) +{ + int i, rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "nvme_manage"; + opts.core_mask = "0x1"; + opts.shm_id = g_shm_id; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + /* Order the probed controllers with cmp_devs before showing the menu. */ + qsort(devs, num_devs, sizeof(devs[0]), cmp_devs); + + usage(); + + while (1) { + int cmd; + int ch; + int nread; + bool exit_flag = false; + + /* + * scanf() returns EOF, which is non-zero, once stdin is exhausted. Leave the menu + * and fall through to cleanup rather than dispatching on an unset cmd. + */ + nread = scanf("%d", &cmd); + if (nread == EOF) { + break; + } + if (nread != 1) { + printf("Invalid Command: command must be number 1-9\n"); + /* Discard the rest of the line, but never spin on EOF. */ + while ((ch = getchar()) != '\n' && ch != EOF); + usage(); + continue; + } + switch (cmd) { + case 1: + display_controller_list(); + break; + case 2: + add_ns(); + break; + case 3: + delete_ns(); + break; + case 4: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_ATTACH); + break; + case 5: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_DETACH); + break; + case 6: + format_nvm(); + break; + case 7: + update_firmware_image(); + break; + case 8: + test_opal(); + break; + case 9: + exit_flag = true; + break; + default: + printf("Invalid Command\n"); + break; + } + + if (exit_flag) { + break; + } + + /* Consume the remainder of the current input line, then wait for Enter. */ + while ((ch = getchar()) != '\n' && ch != EOF); + printf("press Enter to display cmd menu ...\n"); + while ((ch = getchar()) != '\n' && ch != EOF); + usage(); + } + + printf("Cleaning up...\n"); + + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/perf/.gitignore b/src/spdk/examples/nvme/perf/.gitignore new file mode 100644 index 000000000..bd14107d8 --- /dev/null +++ b/src/spdk/examples/nvme/perf/.gitignore @@ -0,0 +1 @@ +perf diff --git a/src/spdk/examples/nvme/perf/Makefile b/src/spdk/examples/nvme/perf/Makefile new file mode 100644 index 000000000..0742f1842 --- /dev/null +++ b/src/spdk/examples/nvme/perf/Makefile @@ -0,0 +1,49 @@ +# +# BSD LICENSE +# +# 
Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+ +APP = perf + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk + +ifeq ($(OS),Linux) +SYS_LIBS += -laio +CFLAGS += -DHAVE_LIBAIO +endif + +install: $(APP) + $(INSTALL_EXAMPLE) + +uninstall: + $(UNINSTALL_EXAMPLE) diff --git a/src/spdk/examples/nvme/perf/README.md b/src/spdk/examples/nvme/perf/README.md new file mode 100644 index 000000000..e5ec38d12 --- /dev/null +++ b/src/spdk/examples/nvme/perf/README.md @@ -0,0 +1,5 @@ +# Compiling perf on FreeBSD + +To use the perf test on FreeBSD over NVMe-oF, explicitly link the userspace library of the HBA. For example, on a setup with a Mellanox HBA: + + LIBS += -lmlx5 diff --git a/src/spdk/examples/nvme/perf/perf.c b/src/spdk/examples/nvme/perf/perf.c new file mode 100644 index 000000000..9e8cf6793 --- /dev/null +++ b/src/spdk/examples/nvme/perf/perf.c @@ -0,0 +1,2308 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/fd.h" +#include "spdk/nvme.h" +#include "spdk/vmd.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/nvme_intel.h" +#include "spdk/histogram_data.h" +#include "spdk/endian.h" +#include "spdk/dif.h" +#include "spdk/util.h" +#include "spdk/log.h" +#include "spdk/likely.h" + +#ifdef SPDK_CONFIG_URING +#include <liburing.h> +#endif + +#if HAVE_LIBAIO +#include <libaio.h> +#endif + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + enum spdk_nvme_transport_type trtype; + struct spdk_nvme_intel_rw_latency_page *latency_page; + + struct spdk_nvme_qpair **unused_qpairs; + + struct ctrlr_entry *next; + char name[1024]; +}; + +enum entry_type { + ENTRY_TYPE_NVME_NS, + ENTRY_TYPE_AIO_FILE, + ENTRY_TYPE_URING_FILE, +}; + +struct ns_fn_table; + +struct ns_entry { + enum entry_type type; + const struct ns_fn_table *fn_table; + + union { + struct { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + } nvme; +#ifdef SPDK_CONFIG_URING + struct { + int fd; + } uring; +#endif +#if HAVE_LIBAIO + struct { + int fd; + } aio; +#endif + } u; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint32_t num_io_requests; + uint64_t size_in_ios; + uint32_t block_size; + uint32_t md_size; + bool md_interleave; + bool pi_loc; + enum spdk_nvme_pi_type pi_type; + uint32_t io_flags; + char name[1024]; +}; + +static const double g_latency_cutoffs[] = { + 
0.01, + 0.10, + 0.25, + 0.50, + 0.75, + 0.90, + 0.95, + 0.98, + 0.99, + 0.995, + 0.999, + 0.9999, + 0.99999, + 0.999999, + 0.9999999, + -1, +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_completed; + uint64_t last_io_completed; + uint64_t total_tsc; + uint64_t min_tsc; + uint64_t max_tsc; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + + union { + struct { + int num_active_qpairs; + int num_all_qpairs; + struct spdk_nvme_qpair **qpair; + struct spdk_nvme_poll_group *group; + int last_qpair; + } nvme; + +#ifdef SPDK_CONFIG_URING + struct { + struct io_uring ring; + uint64_t io_inflight; + uint64_t io_pending; + struct io_uring_cqe **cqes; + + } uring; +#endif +#if HAVE_LIBAIO + struct { + struct io_event *events; + io_context_t ctx; + } aio; +#endif + } u; + + struct ns_worker_ctx *next; + + struct spdk_histogram_data *histogram; +}; + +struct perf_task { + struct ns_worker_ctx *ns_ctx; + struct iovec iov; + struct iovec md_iov; + uint64_t submit_tsc; + bool is_read; + struct spdk_dif_ctx dif_ctx; +#if HAVE_LIBAIO + struct iocb iocb; +#endif +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; +}; + +struct ns_fn_table { + void (*setup_payload)(struct perf_task *task, uint8_t pattern); + + int (*submit_io)(struct perf_task *task, struct ns_worker_ctx *ns_ctx, + struct ns_entry *entry, uint64_t offset_in_ios); + + void (*check_io)(struct ns_worker_ctx *ns_ctx); + + void (*verify_io)(struct perf_task *task, struct ns_entry *entry); + + int (*init_ns_worker_ctx)(struct ns_worker_ctx *ns_ctx); + + void (*cleanup_ns_worker_ctx)(struct ns_worker_ctx *ns_ctx); +}; + +static int g_outstanding_commands; + +static bool g_latency_ssd_tracking_enable; +static int g_latency_sw_tracking_level; + +static bool g_vmd; +static const char *g_workload_type; +static struct ctrlr_entry *g_controllers; +static struct ns_entry *g_namespaces; +static int g_num_namespaces; +static 
struct worker_thread *g_workers; +static int g_num_workers; +static uint32_t g_master_core; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_align = 0x200; +static uint32_t g_io_size_bytes; +static uint32_t g_max_io_md_size; +static uint32_t g_max_io_size_blocks; +static uint32_t g_metacfg_pract_flag; +static uint32_t g_metacfg_prchk_flags; +static int g_rw_percentage = -1; +static int g_is_random; +static int g_queue_depth; +static int g_nr_io_queues_per_ns = 1; +static int g_nr_unused_io_queues; +static int g_time_in_sec; +static uint32_t g_max_completions; +static int g_dpdk_mem; +static int g_shm_id = -1; +static uint32_t g_disable_sq_cmb; +static bool g_use_uring; +static bool g_no_pci; +static bool g_warn; +static bool g_header_digest; +static bool g_data_digest; +static bool g_no_shn_notification; +static bool g_mix_specified; +/* Default to 10 seconds for the keep alive value. This value is arbitrary. */ +static uint32_t g_keep_alive_timeout_in_ms = 10000; + +static const char *g_core_mask; + +struct trid_entry { + struct spdk_nvme_transport_id trid; + uint16_t nsid; + TAILQ_ENTRY(trid_entry) tailq; +}; + +static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list); + +static int g_file_optind; /* Index of first filename in argv */ + +static inline void +task_complete(struct perf_task *task); + +#ifdef SPDK_CONFIG_URING + +static void +uring_setup_payload(struct perf_task *task, uint8_t pattern) +{ + task->iov.iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL); + task->iov.iov_len = g_io_size_bytes; + if (task->iov.iov_base == NULL) { + fprintf(stderr, "spdk_dma_zmalloc() for task->iov.iov_base failed\n"); + exit(1); + } + memset(task->iov.iov_base, pattern, task->iov.iov_len); +} + +static int +uring_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, + struct ns_entry *entry, uint64_t offset_in_ios) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(&ns_ctx->u.uring.ring); + if (!sqe) { + 
fprintf(stderr, "Cannot get sqe\n"); + return -1; + } + + if (task->is_read) { + io_uring_prep_readv(sqe, entry->u.uring.fd, &task->iov, 1, offset_in_ios * task->iov.iov_len); + } else { + io_uring_prep_writev(sqe, entry->u.uring.fd, &task->iov, 1, offset_in_ios * task->iov.iov_len); + } + + io_uring_sqe_set_data(sqe, task); + ns_ctx->u.uring.io_pending++; + + return 0; +} + +/* + * Submit the SQEs queued by uring_submit_io() (counted in io_pending), then reap whatever + * completions are ready and pass each finished task to task_complete(). A completion whose + * result does not equal the requested length is treated as fatal. + */ +static void +uring_check_io(struct ns_worker_ctx *ns_ctx) +{ + int i, count, to_complete, to_submit, ret = 0; + struct perf_task *task; + + to_submit = ns_ctx->u.uring.io_pending; + + if (to_submit > 0) { + /* If there are I/O to submit, use io_uring_submit here. + * It will automatically call spdk_io_uring_enter appropriately. */ + ret = io_uring_submit(&ns_ctx->u.uring.ring); + if (ret < 0) { + return; + } + ns_ctx->u.uring.io_pending = 0; + ns_ctx->u.uring.io_inflight += to_submit; + } + + to_complete = ns_ctx->u.uring.io_inflight; + if (to_complete > 0) { + count = io_uring_peek_batch_cqe(&ns_ctx->u.uring.ring, ns_ctx->u.uring.cqes, to_complete); + ns_ctx->u.uring.io_inflight -= count; + for (i = 0; i < count; i++) { + assert(ns_ctx->u.uring.cqes[i] != NULL); + task = (struct perf_task *)ns_ctx->u.uring.cqes[i]->user_data; + if (ns_ctx->u.uring.cqes[i]->res != (int)task->iov.iov_len) { + fprintf(stderr, "cqe[i]->status=%d\n", ns_ctx->u.uring.cqes[i]->res); + /* Failed or short I/O: exit with a failure status, as the other check_io paths do. */ + exit(1); + } + io_uring_cqe_seen(&ns_ctx->u.uring.ring, ns_ctx->u.uring.cqes[i]); + task_complete(task); + } + } +} + +static void +uring_verify_io(struct perf_task *task, struct ns_entry *entry) +{ +} + +static int +uring_init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + if (io_uring_queue_init(g_queue_depth, &ns_ctx->u.uring.ring, 0) < 0) { + SPDK_ERRLOG("uring I/O context setup failure\n"); + return -1; + } + + ns_ctx->u.uring.cqes = calloc(g_queue_depth, sizeof(struct io_uring_cqe *)); + if (!ns_ctx->u.uring.cqes) { + io_uring_queue_exit(&ns_ctx->u.uring.ring); + return -1; + } + + return 0; +} + +static void 
+uring_cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + io_uring_queue_exit(&ns_ctx->u.uring.ring); + free(ns_ctx->u.uring.cqes); +} + +static const struct ns_fn_table uring_fn_table = { + .setup_payload = uring_setup_payload, + .submit_io = uring_submit_io, + .check_io = uring_check_io, + .verify_io = uring_verify_io, + .init_ns_worker_ctx = uring_init_ns_worker_ctx, + .cleanup_ns_worker_ctx = uring_cleanup_ns_worker_ctx, +}; + +#endif + +#ifdef HAVE_LIBAIO +static void +aio_setup_payload(struct perf_task *task, uint8_t pattern) +{ + task->iov.iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL); + task->iov.iov_len = g_io_size_bytes; + if (task->iov.iov_base == NULL) { + fprintf(stderr, "spdk_dma_zmalloc() for task->buf failed\n"); + exit(1); + } + memset(task->iov.iov_base, pattern, task->iov.iov_len); +} + +static int +aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, + struct iovec *iov, uint64_t offset, void *cb_ctx) +{ + iocb->aio_fildes = fd; + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = cmd; + iocb->u.c.buf = iov->iov_base; + iocb->u.c.nbytes = iov->iov_len; + iocb->u.c.offset = offset * iov->iov_len; + iocb->data = cb_ctx; + + if (io_submit(aio_ctx, 1, &iocb) < 0) { + printf("io_submit"); + return -1; + } + + return 0; +} + +static int +aio_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, + struct ns_entry *entry, uint64_t offset_in_ios) +{ + if (task->is_read) { + return aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD, + &task->iov, offset_in_ios, task); + } else { + return aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE, + &task->iov, offset_in_ios, task); + } +} + +static void +aio_check_io(struct ns_worker_ctx *ns_ctx) +{ + int count, i; + struct timespec timeout; + + timeout.tv_sec = 0; + timeout.tv_nsec = 0; + + count = io_getevents(ns_ctx->u.aio.ctx, 1, g_queue_depth, ns_ctx->u.aio.events, &timeout); + if (count < 0) { + 
fprintf(stderr, "io_getevents error\n"); + exit(1); + } + + for (i = 0; i < count; i++) { + task_complete(ns_ctx->u.aio.events[i].data); + } +} + +static void +aio_verify_io(struct perf_task *task, struct ns_entry *entry) +{ +} + +static int +aio_init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->u.aio.events = calloc(g_queue_depth, sizeof(struct io_event)); + if (!ns_ctx->u.aio.events) { + return -1; + } + ns_ctx->u.aio.ctx = 0; + if (io_setup(g_queue_depth, &ns_ctx->u.aio.ctx) < 0) { + free(ns_ctx->u.aio.events); + perror("io_setup"); + return -1; + } + return 0; +} + +static void +aio_cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + io_destroy(ns_ctx->u.aio.ctx); + free(ns_ctx->u.aio.events); +} + +static const struct ns_fn_table aio_fn_table = { + .setup_payload = aio_setup_payload, + .submit_io = aio_submit_io, + .check_io = aio_check_io, + .verify_io = aio_verify_io, + .init_ns_worker_ctx = aio_init_ns_worker_ctx, + .cleanup_ns_worker_ctx = aio_cleanup_ns_worker_ctx, +}; + +#endif /* HAVE_LIBAIO */ + +#if defined(HAVE_LIBAIO) || defined(SPDK_CONFIG_URING) + +static int +register_file(const char *path) +{ + struct ns_entry *entry; + + int flags, fd; + uint64_t size; + uint32_t blklen; + + if (g_rw_percentage == 100) { + flags = O_RDONLY; + } else if (g_rw_percentage == 0) { + flags = O_WRONLY; + } else { + flags = O_RDWR; + } + + flags |= O_DIRECT; + + fd = open(path, flags); + if (fd < 0) { + fprintf(stderr, "Could not open device %s: %s\n", path, strerror(errno)); + return -1; + } + + size = spdk_fd_get_size(fd); + if (size == 0) { + fprintf(stderr, "Could not determine size of device %s\n", path); + close(fd); + return -1; + } + + blklen = spdk_fd_get_blocklen(fd); + if (blklen == 0) { + fprintf(stderr, "Could not determine block size of device %s\n", path); + close(fd); + return -1; + } + + /* + * TODO: This should really calculate the LCM of the current g_io_align and blklen. 
+ * For now, it's fairly safe to just assume all block sizes are powers of 2. + */ + if (g_io_align < blklen) { + g_io_align = blklen; + } + + /* + * A file smaller than one I/O would give size_in_ios == 0, which submit_single_io() + * later uses as a modulus. Reject it up front, as register_ns() does for namespaces. + */ + if (size < g_io_size_bytes) { + fprintf(stderr, "Device %s is smaller than the I/O size %u\n", path, g_io_size_bytes); + close(fd); + return -1; + } + + /* + * Zero the entry: only some fields are assigned below, and task_complete() reads + * md_size for every entry type. + */ + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + /* Report before close() so errno still describes the allocation failure. */ + perror("ns_entry malloc"); + close(fd); + return -1; + } + + if (g_use_uring) { +#ifdef SPDK_CONFIG_URING + entry->type = ENTRY_TYPE_URING_FILE; + entry->fn_table = &uring_fn_table; + entry->u.uring.fd = fd; +#endif + } else { +#if HAVE_LIBAIO + entry->type = ENTRY_TYPE_AIO_FILE; + entry->fn_table = &aio_fn_table; + entry->u.aio.fd = fd; +#endif + } + entry->size_in_ios = size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / blklen; + + snprintf(entry->name, sizeof(entry->name), "%s", path); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; + + return 0; +} + +static int +register_files(int argc, char **argv) +{ + int i; + + /* Treat everything after the options as files for AIO/URING */ + for (i = g_file_optind; i < argc; i++) { + if (register_file(argv[i]) != 0) { + return 1; + } + } + + return 0; +} +#endif + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *cpl); + +static void +nvme_setup_payload(struct perf_task *task, uint8_t pattern) +{ + uint32_t max_io_size_bytes, max_io_md_size; + + /* maximum extended lba format size from all active namespace, + * it's same with g_io_size_bytes for namespace without metadata. 
+ */ + max_io_size_bytes = g_io_size_bytes + g_max_io_md_size * g_max_io_size_blocks; + task->iov.iov_base = spdk_dma_zmalloc(max_io_size_bytes, g_io_align, NULL); + task->iov.iov_len = max_io_size_bytes; + if (task->iov.iov_base == NULL) { + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + memset(task->iov.iov_base, pattern, task->iov.iov_len); + + max_io_md_size = g_max_io_md_size * g_max_io_size_blocks; + if (max_io_md_size != 0) { + task->md_iov.iov_base = spdk_dma_zmalloc(max_io_md_size, g_io_align, NULL); + task->md_iov.iov_len = max_io_md_size; + if (task->md_iov.iov_base == NULL) { + fprintf(stderr, "task->md_buf spdk_dma_zmalloc failed\n"); + spdk_dma_free(task->iov.iov_base); + exit(1); + } + } +} + +static int +nvme_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, + struct ns_entry *entry, uint64_t offset_in_ios) +{ + uint64_t lba; + int rc; + int qp_num; + + enum dif_mode { + DIF_MODE_NONE = 0, + DIF_MODE_DIF = 1, + DIF_MODE_DIX = 2, + } mode = DIF_MODE_NONE; + + lba = offset_in_ios * entry->io_size_blocks; + + if (entry->md_size != 0 && !(entry->io_flags & SPDK_NVME_IO_FLAGS_PRACT)) { + if (entry->md_interleave) { + mode = DIF_MODE_DIF; + } else { + mode = DIF_MODE_DIX; + } + } + + qp_num = ns_ctx->u.nvme.last_qpair; + ns_ctx->u.nvme.last_qpair++; + if (ns_ctx->u.nvme.last_qpair == ns_ctx->u.nvme.num_active_qpairs) { + ns_ctx->u.nvme.last_qpair = 0; + } + + if (mode != DIF_MODE_NONE) { + rc = spdk_dif_ctx_init(&task->dif_ctx, entry->block_size, entry->md_size, + entry->md_interleave, entry->pi_loc, + (enum spdk_dif_type)entry->pi_type, entry->io_flags, + lba, 0xFFFF, (uint16_t)entry->io_size_blocks, 0, 0); + if (rc != 0) { + fprintf(stderr, "Initialization of DIF context failed\n"); + exit(1); + } + } + + if (task->is_read) { + return spdk_nvme_ns_cmd_read_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num], + task->iov.iov_base, task->md_iov.iov_base, + lba, + entry->io_size_blocks, io_complete, + task, 
entry->io_flags, + task->dif_ctx.apptag_mask, task->dif_ctx.app_tag); + } else { + switch (mode) { + case DIF_MODE_DIF: + rc = spdk_dif_generate(&task->iov, 1, entry->io_size_blocks, &task->dif_ctx); + if (rc != 0) { + fprintf(stderr, "Generation of DIF failed\n"); + return rc; + } + break; + case DIF_MODE_DIX: + rc = spdk_dix_generate(&task->iov, 1, &task->md_iov, entry->io_size_blocks, + &task->dif_ctx); + if (rc != 0) { + fprintf(stderr, "Generation of DIX failed\n"); + return rc; + } + break; + default: + break; + } + + return spdk_nvme_ns_cmd_write_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair[qp_num], + task->iov.iov_base, task->md_iov.iov_base, + lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags, + task->dif_ctx.apptag_mask, task->dif_ctx.app_tag); + } +} + +static void +perf_disconnect_cb(struct spdk_nvme_qpair *qpair, void *ctx) +{ + +} + +static void +nvme_check_io(struct ns_worker_ctx *ns_ctx) +{ + int64_t rc; + + rc = spdk_nvme_poll_group_process_completions(ns_ctx->u.nvme.group, 0, perf_disconnect_cb); + if (rc < 0) { + fprintf(stderr, "NVMe io qpair process completion error\n"); + exit(1); + } +} + +static void +nvme_verify_io(struct perf_task *task, struct ns_entry *entry) +{ + struct spdk_dif_error err_blk = {}; + int rc; + + if (!task->is_read || (entry->io_flags & SPDK_NVME_IO_FLAGS_PRACT)) { + return; + } + + if (entry->md_interleave) { + rc = spdk_dif_verify(&task->iov, 1, entry->io_size_blocks, &task->dif_ctx, + &err_blk); + if (rc != 0) { + fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + } + } else { + rc = spdk_dix_verify(&task->iov, 1, &task->md_iov, entry->io_size_blocks, + &task->dif_ctx, &err_blk); + if (rc != 0) { + fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + } + } +} + +/* + * TODO: If a controller has multiple namespaces, they could all use the same queue. 
+ * For now, give each namespace/thread combination its own queue. + */ +static int +nvme_init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + struct spdk_nvme_io_qpair_opts opts; + struct ns_entry *entry = ns_ctx->entry; + struct spdk_nvme_poll_group *group; + struct spdk_nvme_qpair *qpair; + int i; + + ns_ctx->u.nvme.num_active_qpairs = g_nr_io_queues_per_ns; + ns_ctx->u.nvme.num_all_qpairs = g_nr_io_queues_per_ns + g_nr_unused_io_queues; + ns_ctx->u.nvme.qpair = calloc(ns_ctx->u.nvme.num_all_qpairs, sizeof(struct spdk_nvme_qpair *)); + if (!ns_ctx->u.nvme.qpair) { + return -1; + } + + spdk_nvme_ctrlr_get_default_io_qpair_opts(entry->u.nvme.ctrlr, &opts, sizeof(opts)); + if (opts.io_queue_requests < entry->num_io_requests) { + opts.io_queue_requests = entry->num_io_requests; + } + opts.delay_cmd_submit = true; + opts.create_only = true; + + ns_ctx->u.nvme.group = spdk_nvme_poll_group_create(NULL); + if (ns_ctx->u.nvme.group == NULL) { + goto poll_group_failed; + } + + group = ns_ctx->u.nvme.group; + for (i = 0; i < ns_ctx->u.nvme.num_all_qpairs; i++) { + ns_ctx->u.nvme.qpair[i] = spdk_nvme_ctrlr_alloc_io_qpair(entry->u.nvme.ctrlr, &opts, + sizeof(opts)); + qpair = ns_ctx->u.nvme.qpair[i]; + if (!qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + goto qpair_failed; + } + + if (spdk_nvme_poll_group_add(group, qpair)) { + printf("ERROR: unable to add I/O qpair to poll group.\n"); + spdk_nvme_ctrlr_free_io_qpair(qpair); + goto qpair_failed; + } + + if (spdk_nvme_ctrlr_connect_io_qpair(entry->u.nvme.ctrlr, qpair)) { + printf("ERROR: unable to connect I/O qpair.\n"); + spdk_nvme_poll_group_remove(group, qpair); + spdk_nvme_ctrlr_free_io_qpair(qpair); + goto qpair_failed; + } + } + + return 0; + +qpair_failed: + for (; i > 0; --i) { + spdk_nvme_poll_group_remove(ns_ctx->u.nvme.group, ns_ctx->u.nvme.qpair[i - 1]); + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->u.nvme.qpair[i - 1]); + } + + spdk_nvme_poll_group_destroy(ns_ctx->u.nvme.group); 
+poll_group_failed: + free(ns_ctx->u.nvme.qpair); + return -1; +} + +static void +nvme_cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + int i; + + for (i = 0; i < ns_ctx->u.nvme.num_all_qpairs; i++) { + spdk_nvme_poll_group_remove(ns_ctx->u.nvme.group, ns_ctx->u.nvme.qpair[i]); + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->u.nvme.qpair[i]); + } + + spdk_nvme_poll_group_destroy(ns_ctx->u.nvme.group); + free(ns_ctx->u.nvme.qpair); +} + +static const struct ns_fn_table nvme_fn_table = { + .setup_payload = nvme_setup_payload, + .submit_io = nvme_submit_io, + .check_io = nvme_check_io, + .verify_io = nvme_verify_io, + .init_ns_worker_ctx = nvme_init_ns_worker_ctx, + .cleanup_ns_worker_ctx = nvme_cleanup_ns_worker_ctx, +}; + +static int +build_nvme_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_transport_id *trid; + int res = 0; + + trid = spdk_nvme_ctrlr_get_transport_id(ctrlr); + + switch (trid->trtype) { + case SPDK_NVME_TRANSPORT_PCIE: + res = snprintf(name, length, "PCIE (%s)", trid->traddr); + break; + case SPDK_NVME_TRANSPORT_RDMA: + res = snprintf(name, length, "RDMA (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + case SPDK_NVME_TRANSPORT_TCP: + res = snprintf(name, length, "TCP (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + + default: + fprintf(stderr, "Unknown transport type %d\n", trid->trtype); + break; + } + return res; +} + +static void +build_nvme_ns_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + int res = 0; + + res = build_nvme_name(name, length, ctrlr); + if (res > 0) { + snprintf(name + res, length - res, " NSID %u", nsid); + } + +} + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + uint32_t max_xfer_size, entries, sector_size; + uint64_t ns_size; + struct spdk_nvme_io_qpair_opts opts; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if 
(!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + g_warn = true; + return; + } + + ns_size = spdk_nvme_ns_get_size(ns); + sector_size = spdk_nvme_ns_get_sector_size(ns); + + if (ns_size < g_io_size_bytes || sector_size > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + ns_size, spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + g_warn = true; + return; + } + + max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + /* NVMe driver may add additional entries based on + * stripe size and maximum transfer size, we assume + * 1 more entry be used for stripe. + */ + entries = (g_io_size_bytes - 1) / max_xfer_size + 2; + if ((g_queue_depth * entries) > opts.io_queue_size) { + printf("controller IO queue size %u less than required\n", + opts.io_queue_size); + printf("Consider using lower queue depth or small IO size because " + "IO requests may be queued at the NVMe driver.\n"); + } + /* For requests which have children requests, parent request itself + * will also occupy 1 entry. 
+ */ + entries += 1; + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->type = ENTRY_TYPE_NVME_NS; + entry->fn_table = &nvme_fn_table; + entry->u.nvme.ctrlr = ctrlr; + entry->u.nvme.ns = ns; + entry->num_io_requests = g_queue_depth * entries; + + entry->size_in_ios = ns_size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / sector_size; + + entry->block_size = spdk_nvme_ns_get_extended_sector_size(ns); + entry->md_size = spdk_nvme_ns_get_md_size(ns); + entry->md_interleave = spdk_nvme_ns_supports_extended_lba(ns); + entry->pi_loc = spdk_nvme_ns_get_data(ns)->dps.md_start; + entry->pi_type = spdk_nvme_ns_get_pi_type(ns); + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + entry->io_flags = g_metacfg_pract_flag | g_metacfg_prchk_flags; + } + + /* If metadata size = 8 bytes, PI is stripped (read) or inserted (write), + * and so reduce metadata size from block size. (If metadata size > 8 bytes, + * PI is passed (read) or replaced (write). So block size is not necessary + * to change.) 
+ */ + if ((entry->io_flags & SPDK_NVME_IO_FLAGS_PRACT) && (entry->md_size == 8)) { + entry->block_size = spdk_nvme_ns_get_sector_size(ns); + } + + if (g_max_io_md_size < entry->md_size) { + g_max_io_md_size = entry->md_size; + } + + if (g_max_io_size_blocks < entry->io_size_blocks) { + g_max_io_size_blocks = entry->io_size_blocks; + } + + build_nvme_ns_name(entry->name, sizeof(entry->name), ctrlr, spdk_nvme_ns_get_id(ns)); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +unregister_namespaces(void) +{ + struct ns_entry *entry = g_namespaces; + + while (entry) { + struct ns_entry *next = entry->next; + free(entry); + entry = next; + } +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_outstanding_commands++; + + while (g_outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry) +{ + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + uint32_t nsid; + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + entry->latency_page = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_rw_latency_page), + 4096, NULL); + if (entry->latency_page == NULL) { + 
printf("Allocation error (latency page)\n"); + exit(1); + } + + build_nvme_name(entry->name, sizeof(entry->name), ctrlr); + + entry->ctrlr = ctrlr; + entry->trtype = trid_entry->trid.trtype; + entry->next = g_controllers; + g_controllers = entry; + + if (g_latency_ssd_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + if (trid_entry->nsid == 0) { + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + } else { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid); + if (!ns) { + perror("Namespace does not exist."); + exit(1); + } + + register_ns(ctrlr, ns); + } +} + +static __thread unsigned int seed = 0; + +static inline void +submit_single_io(struct perf_task *task) +{ + uint64_t offset_in_ios; + int rc; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ns_entry *entry = ns_ctx->entry; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + task->submit_tsc = spdk_get_ticks(); + + if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { + task->is_read = true; + } else { + task->is_read = false; + } + + rc = entry->fn_table->submit_io(task, ns_ctx, entry, offset_in_ios); + + if (spdk_unlikely(rc != 0)) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static inline void +task_complete(struct perf_task *task) +{ + struct ns_worker_ctx *ns_ctx; + uint64_t tsc_diff; + struct ns_entry *entry; + + ns_ctx = task->ns_ctx; + entry = ns_ctx->entry; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + 
tsc_diff = spdk_get_ticks() - task->submit_tsc; + ns_ctx->total_tsc += tsc_diff; + if (spdk_unlikely(ns_ctx->min_tsc > tsc_diff)) { + ns_ctx->min_tsc = tsc_diff; + } + if (spdk_unlikely(ns_ctx->max_tsc < tsc_diff)) { + ns_ctx->max_tsc = tsc_diff; + } + if (spdk_unlikely(g_latency_sw_tracking_level > 0)) { + spdk_histogram_data_tally(ns_ctx->histogram, tsc_diff); + } + + if (spdk_unlikely(entry->md_size > 0)) { + /* add application level verification for end-to-end data protection */ + entry->fn_table->verify_io(task, entry); + } + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (spdk_unlikely(ns_ctx->is_draining)) { + spdk_dma_free(task->iov.iov_base); + spdk_dma_free(task->md_iov.iov_base); + free(task); + } else { + submit_single_io(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct perf_task *task = ctx; + + if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) { + fprintf(stderr, "%s completed with error (sct=%d, sc=%d)\n", + task->is_read ? 
"Read" : "Write", + cpl->status.sct, cpl->status.sc); + } + + task_complete(task); +} + +static struct perf_task * +allocate_task(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + fprintf(stderr, "Out of memory allocating tasks\n"); + exit(1); + } + + ns_ctx->entry->fn_table->setup_payload(task, queue_depth % 8 + 1); + + task->ns_ctx = ns_ctx; + + return task; +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = allocate_task(ns_ctx, queue_depth); + submit_single_io(task); + } +} + +static int +init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + return ns_ctx->entry->fn_table->init_ns_worker_ctx(ns_ctx); +} + +static void +cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->entry->fn_table->cleanup_ns_worker_ctx(ns_ctx); +} + +static void +print_periodic_performance(void) +{ + uint64_t io_this_second; + double mb_this_second; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + if (!isatty(STDOUT_FILENO)) { + /* Don't print periodic stats if output is not going + * to a terminal. + */ + return; + } + + io_this_second = 0; + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + io_this_second += ns_ctx->io_completed - ns_ctx->last_io_completed; + ns_ctx->last_io_completed = ns_ctx->io_completed; + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + mb_this_second = (double)io_this_second * g_io_size_bytes / (1024 * 1024); + printf("%9ju IOPS, %8.2f MiB/s\r", io_this_second, mb_this_second); + fflush(stdout); +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end, tsc_current, tsc_next_print; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + uint32_t unfinished_ns_ctx; + + /* Allocate queue pairs for each namespace. 
*/ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (init_ns_worker_ctx(ns_ctx) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_current = spdk_get_ticks(); + tsc_end = tsc_current + g_time_in_sec * g_tsc_rate; + tsc_next_print = tsc_current + g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + ns_ctx->entry->fn_table->check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + tsc_current = spdk_get_ticks(); + + if (worker->lcore == g_master_core && tsc_current > tsc_next_print) { + tsc_next_print += g_tsc_rate; + print_periodic_performance(); + } + + if (tsc_current > tsc_end) { + break; + } + } + + /* drain the io of each ns_ctx in round robin to make the fairness */ + do { + unfinished_ns_ctx = 0; + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + /* first time will enter into this if case */ + if (!ns_ctx->is_draining) { + ns_ctx->is_draining = true; + } + + if (ns_ctx->current_queue_depth > 0) { + ns_ctx->entry->fn_table->check_io(ns_ctx); + if (ns_ctx->current_queue_depth == 0) { + cleanup_ns_worker_ctx(ns_ctx); + } else { + unfinished_ns_ctx++; + } + } + ns_ctx = ns_ctx->next; + } + } while (unfinished_ns_ctx > 0); + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); +#if defined(SPDK_CONFIG_URING) || defined(HAVE_LIBAIO) + printf(" [Kernel device(s)]..."); +#endif + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-o io size in bytes]\n"); + printf("\t[-P number of io queues per namespace. default: 1]\n"); + printf("\t[-U number of unused io queues per controller. 
default: 0]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-L enable latency tracking via sw, default: disabled]\n"); + printf("\t\t-L for latency summary, -LL for detailed histogram\n"); + printf("\t[-l enable latency tracking via ssd (if supported), default: disabled]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 1)\n"); + printf("\t[-D disable submission queue in controller memory buffer, default: enabled]\n"); + printf("\t[-H enable header digest for TCP transport, default: disabled]\n"); + printf("\t[-I enable data digest for TCP transport, default: disabled]\n"); + printf("\t[-N no shutdown notification process for controllers, default: disabled]\n"); + printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n"); + printf("\t Format: 'key:value [key:value] ...'\n"); + printf("\t Keys:\n"); + printf("\t trtype Transport type (e.g. PCIe, RDMA)\n"); + printf("\t adrfam Address family (e.g. IPv4, IPv6)\n"); + printf("\t traddr Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n"); + printf("\t trsvcid Transport service identifier (e.g. 
4420)\n"); + printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n"); + printf("\t -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n"); + printf("\t[-e metadata configuration]\n"); + printf("\t Keys:\n"); + printf("\t PRACT Protection Information Action bit (PRACT=1 or PRACT=0)\n"); + printf("\t PRCHK Control of Protection Information Checking (PRCHK=GUARD|REFTAG|APPTAG)\n"); + printf("\t Example: -e 'PRACT=0,PRCHK=GUARD|REFTAG|APPTAG'\n"); + printf("\t -e 'PRACT=1,PRCHK=GUARD'\n"); + printf("\t[-k keep alive timeout period in millisecond]\n"); + printf("\t[-s DPDK huge memory size in MB.]\n"); + printf("\t[-C max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-i shared memory group ID]\n"); + printf("\t"); + spdk_log_usage(stdout, "-T"); +#ifdef SPDK_CONFIG_URING + printf("\t[-R enable using liburing to drive kernel devices (Default: libaio)]\n"); +#endif +#ifdef DEBUG + printf("\t[-G enable debug logging]\n"); +#else + printf("\t[-G enable debug logging (flag disabled, must reconfigure with --enable-debug)\n"); +#endif +} + +static void +check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + double **cutoff = ctx; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far / total; + while (so_far_pct >= **cutoff && **cutoff > 0) { + printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate); + (*cutoff)++; + } +} + +static void +print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far * 100 / total; + printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", + (double)start * 1000 * 1000 / g_tsc_rate, + (double)end * 1000 * 1000 / g_tsc_rate, + so_far_pct, count); +} + +static 
void +print_performance(void) +{ + uint64_t total_io_completed, total_io_tsc; + double io_per_second, mb_per_second, average_latency, min_latency, max_latency; + double sum_ave_latency, min_latency_so_far, max_latency_so_far; + double total_io_per_second, total_mb_per_second; + int ns_count; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + uint32_t max_strlen; + + total_io_per_second = 0; + total_mb_per_second = 0; + total_io_completed = 0; + total_io_tsc = 0; + min_latency_so_far = (double)UINT64_MAX; + max_latency_so_far = 0; + ns_count = 0; + + max_strlen = 0; + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + max_strlen = spdk_max(strlen(ns_ctx->entry->name), max_strlen); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + printf("========================================================\n"); + printf("%*s\n", max_strlen + 60, "Latency(us)"); + printf("%-*s: %10s %10s %10s %10s %10s\n", + max_strlen + 13, "Device Information", "IOPS", "MiB/s", "Average", "min", "max"); + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + if (ns_ctx->io_completed != 0) { + io_per_second = (double)ns_ctx->io_completed / g_time_in_sec; + mb_per_second = io_per_second * g_io_size_bytes / (1024 * 1024); + average_latency = ((double)ns_ctx->total_tsc / ns_ctx->io_completed) * 1000 * 1000 / g_tsc_rate; + min_latency = (double)ns_ctx->min_tsc * 1000 * 1000 / g_tsc_rate; + if (min_latency < min_latency_so_far) { + min_latency_so_far = min_latency; + } + + max_latency = (double)ns_ctx->max_tsc * 1000 * 1000 / g_tsc_rate; + if (max_latency > max_latency_so_far) { + max_latency_so_far = max_latency; + } + + printf("%-*.*s from core %2u: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + max_strlen, max_strlen, ns_ctx->entry->name, worker->lcore, + io_per_second, mb_per_second, + average_latency, min_latency, max_latency); + total_io_per_second += io_per_second; + total_mb_per_second += 
mb_per_second; + total_io_completed += ns_ctx->io_completed; + total_io_tsc += ns_ctx->total_tsc; + ns_count++; + } + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (ns_count != 0 && total_io_completed) { + sum_ave_latency = ((double)total_io_tsc / total_io_completed) * 1000 * 1000 / g_tsc_rate; + printf("========================================================\n"); + printf("%-*s: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + max_strlen + 13, "Total", total_io_per_second, total_mb_per_second, + sum_ave_latency, min_latency_so_far, max_latency_so_far); + printf("\n"); + } + + if (g_latency_sw_tracking_level == 0 || total_io_completed == 0) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + const double *cutoff = g_latency_cutoffs; + + printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("=================================================================================\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, check_cutoff, &cutoff); + + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (g_latency_sw_tracking_level == 1) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("==============================================================================\n"); + printf(" Range in us Cumulative IO count\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, print_bucket, NULL); + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + +} + +static void +print_latency_page(struct ctrlr_entry *entry) +{ + int i; + + printf("\n"); + printf("%s\n", entry->name); + printf("--------------------------------------------------------\n"); + + for (i = 0; i < 32; i++) { + if (entry->latency_page->buckets_32us[i]) { + printf("Bucket %dus - %dus: 
%d\n", i * 32, (i + 1) * 32, entry->latency_page->buckets_32us[i]); + } + } + for (i = 0; i < 31; i++) { + if (entry->latency_page->buckets_1ms[i]) { + printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, entry->latency_page->buckets_1ms[i]); + } + } + for (i = 0; i < 31; i++) { + if (entry->latency_page->buckets_32ms[i]) + printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, + entry->latency_page->buckets_32ms[i]); + } +} + +static void +print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) +{ + struct ctrlr_entry *ctrlr; + + printf("%s Latency Statistics:\n", op_name); + printf("========================================================\n"); + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr->ctrlr, log_page, SPDK_NVME_GLOBAL_NS_TAG, + ctrlr->latency_page, sizeof(struct spdk_nvme_intel_rw_latency_page), 0, + enable_latency_tracking_complete, + NULL)) { + printf("nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + g_outstanding_commands++; + } else { + printf("Controller %s: %s latency statistics not supported\n", ctrlr->name, op_name); + } + ctrlr = ctrlr->next; + } + + while (g_outstanding_commands) { + ctrlr = g_controllers; + while (ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); + ctrlr = ctrlr->next; + } + } + + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + print_latency_page(ctrlr); + } + ctrlr = ctrlr->next; + } + printf("\n"); +} + +static void +print_stats(void) +{ + print_performance(); + if (g_latency_ssd_tracking_enable) { + if (g_rw_percentage != 0) { + print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); + } + if (g_rw_percentage != 100) { + print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); + } + } +} + +static void +unregister_trids(void) +{ + struct trid_entry 
*trid_entry, *tmp; + + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) { + TAILQ_REMOVE(&g_trid_list, trid_entry, tailq); + free(trid_entry); + } +} + +static int +add_trid(const char *trid_str) +{ + struct trid_entry *trid_entry; + struct spdk_nvme_transport_id *trid; + char *ns; + + trid_entry = calloc(1, sizeof(*trid_entry)); + if (trid_entry == NULL) { + return -1; + } + + trid = &trid_entry->trid; + trid->trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) { + fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str); + free(trid_entry); + return 1; + } + + spdk_nvme_transport_id_populate_trstring(trid, + spdk_nvme_transport_id_trtype_str(trid->trtype)); + + ns = strcasestr(trid_str, "ns:"); + if (ns) { + char nsid_str[6]; /* 5 digits maximum in an nsid */ + int len; + int nsid; + + ns += 3; + + len = strcspn(ns, " \t\n"); + if (len > 5) { + fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n"); + free(trid_entry); + return 1; + } + + memcpy(nsid_str, ns, len); + nsid_str[len] = '\0'; + + nsid = spdk_strtol(nsid_str, 10); + if (nsid <= 0 || nsid > 65535) { + fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n"); + free(trid_entry); + return 1; + } + + trid_entry->nsid = (uint16_t)nsid; + } + + TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq); + return 0; +} + +static size_t +parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, + size_t val_buf_size) +{ + const char *sep; + const char *separator = ", \t\n"; + size_t key_len, val_len; + + *str += strspn(*str, separator); + + sep = strchr(*str, '='); + if (!sep) { + fprintf(stderr, "Key without '=' separator\n"); + return 0; + } + + key_len = sep - *str; + if (key_len >= key_buf_size) { + fprintf(stderr, "Key length %zu is greater than maximum allowed %zu\n", + key_len, key_buf_size - 1); + return 0; + } + + 
memcpy(key, *str, key_len); + key[key_len] = '\0'; + + *str += key_len + 1; /* Skip key */ + val_len = strcspn(*str, separator); + if (val_len == 0) { + fprintf(stderr, "Key without value\n"); + return 0; + } + + if (val_len >= val_buf_size) { + fprintf(stderr, "Value length %zu is greater than maximum allowed %zu\n", + val_len, val_buf_size - 1); + return 0; + } + + memcpy(val, *str, val_len); + val[val_len] = '\0'; + + *str += val_len; + + return val_len; +} + +static int +parse_metadata(const char *metacfg_str) +{ + const char *str; + size_t val_len; + char key[32]; + char val[1024]; + + if (metacfg_str == NULL) { + return -EINVAL; + } + + str = metacfg_str; + + while (*str != '\0') { + val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val)); + if (val_len == 0) { + fprintf(stderr, "Failed to parse metadata\n"); + return -EINVAL; + } + + if (strcmp(key, "PRACT") == 0) { + if (*val == '1') { + g_metacfg_pract_flag = SPDK_NVME_IO_FLAGS_PRACT; + } + } else if (strcmp(key, "PRCHK") == 0) { + if (strstr(val, "GUARD") != NULL) { + g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(val, "REFTAG") != NULL) { + g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(val, "APPTAG") != NULL) { + g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } + } else { + fprintf(stderr, "Unknown key '%s'\n", key); + } + } + + return 0; +} + +static int +parse_args(int argc, char **argv) +{ + int op; + long int val; + int rc; + + while ((op = getopt(argc, argv, "c:e:i:lo:q:r:k:s:t:w:C:DGHILM:NP:RT:U:V")) != -1) { + switch (op) { + case 'i': + case 'C': + case 'P': + case 'o': + case 'q': + case 'k': + case 's': + case 't': + case 'M': + case 'U': + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Converting a string to integer failed\n"); + return val; + } + switch (op) { + case 'i': + g_shm_id = val; + break; + case 'C': + g_max_completions = val; + break; + case 'P': + g_nr_io_queues_per_ns = val; + 
break; + case 'o': + g_io_size_bytes = val; + break; + case 'q': + g_queue_depth = val; + break; + case 'k': + g_keep_alive_timeout_in_ms = val; + break; + case 's': + g_dpdk_mem = val; + break; + case 't': + g_time_in_sec = val; + break; + case 'M': + g_rw_percentage = val; + g_mix_specified = true; + break; + case 'U': + g_nr_unused_io_queues = val; + break; + } + break; + case 'c': + g_core_mask = optarg; + break; + case 'e': + if (parse_metadata(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 'l': + g_latency_ssd_tracking_enable = true; + break; + case 'r': + if (add_trid(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 'w': + g_workload_type = optarg; + break; + case 'D': + g_disable_sq_cmb = 1; + break; + case 'G': +#ifndef DEBUG + fprintf(stderr, "%s must be configured with --enable-debug for -G flag\n", + argv[0]); + usage(argv[0]); + return 1; +#else + spdk_log_set_flag("nvme"); + spdk_log_set_print_level(SPDK_LOG_DEBUG); + break; +#endif + case 'H': + g_header_digest = 1; + break; + case 'I': + g_data_digest = 1; + break; + case 'L': + g_latency_sw_tracking_level++; + break; + case 'N': + g_no_shn_notification = true; + break; + case 'R': +#ifndef SPDK_CONFIG_URING + fprintf(stderr, "%s must be rebuilt with CONFIG_URING=y for -R flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + g_use_uring = true; + break; + case 'T': + rc = spdk_log_set_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -T flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + case 'V': + g_vmd = true; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_nr_io_queues_per_ns) { + usage(argv[0]); + return 1; + } + + if (!g_queue_depth) { + fprintf(stderr, "missing -q (queue size) operand\n"); + usage(argv[0]); + return 1; + } + if 
(!g_io_size_bytes) { + fprintf(stderr, "missing -o (block size) operand\n"); + usage(argv[0]); + return 1; + } + if (!g_workload_type) { + fprintf(stderr, "missing -w (io pattern type) operand\n"); + usage(argv[0]); + return 1; + } + if (!g_time_in_sec) { + fprintf(stderr, "missing -t (test time in seconds) operand\n"); + usage(argv[0]); + return 1; + } + + if (strncmp(g_workload_type, "rand", 4) == 0) { + g_is_random = 1; + g_workload_type = &g_workload_type[4]; + } + + if (strcmp(g_workload_type, "read") == 0 || strcmp(g_workload_type, "write") == 0) { + g_rw_percentage = strcmp(g_workload_type, "read") == 0 ? 100 : 0; + if (g_mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } else if (strcmp(g_workload_type, "rw") == 0) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } else { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (TAILQ_EMPTY(&g_trid_list)) { + /* If no transport IDs specified, default to enumerating all local PCIe devices */ + add_trid("trtype:PCIe"); + } else { + struct trid_entry *trid_entry, *trid_entry_tmp; + + g_no_pci = true; + /* check whether there is local PCIe type */ + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) { + if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + g_no_pci = false; + break; + } + } + } + + g_file_optind = optind; + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + 
return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + + /* Free namespace context and worker thread */ + while (worker) { + struct worker_thread *next_worker = worker->next; + struct ns_worker_ctx *ns_ctx = worker->ns_ctx; + + while (ns_ctx) { + struct ns_worker_ctx *next_ns_ctx = ns_ctx->next; + spdk_histogram_data_free(ns_ctx->histogram); + free(ns_ctx); + ns_ctx = next_ns_ctx; + } + + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { + if (g_disable_sq_cmb) { + opts->use_cmb_sqs = false; + } + if (g_no_shn_notification) { + opts->no_shn_notification = true; + } + } + + /* Set io_queue_size to UINT16_MAX, NVMe driver + * will then reduce this to MQES to maximize + * the io_queue_size as much as possible. + */ + opts->io_queue_size = UINT16_MAX; + + /* Set the header and data_digest */ + opts->header_digest = g_header_digest; + opts->data_digest = g_data_digest; + opts->keep_alive_timeout_ms = g_keep_alive_timeout_in_ms; + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct trid_entry *trid_entry = cb_ctx; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("Attached to NVMe Controller at %s [%04x:%04x]\n", + trid->traddr, + pci_id.vendor_id, pci_id.device_id); + } + + register_ctrlr(ctrlr, 
trid_entry); +} + +static int +register_controllers(void) +{ + struct trid_entry *trid_entry; + + printf("Initializing NVMe Controllers\n"); + + if (g_vmd && spdk_vmd_init()) { + fprintf(stderr, "Failed to initialize VMD." + " Some NVMe devices can be unavailable.\n"); + } + + TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) { + if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n", + trid_entry->trid.traddr); + return -1; + } + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + spdk_dma_free(entry->latency_page); + if (g_latency_ssd_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(entry->ctrlr, false); + } + + if (g_nr_unused_io_queues) { + int i; + + for (i = 0; i < g_nr_unused_io_queues; i++) { + spdk_nvme_ctrlr_free_io_qpair(entry->unused_qpairs[i]); + } + + free(entry->unused_qpairs); + } + + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } + + if (g_vmd) { + spdk_vmd_fini(); + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = g_num_namespaces > g_num_workers ? 
g_num_namespaces : g_num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = calloc(1, sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return -1; + } + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->min_tsc = UINT64_MAX; + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + ns_ctx->histogram = spdk_histogram_data_alloc(); + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + + } + + return 0; +} + +static void * +nvme_poll_ctrlrs(void *arg) +{ + struct ctrlr_entry *entry; + int oldstate; + + spdk_unaffinitize_thread(); + + while (true) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + + entry = g_controllers; + while (entry) { + if (entry->trtype != SPDK_NVME_TRANSPORT_PCIE) { + spdk_nvme_ctrlr_process_admin_completions(entry->ctrlr); + } + entry = entry->next; + } + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + + /* This is a pthread cancellation point and cannot be removed. 
*/ + sleep(1); + } + + return NULL; +} + +int main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + struct spdk_env_opts opts; + pthread_t thread_id = 0; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "perf"; + opts.shm_id = g_shm_id; + if (g_core_mask) { + opts.core_mask = g_core_mask; + } + + if (g_dpdk_mem) { + opts.mem_size = g_dpdk_mem; + } + if (g_no_pci) { + opts.no_pci = g_no_pci; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + rc = -1; + goto cleanup; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + rc = -1; + goto cleanup; + } + +#if defined(HAVE_LIBAIO) || defined(SPDK_CONFIG_URING) + if (register_files(argc, argv) != 0) { + rc = -1; + goto cleanup; + } +#endif + + if (register_controllers() != 0) { + rc = -1; + goto cleanup; + } + + if (g_warn) { + printf("WARNING: Some requested NVMe devices were skipped\n"); + } + + if (g_num_namespaces == 0) { + fprintf(stderr, "No valid NVMe controllers or AIO or URING devices found\n"); + goto cleanup; + } + + rc = pthread_create(&thread_id, NULL, &nvme_poll_ctrlrs, NULL); + if (rc != 0) { + fprintf(stderr, "Unable to spawn a thread to poll admin queues.\n"); + goto cleanup; + } + + if (associate_workers_with_ns() != 0) { + rc = -1; + goto cleanup; + } + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + g_master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != g_master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + + print_stats(); + +cleanup: + if (thread_id && pthread_cancel(thread_id) == 0) { + pthread_join(thread_id, NULL); + } + unregister_trids(); + unregister_namespaces(); + unregister_controllers(); + unregister_workers(); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + } + + return rc; +} diff --git a/src/spdk/examples/nvme/reconnect/.gitignore b/src/spdk/examples/nvme/reconnect/.gitignore new file mode 100644 index 000000000..efe3eada4 --- /dev/null +++ b/src/spdk/examples/nvme/reconnect/.gitignore @@ -0,0 +1 @@ +reconnect diff --git a/src/spdk/examples/nvme/reconnect/Makefile b/src/spdk/examples/nvme/reconnect/Makefile new file mode 100644 index 000000000..880ae76c0 --- /dev/null +++ b/src/spdk/examples/nvme/reconnect/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = reconnect + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/reconnect/reconnect.c b/src/spdk/examples/nvme/reconnect/reconnect.c new file mode 100644 index 000000000..74c5f3657 --- /dev/null +++ b/src/spdk/examples/nvme/reconnect/reconnect.c @@ -0,0 +1,1185 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/nvme.h" +#include "spdk/queue.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/log.h" +#include "spdk/likely.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id failover_trid; + enum spdk_nvme_transport_type trtype; + struct ctrlr_entry *next; + char name[1024]; + int num_resets; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint32_t num_io_requests; + uint64_t size_in_ios; + uint32_t block_size; + uint32_t io_flags; + char name[1024]; +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + + int num_qpairs; + struct spdk_nvme_qpair **qpair; + int last_qpair; + + struct ns_worker_ctx *next; +}; + +struct perf_task { + struct ns_worker_ctx *ns_ctx; + struct iovec iov; + bool is_read; +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; +}; + +/* For basic reset handling. 
*/ +static int g_max_ctrlr_resets = 15; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; +static int g_num_namespaces = 0; +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_align = 0x200; +static uint32_t g_io_size_bytes; +static uint32_t g_max_io_size_blocks; +static int g_rw_percentage; +static int g_is_random; +static int g_queue_depth; +static int g_time_in_sec; +static uint32_t g_max_completions; +static int g_dpdk_mem; +static bool g_warn; +static uint32_t g_keep_alive_timeout_in_ms = 0; +static uint8_t g_transport_retry_count = 4; +static uint8_t g_transport_ack_timeout = 0; /* disabled */ + +static const char *g_core_mask; + +struct trid_entry { + struct spdk_nvme_transport_id trid; + struct spdk_nvme_transport_id failover_trid; + TAILQ_ENTRY(trid_entry) tailq; +}; + +static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list); + +static inline void +task_complete(struct perf_task *task); +static void submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth); + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *cpl); + +static void +nvme_setup_payload(struct perf_task *task) +{ + /* maximum extended lba format size from all active namespace, + * it's same with g_io_size_bytes for namespace without metadata. 
+ */ + task->iov.iov_base = spdk_dma_zmalloc(g_io_size_bytes, g_io_align, NULL); + task->iov.iov_len = g_io_size_bytes; + if (task->iov.iov_base == NULL) { + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } +} + +static int +nvme_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx, + struct ns_entry *entry, uint64_t offset_in_ios) +{ + uint64_t lba; + int qp_num; + + lba = offset_in_ios * entry->io_size_blocks; + + qp_num = ns_ctx->last_qpair; + ns_ctx->last_qpair++; + if (ns_ctx->last_qpair == ns_ctx->num_qpairs) { + ns_ctx->last_qpair = 0; + } + + if (task->is_read) { + return spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair[qp_num], + task->iov.iov_base, lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags); + } + + return spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair[qp_num], + task->iov.iov_base, lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags); +} + +static void +nvme_check_io(struct ns_worker_ctx *ns_ctx) +{ + int i, rc; + + for (i = 0; i < ns_ctx->num_qpairs; i++) { + rc = spdk_nvme_qpair_process_completions(ns_ctx->qpair[i], g_max_completions); + /* The transport level qpair is failed and we need to reconnect it. */ + if (spdk_unlikely(rc == -ENXIO)) { + rc = spdk_nvme_ctrlr_reconnect_io_qpair(ns_ctx->qpair[i]); + /* successful reconnect */ + if (rc == 0) { + continue; + } else if (rc == -ENXIO) { + /* This means the controller is failed. Defer to it to restore the qpair. */ + continue; + } else { + /* + * We were unable to restore the qpair on this attempt. We don't + * really know why. For naive handling, just keep trying. + * TODO: add a retry limit, and destroy the qpair after x iterations. 
+ */ + fprintf(stderr, "qpair failed and we were unable to recover it.\n"); + } + } else if (spdk_unlikely(rc < 0)) { + fprintf(stderr, "Received an unknown error processing completions.\n"); + exit(1); + } + } +} + +/* + * TODO: If a controller has multiple namespaces, they could all use the same queue. + * For now, give each namespace/thread combination its own queue. + */ +static int +nvme_init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + struct spdk_nvme_io_qpair_opts opts; + struct ns_entry *entry = ns_ctx->entry; + int i; + + ns_ctx->num_qpairs = 1; + ns_ctx->qpair = calloc(ns_ctx->num_qpairs, sizeof(struct spdk_nvme_qpair *)); + if (!ns_ctx->qpair) { + return -1; + } + + spdk_nvme_ctrlr_get_default_io_qpair_opts(entry->ctrlr, &opts, sizeof(opts)); + if (opts.io_queue_requests < entry->num_io_requests) { + opts.io_queue_requests = entry->num_io_requests; + } + + for (i = 0; i < ns_ctx->num_qpairs; i++) { + ns_ctx->qpair[i] = spdk_nvme_ctrlr_alloc_io_qpair(entry->ctrlr, &opts, + sizeof(opts)); + if (!ns_ctx->qpair[i]) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return -1; + } + } + + return 0; +} + +static void +nvme_cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + int i; + + for (i = 0; i < ns_ctx->num_qpairs; i++) { + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair[i]); + } + + free(ns_ctx->qpair); +} + +static void +build_nvme_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_transport_id *trid; + + trid = spdk_nvme_ctrlr_get_transport_id(ctrlr); + + switch (trid->trtype) { + case SPDK_NVME_TRANSPORT_RDMA: + snprintf(name, length, "RDMA (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + case SPDK_NVME_TRANSPORT_TCP: + snprintf(name, length, "TCP (addr:%s subnqn:%s)", trid->traddr, trid->subnqn); + break; + default: + fprintf(stderr, "Unknown transport type %d\n", trid->trtype); + break; + } +} + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ 
+ struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + uint32_t max_xfer_size, entries, sector_size; + uint64_t ns_size; + struct spdk_nvme_io_qpair_opts opts; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + g_warn = true; + return; + } + + ns_size = spdk_nvme_ns_get_size(ns); + sector_size = spdk_nvme_ns_get_sector_size(ns); + + if (ns_size < g_io_size_bytes || sector_size > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + ns_size, spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + g_warn = true; + return; + } + + max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + /* NVMe driver may add additional entries based on + * stripe size and maximum transfer size, we assume + * 1 more entry be used for stripe. + */ + entries = (g_io_size_bytes - 1) / max_xfer_size + 2; + if ((g_queue_depth * entries) > opts.io_queue_size) { + printf("controller IO queue size %u less than required\n", + opts.io_queue_size); + printf("Consider using lower queue depth or small IO size because " + "IO requests may be queued at the NVMe driver.\n"); + g_warn = true; + } + /* For requests which have children requests, parent request itself + * will also occupy 1 entry. 
+ */ + entries += 1; + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->num_io_requests = g_queue_depth * entries; + + entry->size_in_ios = ns_size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / sector_size; + + entry->block_size = spdk_nvme_ns_get_sector_size(ns); + + + if (g_max_io_size_blocks < entry->io_size_blocks) { + g_max_io_size_blocks = entry->io_size_blocks; + } + + build_nvme_name(entry->name, sizeof(entry->name), ctrlr); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +unregister_namespaces(void) +{ + struct ns_entry *entry = g_namespaces; + + while (entry) { + struct ns_entry *next = entry->next; + free(entry); + entry = next; + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry) +{ + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); + const struct spdk_nvme_transport_id *ctrlr_trid; + uint32_t nsid; + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + ctrlr_trid = spdk_nvme_ctrlr_get_transport_id(ctrlr); + assert(ctrlr_trid != NULL); + + /* each controller needs a unique failover trid. */ + entry->failover_trid = trid_entry->failover_trid; + + /* + * Users are allowed to leave the trid subnqn blank or specify a discovery controller subnqn. + * In those cases, the controller subnqn will not equal the trid_entry subnqn and, by association, + * the failover_trid subnqn. + * When we do failover, we want to reconnect to the same nqn so explicitly set the failover nqn to + * the ctrlr nqn here. 
+ */ + snprintf(entry->failover_trid.subnqn, SPDK_NVMF_NQN_MAX_LEN + 1, "%s", ctrlr_trid->subnqn); + + + build_nvme_name(entry->name, sizeof(entry->name), ctrlr); + + entry->ctrlr = ctrlr; + entry->trtype = trid_entry->trid.trtype; + entry->next = g_controllers; + g_controllers = entry; + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } +} + +static __thread unsigned int seed = 0; + +static inline void +submit_single_io(struct perf_task *task) +{ + uint64_t offset_in_ios; + int rc; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ns_entry *entry = ns_ctx->entry; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { + task->is_read = true; + } else { + task->is_read = false; + } + + rc = nvme_submit_io(task, ns_ctx, entry, offset_in_ios); + + if (spdk_unlikely(rc != 0)) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static inline void +task_complete(struct perf_task *task) +{ + struct ns_worker_ctx *ns_ctx; + + ns_ctx = task->ns_ctx; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+ */ + if (spdk_unlikely(ns_ctx->is_draining)) { + spdk_dma_free(task->iov.iov_base); + free(task); + } else { + submit_single_io(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct perf_task *task = ctx; + + if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) { + fprintf(stderr, "%s completed with error (sct=%d, sc=%d)\n", + task->is_read ? "Read" : "Write", + cpl->status.sct, cpl->status.sc); + } + + task_complete(task); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ + nvme_check_io(ns_ctx); +} + +static struct perf_task * +allocate_task(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + fprintf(stderr, "Out of memory allocating tasks\n"); + exit(1); + } + + nvme_setup_payload(task); + + task->ns_ctx = ns_ctx; + + return task; +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = allocate_task(ns_ctx, queue_depth); + submit_single_io(task); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + uint32_t unfinished_ns_ctx; + + printf("Starting thread on core %u\n", worker->lcore); + + /* Allocate queue pairs for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (nvme_init_ns_worker_ctx(ns_ctx) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. 
+ */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + /* drain the io of each ns_ctx in round robin to make the fairness */ + do { + unfinished_ns_ctx = 0; + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + /* first time will enter into this if case */ + if (!ns_ctx->is_draining) { + ns_ctx->is_draining = true; + } + + if (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + if (ns_ctx->current_queue_depth == 0) { + nvme_cleanup_ns_worker_ctx(ns_ctx); + } else { + unfinished_ns_ctx++; + } + } + ns_ctx = ns_ctx->next; + } + } while (unfinished_ns_ctx > 0); + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-o io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 1)\n"); + printf("\t[-r Transport ID for NVMeoF]\n"); + printf("\t Format: 'key:value [key:value] ...'\n"); + printf("\t Keys:\n"); + printf("\t trtype Transport type (e.g. RDMA)\n"); + printf("\t adrfam Address family (e.g. IPv4, IPv6)\n"); + printf("\t traddr Transport address (e.g. 192.168.100.8 for RDMA)\n"); + printf("\t trsvcid Transport service identifier (e.g. 
4420)\n"); + printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf("\t alt_traddr (Optional) Alternative Transport address for failover.\n"); + printf("\t Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n"); + printf("\t[-k keep alive timeout period in millisecond]\n"); + printf("\t[-s DPDK huge memory size in MB.]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-i shared memory group ID]\n"); + printf("\t[-A transport ACK timeout]\n"); + printf("\t[-R transport retry count]\n"); + printf("\t"); + spdk_log_usage(stdout, "-T"); +#ifdef DEBUG + printf("\t[-G enable debug logging]\n"); +#else + printf("\t[-G enable debug logging (flag disabled, must reconfigure with --enable-debug)\n"); +#endif +} + +static void +unregister_trids(void) +{ + struct trid_entry *trid_entry, *tmp; + + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) { + TAILQ_REMOVE(&g_trid_list, trid_entry, tailq); + free(trid_entry); + } +} + +static int +add_trid(const char *trid_str) +{ + struct trid_entry *trid_entry; + struct spdk_nvme_transport_id *trid; + char *alt_traddr; + int len; + + trid_entry = calloc(1, sizeof(*trid_entry)); + if (trid_entry == NULL) { + return -1; + } + + trid = &trid_entry->trid; + snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) { + fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str); + free(trid_entry); + return 1; + } + + trid_entry->failover_trid = trid_entry->trid; + + alt_traddr = strcasestr(trid_str, "alt_traddr:"); + if (alt_traddr) { + alt_traddr += strlen("alt_traddr:"); + len = strcspn(alt_traddr, " \t\n"); + if (len > SPDK_NVMF_TRADDR_MAX_LEN) { + fprintf(stderr, "The failover traddr %s is too long.\n", alt_traddr); + free(trid_entry); + return -1; + } + snprintf(trid_entry->failover_trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, 
"%s", alt_traddr); + } + + TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq); + return 0; +} + +static int +parse_args(int argc, char **argv) +{ + struct trid_entry *trid_entry, *trid_entry_tmp; + const char *workload_type; + int op; + bool mix_specified = false; + long int val; + int rc; + + /* default value */ + g_queue_depth = 0; + g_io_size_bytes = 0; + workload_type = NULL; + g_time_in_sec = 0; + g_rw_percentage = -1; + g_core_mask = NULL; + g_max_completions = 0; + + while ((op = getopt(argc, argv, "c:m:o:q:r:k:s:t:w:A:GM:R:T:")) != -1) { + switch (op) { + case 'm': + case 'o': + case 'q': + case 'k': + case 's': + case 't': + case 'A': + case 'M': + case 'R': + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Converting a string to integer failed\n"); + return val; + } + switch (op) { + case 'm': + g_max_completions = val; + break; + case 'o': + g_io_size_bytes = val; + break; + case 'q': + g_queue_depth = val; + break; + case 'k': + g_keep_alive_timeout_in_ms = val; + break; + case 's': + g_dpdk_mem = val; + break; + case 't': + g_time_in_sec = val; + break; + case 'A': + g_transport_ack_timeout = val; + break; + case 'M': + g_rw_percentage = val; + mix_specified = true; + break; + case 'R': + g_transport_retry_count = val; + break; + } + break; + case 'c': + g_core_mask = optarg; + break; + case 'r': + if (add_trid(optarg)) { + usage(argv[0]); + return 1; + } + break; + case 'w': + workload_type = optarg; + break; + case 'G': +#ifndef DEBUG + fprintf(stderr, "%s must be configured with --enable-debug for -G flag\n", + argv[0]); + usage(argv[0]); + return 1; +#else + spdk_log_set_flag("nvme"); + spdk_log_set_print_level(SPDK_LOG_DEBUG); + break; +#endif + case 'T': + rc = spdk_log_set_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -T flag.\n", + argv[0]); + 
usage(argv[0]); + return 0; +#endif + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_queue_depth) { + usage(argv[0]); + return 1; + } + if (!g_io_size_bytes) { + usage(argv[0]); + return 1; + } + if (!workload_type) { + usage(argv[0]); + return 1; + } + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_rw_percentage = 100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_is_random = 0; + } else { + g_is_random = 1; + } + + if (TAILQ_EMPTY(&g_trid_list)) { + fprintf(stderr, "You must specify at least one fabrics TRID.\n"); + return -1; + } + + /* check whether there is local PCIe type and fail. 
*/ + TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) { + if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + fprintf(stderr, "This application was not intended to be run on PCIe controllers.\n"); + return 1; + } + } + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + + g_workers = NULL; + g_num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_num_workers++; + } + + return 0; +} + +static void +unregister_workers(void) +{ + struct worker_thread *worker = g_workers; + + /* Free namespace context and worker thread */ + while (worker) { + struct worker_thread *next_worker = worker->next; + struct ns_worker_ctx *ns_ctx = worker->ns_ctx; + + while (ns_ctx) { + struct ns_worker_ctx *next_ns_ctx = ns_ctx->next; + free(ns_ctx); + ns_ctx = next_ns_ctx; + } + + free(worker); + worker = next_worker; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* These should have been weeded out earlier. */ + assert(trid->trtype != SPDK_NVME_TRANSPORT_PCIE); + + printf("Attaching to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + + /* Set io_queue_size to UINT16_MAX, NVMe driver + * will then reduce this to MQES to maximize + * the io_queue_size as much as possible. 
+ */ + opts->io_queue_size = UINT16_MAX; + + opts->keep_alive_timeout_ms = spdk_max(opts->keep_alive_timeout_ms, + g_keep_alive_timeout_in_ms); + + opts->transport_retry_count = g_transport_retry_count; + opts->transport_ack_timeout = g_transport_ack_timeout; + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct trid_entry *trid_entry = cb_ctx; + + printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, + trid->subnqn); + + register_ctrlr(ctrlr, trid_entry); +} + +static int +register_controllers(void) +{ + struct trid_entry *trid_entry; + + printf("Initializing NVMe Controllers\n"); + + TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) { + if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n", + trid_entry->trid.traddr); + return -1; + } + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = g_num_namespaces > g_num_workers ? 
g_num_namespaces : g_num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = calloc(1, sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return -1; + } + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + + } + + return 0; +} + +static void * +nvme_poll_ctrlrs(void *arg) +{ + struct ctrlr_entry *entry; + const struct spdk_nvme_transport_id *old_trid; + int oldstate; + int rc; + + + spdk_unaffinitize_thread(); + + while (true) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + + entry = g_controllers; + while (entry) { + rc = spdk_nvme_ctrlr_process_admin_completions(entry->ctrlr); + /* This controller has encountered a failure at the transport level. reset it. */ + if (rc == -ENXIO) { + if (entry->num_resets == 0) { + old_trid = spdk_nvme_ctrlr_get_transport_id(entry->ctrlr); + fprintf(stderr, "A controller has encountered a failure and is being reset.\n"); + if (spdk_nvme_transport_id_compare(old_trid, &entry->failover_trid)) { + fprintf(stderr, "Resorting to new failover address %s\n", entry->failover_trid.traddr); + spdk_nvme_ctrlr_fail(entry->ctrlr); + rc = spdk_nvme_ctrlr_set_trid(entry->ctrlr, &entry->failover_trid); + if (rc != 0) { + fprintf(stderr, "Unable to fail over to back up trid.\n"); + } + } + } + + rc = spdk_nvme_ctrlr_reset(entry->ctrlr); + if (rc != 0) { + entry->num_resets++; + fprintf(stderr, "Unable to reset the controller.\n"); + + if (entry->num_resets > g_max_ctrlr_resets) { + fprintf(stderr, "Controller cannot be recovered. 
Exiting.\n"); + exit(1); + } + } else { + fprintf(stderr, "Controller properly reset.\n"); + } + } + entry = entry->next; + } + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + + /* This is a pthread cancellation point and cannot be removed. */ + sleep(1); + } + + return NULL; +} + +int main(int argc, char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + struct spdk_env_opts opts; + pthread_t thread_id = 0; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "reconnect"; + if (g_core_mask) { + opts.core_mask = g_core_mask; + } + + if (g_dpdk_mem) { + opts.mem_size = g_dpdk_mem; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + rc = 1; + goto cleanup; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + rc = 1; + goto cleanup; + } + + if (register_controllers() != 0) { + rc = 1; + goto cleanup; + } + + if (g_warn) { + printf("WARNING: Some requested NVMe devices were skipped\n"); + } + + if (g_num_namespaces == 0) { + fprintf(stderr, "No valid NVMe controllers found\n"); + goto cleanup; + } + + rc = pthread_create(&thread_id, NULL, &nvme_poll_ctrlrs, NULL); + if (rc != 0) { + fprintf(stderr, "Unable to spawn a thread to poll admin queues.\n"); + goto cleanup; + } + + if (associate_workers_with_ns() != 0) { + rc = 1; + goto cleanup; + } + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + +cleanup: + if (thread_id && pthread_cancel(thread_id) == 0) { + pthread_join(thread_id, NULL); + } + unregister_trids(); + unregister_namespaces(); + unregister_controllers(); + unregister_workers(); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + /* + * return a generic error to the caller. This allows us to + * distinguish between a failure in the script and something + * like a segfault or an invalid access which causes the program + * to crash. + */ + rc = 1; + } + + return rc; +} diff --git a/src/spdk/examples/nvmf/Makefile b/src/spdk/examples/nvmf/Makefile new file mode 100644 index 000000000..f34027406 --- /dev/null +++ b/src/spdk/examples/nvmf/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += nvmf + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/nvmf/nvmf/.gitignore b/src/spdk/examples/nvmf/nvmf/.gitignore new file mode 100644 index 000000000..461c83676 --- /dev/null +++ b/src/spdk/examples/nvmf/nvmf/.gitignore @@ -0,0 +1 @@ +nvmf diff --git a/src/spdk/examples/nvmf/nvmf/Makefile b/src/spdk/examples/nvmf/nvmf/Makefile new file mode 100644 index 000000000..ccf5273be --- /dev/null +++ b/src/spdk/examples/nvmf/nvmf/Makefile @@ -0,0 +1,52 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP := nvmf + +C_SRCS := nvmf.c +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += nvmf thread util bdev conf accel rpc jsonrpc json log sock trace notify +SPDK_LIB_LIST += event $(EVENT_BDEV_SUBSYSTEM) + +ifeq ($(CONFIG_FC),y) +ifneq ($(strip $(CONFIG_FC_PATH)),) +SYS_LIBS += -L$(CONFIG_FC_PATH) +endif +SYS_LIBS += -lufc +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/nvmf/nvmf/README.md b/src/spdk/examples/nvmf/nvmf/README.md new file mode 100644 index 000000000..c7432a6b7 --- /dev/null +++ b/src/spdk/examples/nvmf/nvmf/README.md @@ -0,0 +1,31 @@ +# NVMe-oF target without SPDK event framework + +## Overview + +This example is used to show how to use the nvmf lib. In this example we want to encourage user +to use RPC cmd so we would only support RPC style. + +## Usage + +This example's usage is very similar with nvmf_tgt, difference is that you must use the RPC cmd +to setup the nvmf target. + +First, start this example app. You can use the -m to specify how many cores you want to use. +The other parameters you can use -h to show. + ./nvmf -m 0xf -r /var/tmp/spdk.sock + +Then, you need to use the RPC cmd to config the nvmf target. You can use the -h to get how many +RPC cmd you can use. As this example is about nvmf so I think you can focus on the nvmf cmds and +the bdev cmds. + ./scripts/rpc.py -h + +Next, You should use the RPC cmd to setup nvmf target. 
+ ./scripts/rpc.py nvmf_create_transport -t RDMA -g nvmf_example + ./scripts/rpc.py nvmf_create_subsystem -t nvmf_example -s SPDK00000000000001 -a -m 32 nqn.2016-06.io.spdk:cnode1 + ./scripts/rpc.py bdev_malloc_create -b Malloc1 128 512 + ./scripts/rpc.py nvmf_subsystem_add_ns -t nvmf_example nqn.2016-06.io.spdk:cnode1 Malloc1 + ./scripts/rpc.py nvmf_subsystem_add_listener -t rdma -f Ipv4 -a 192.168.0.10 -s 4420 -p nvmf_example nqn.2016-06.io.spdk:cnode1 + +Last, start the initiator to connect the nvmf example target and test the IOs + $ROOT_SPDK/example/nvme/perf/perf -q 64 -o 4095 -w randrw -M 30 -l -t 60 \ + -r "trtype:RDMA adrfam:IPv4 traddr:192.168.0.10 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1" diff --git a/src/spdk/examples/nvmf/nvmf/nvmf.c b/src/spdk/examples/nvmf/nvmf/nvmf.c new file mode 100644 index 000000000..22e271fb3 --- /dev/null +++ b/src/spdk/examples/nvmf/nvmf/nvmf.c @@ -0,0 +1,905 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" +#include "spdk/rpc.h" +#include "spdk/nvmf.h" +#include "spdk/likely.h" + +#include "spdk_internal/event.h" + +#define NVMF_DEFAULT_SUBSYSTEMS 32 +#define ACCEPT_TIMEOUT_US 10000 /* 10ms */ + +static const char *g_rpc_addr = SPDK_DEFAULT_RPC_ADDR; +static uint32_t g_acceptor_poll_rate = ACCEPT_TIMEOUT_US; + +enum nvmf_target_state { + NVMF_INIT_SUBSYSTEM = 0, + NVMF_INIT_TARGET, + NVMF_INIT_POLL_GROUPS, + NVMF_INIT_START_SUBSYSTEMS, + NVMF_INIT_START_ACCEPTOR, + NVMF_RUNNING, + NVMF_FINI_STOP_SUBSYSTEMS, + NVMF_FINI_POLL_GROUPS, + NVMF_FINI_STOP_ACCEPTOR, + NVMF_FINI_TARGET, + NVMF_FINI_SUBSYSTEM, +}; + +struct nvmf_lw_thread { + TAILQ_ENTRY(nvmf_lw_thread) link; + bool resched; +}; + +struct nvmf_reactor { + uint32_t core; + + struct spdk_ring *threads; + TAILQ_ENTRY(nvmf_reactor) link; +}; + +struct nvmf_target_poll_group { + struct spdk_nvmf_poll_group *group; + struct spdk_thread *thread; + + TAILQ_ENTRY(nvmf_target_poll_group) link; +}; + +struct nvmf_target { + struct spdk_nvmf_tgt *tgt; + + int max_subsystems; +}; + +TAILQ_HEAD(, nvmf_reactor) g_reactors = TAILQ_HEAD_INITIALIZER(g_reactors); +TAILQ_HEAD(, nvmf_target_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups); +static uint32_t g_num_poll_groups = 0; + +static struct nvmf_reactor *g_master_reactor = 
NULL; +static struct nvmf_reactor *g_next_reactor = NULL; +static struct spdk_thread *g_init_thread = NULL; +static struct spdk_thread *g_fini_thread = NULL; +static struct nvmf_target g_nvmf_tgt = { + .max_subsystems = NVMF_DEFAULT_SUBSYSTEMS, +}; +static struct spdk_poller *g_acceptor_poller = NULL; +static struct nvmf_target_poll_group *g_next_pg = NULL; +static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER; +static bool g_reactors_exit = false; +static enum nvmf_target_state g_target_state; +static bool g_intr_received = false; + +static uint32_t g_migrate_pg_period_us = 0; +static struct spdk_poller *g_migrate_pg_poller = NULL; + +static void nvmf_target_advance_state(void); +static int nvmf_schedule_spdk_thread(struct spdk_thread *thread); + +static void +usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-g period of round robin poll group migration (us) (default: 0 (disabled))]\n"); + printf("\t[-h show this usage]\n"); + printf("\t[-i shared memory ID (optional)]\n"); + printf("\t[-m core mask for DPDK]\n"); + printf("\t[-n max subsystems for target(default: 32)]\n"); + printf("\t[-p acceptor poller rate in us for target(default: 10000us)]\n"); + printf("\t[-r RPC listen address (default /var/tmp/spdk.sock)]\n"); + printf("\t[-s memory size in MB for DPDK (default: 0MB)]\n"); + printf("\t[-u disable PCI access]\n"); +} + +static int +parse_args(int argc, char **argv, struct spdk_env_opts *opts) +{ + int op; + long int value; + + while ((op = getopt(argc, argv, "g:i:m:n:p:r:s:u:h")) != -1) { + switch (op) { + case 'g': + value = spdk_strtol(optarg, 10); + if (value < 0) { + fprintf(stderr, "converting a string to integer failed\n"); + return -EINVAL; + } + g_migrate_pg_period_us = value; + break; + case 'i': + value = spdk_strtol(optarg, 10); + if (value < 0) { + fprintf(stderr, "converting a string to integer failed\n"); + return -EINVAL; + } + opts->shm_id = value; + break; + case 'm': + opts->core_mask = 
optarg; + break; + case 'n': + g_nvmf_tgt.max_subsystems = spdk_strtol(optarg, 10); + if (g_nvmf_tgt.max_subsystems < 0) { + fprintf(stderr, "converting a string to integer failed\n"); + return -EINVAL; + } + break; + case 'p': + value = spdk_strtol(optarg, 10); + if (value < 0) { + fprintf(stderr, "converting a string to integer failed\n"); + return -EINVAL; + } + g_acceptor_poll_rate = value; + break; + case 'r': + g_rpc_addr = optarg; + break; + case 's': + value = spdk_strtol(optarg, 10); + if (value < 0) { + fprintf(stderr, "converting a string to integer failed\n"); + return -EINVAL; + } + opts->mem_size = value; + break; + case 'u': + opts->no_pci = true; + break; + case 'h': + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +static int +nvmf_reactor_run(void *arg) +{ + struct nvmf_reactor *nvmf_reactor = arg; + struct nvmf_lw_thread *lw_thread; + struct spdk_thread *thread; + + /* run all the lightweight threads in this nvmf_reactor by FIFO. */ + do { + if (spdk_ring_dequeue(nvmf_reactor->threads, (void **)&lw_thread, 1)) { + thread = spdk_thread_get_from_ctx(lw_thread); + + spdk_thread_poll(thread, 0, 0); + + if (spdk_unlikely(spdk_thread_is_exited(thread) && + spdk_thread_is_idle(thread))) { + spdk_thread_destroy(thread); + } else if (spdk_unlikely(lw_thread->resched)) { + lw_thread->resched = false; + nvmf_schedule_spdk_thread(thread); + } else { + spdk_ring_enqueue(nvmf_reactor->threads, (void **)&lw_thread, 1, NULL); + } + } + } while (!g_reactors_exit); + + /* free all the lightweight threads */ + while (spdk_ring_dequeue(nvmf_reactor->threads, (void **)&lw_thread, 1)) { + thread = spdk_thread_get_from_ctx(lw_thread); + spdk_set_thread(thread); + + if (spdk_thread_is_exited(thread)) { + spdk_thread_destroy(thread); + } else { + /* This thread is not exited yet, and may need to communicate with other threads + * to be exited. So mark it as exiting, and check again after traversing other threads. 
+ */ + spdk_thread_exit(thread); + spdk_thread_poll(thread, 0, 0); + spdk_ring_enqueue(nvmf_reactor->threads, (void **)&lw_thread, 1, NULL); + } + } + + return 0; +} + +static int +nvmf_schedule_spdk_thread(struct spdk_thread *thread) +{ + struct nvmf_reactor *nvmf_reactor; + struct nvmf_lw_thread *lw_thread; + struct spdk_cpuset *cpumask; + uint32_t i; + + /* Lightweight threads may have a requested cpumask. + * This is a request only - the scheduler does not have to honor it. + * For this scheduler implementation, each reactor is pinned to + * a particular core so honoring the request is reasonably easy. + */ + cpumask = spdk_thread_get_cpumask(thread); + + lw_thread = spdk_thread_get_ctx(thread); + assert(lw_thread != NULL); + memset(lw_thread, 0, sizeof(*lw_thread)); + + /* assign lightweight threads to nvmf reactor(core) + * Here we use the mutex.The way the actual SPDK event framework + * solves this is by using internal rings for messages between reactors + */ + pthread_mutex_lock(&g_mutex); + for (i = 0; i < spdk_env_get_core_count(); i++) { + if (g_next_reactor == NULL) { + g_next_reactor = TAILQ_FIRST(&g_reactors); + } + nvmf_reactor = g_next_reactor; + g_next_reactor = TAILQ_NEXT(g_next_reactor, link); + + /* each spdk_thread has the core affinity */ + if (spdk_cpuset_get_cpu(cpumask, nvmf_reactor->core)) { + spdk_ring_enqueue(nvmf_reactor->threads, (void **)&lw_thread, 1, NULL); + break; + } + } + pthread_mutex_unlock(&g_mutex); + + if (i == spdk_env_get_core_count()) { + fprintf(stderr, "failed to schedule spdk thread\n"); + return -1; + } + return 0; +} + +static void +nvmf_request_spdk_thread_reschedule(struct spdk_thread *thread) +{ + struct nvmf_lw_thread *lw_thread; + + assert(thread == spdk_get_thread()); + + lw_thread = spdk_thread_get_ctx(thread); + + assert(lw_thread != NULL); + + lw_thread->resched = true; +} + +static int +nvmf_reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op) +{ + switch (op) { + case SPDK_THREAD_OP_NEW: 
+ return nvmf_schedule_spdk_thread(thread); + case SPDK_THREAD_OP_RESCHED: + nvmf_request_spdk_thread_reschedule(thread); + return 0; + default: + return -ENOTSUP; + } +} + +static bool +nvmf_reactor_thread_op_supported(enum spdk_thread_op op) +{ + switch (op) { + case SPDK_THREAD_OP_NEW: + case SPDK_THREAD_OP_RESCHED: + return true; + default: + return false; + } +} + +static int +nvmf_init_threads(void) +{ + int rc; + uint32_t i; + char thread_name[32]; + struct nvmf_reactor *nvmf_reactor; + struct spdk_cpuset cpumask; + uint32_t master_core = spdk_env_get_current_core(); + + /* Whenever SPDK creates a new lightweight thread it will call + * nvmf_schedule_spdk_thread asking for the application to begin + * polling it via spdk_thread_poll(). Each lightweight thread in + * SPDK optionally allocates extra memory to be used by the application + * framework. The size of the extra memory allocated is the second parameter. + */ + spdk_thread_lib_init_ext(nvmf_reactor_thread_op, nvmf_reactor_thread_op_supported, + sizeof(struct nvmf_lw_thread)); + + /* Spawn one system thread per CPU core. The system thread is called a reactor. + * SPDK will spawn lightweight threads that must be mapped to reactors in + * nvmf_schedule_spdk_thread. Using a single system thread per CPU core is a + * choice unique to this application. SPDK itself does not require this specific + * threading model. For example, another viable threading model would be + * dynamically scheduling the lightweight threads onto a thread pool using a + * work queue. 
+ */ + SPDK_ENV_FOREACH_CORE(i) { + nvmf_reactor = calloc(1, sizeof(struct nvmf_reactor)); + if (!nvmf_reactor) { + fprintf(stderr, "failed to alloc nvmf reactor\n"); + rc = -ENOMEM; + goto err_exit; + } + + nvmf_reactor->core = i; + + nvmf_reactor->threads = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 1024, SPDK_ENV_SOCKET_ID_ANY); + if (!nvmf_reactor->threads) { + fprintf(stderr, "failed to alloc ring\n"); + free(nvmf_reactor); + rc = -ENOMEM; + goto err_exit; + } + + TAILQ_INSERT_TAIL(&g_reactors, nvmf_reactor, link); + + if (i == master_core) { + g_master_reactor = nvmf_reactor; + g_next_reactor = g_master_reactor; + } else { + rc = spdk_env_thread_launch_pinned(i, + nvmf_reactor_run, + nvmf_reactor); + if (rc) { + fprintf(stderr, "failed to pin reactor launch\n"); + goto err_exit; + } + } + } + + /* Spawn a lightweight thread only on the current core to manage this application. */ + spdk_cpuset_zero(&cpumask); + spdk_cpuset_set_cpu(&cpumask, master_core, true); + snprintf(thread_name, sizeof(thread_name), "nvmf_master_thread"); + g_init_thread = spdk_thread_create(thread_name, &cpumask); + if (!g_init_thread) { + fprintf(stderr, "failed to create spdk thread\n"); + return -1; + } + + fprintf(stdout, "nvmf threads initlize successfully\n"); + return 0; + +err_exit: + return rc; +} + +static void +nvmf_destroy_threads(void) +{ + struct nvmf_reactor *nvmf_reactor, *tmp; + + TAILQ_FOREACH_SAFE(nvmf_reactor, &g_reactors, link, tmp) { + spdk_ring_free(nvmf_reactor->threads); + free(nvmf_reactor); + } + + pthread_mutex_destroy(&g_mutex); + spdk_thread_lib_fini(); + fprintf(stdout, "nvmf threads destroy successfully\n"); +} + +static void +nvmf_tgt_destroy_done(void *ctx, int status) +{ + fprintf(stdout, "destroyed the nvmf target service\n"); + + g_target_state = NVMF_FINI_SUBSYSTEM; + nvmf_target_advance_state(); +} + +static void +nvmf_destroy_nvmf_tgt(void) +{ + if (g_nvmf_tgt.tgt) { + spdk_nvmf_tgt_destroy(g_nvmf_tgt.tgt, nvmf_tgt_destroy_done, NULL); + } else { + 
g_target_state = NVMF_FINI_SUBSYSTEM; + } +} + +static void +nvmf_create_nvmf_tgt(void) +{ + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_target_opts tgt_opts; + + tgt_opts.max_subsystems = g_nvmf_tgt.max_subsystems; + snprintf(tgt_opts.name, sizeof(tgt_opts.name), "%s", "nvmf_example"); + /* Construct the default NVMe-oF target + * An NVMe-oF target is a collection of subsystems, namespace, and poll + * groups, and defines the scope of the NVMe-oF discovery service. + */ + g_nvmf_tgt.tgt = spdk_nvmf_tgt_create(&tgt_opts); + if (g_nvmf_tgt.tgt == NULL) { + fprintf(stderr, "spdk_nvmf_tgt_create() failed\n"); + goto error; + } + + /* Create and add discovery subsystem to the NVMe-oF target. + * NVMe-oF defines a discovery mechanism that a host uses to determine + * the NVM subsystems that expose namespaces that the host may access. + * It provides a host with following capabilities: + * 1,The ability to discover a list of NVM subsystems with namespaces + * that are accessible to the host. + * 2,The ability to discover multiple paths to an NVM subsystem. + * 3,The ability to discover controllers that are statically configured. 
+ */ + subsystem = spdk_nvmf_subsystem_create(g_nvmf_tgt.tgt, SPDK_NVMF_DISCOVERY_NQN, + SPDK_NVMF_SUBTYPE_DISCOVERY, 0); + if (subsystem == NULL) { + fprintf(stderr, "failed to create discovery nvmf library subsystem\n"); + goto error; + } + + /* Allow any host to access the discovery subsystem */ + spdk_nvmf_subsystem_set_allow_any_host(subsystem, true); + + fprintf(stdout, "created a nvmf target service\n"); + + g_target_state = NVMF_INIT_POLL_GROUPS; + return; + +error: + g_target_state = NVMF_FINI_TARGET; +} + +static void +nvmf_tgt_subsystem_stop_next(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + if (subsystem) { + spdk_nvmf_subsystem_stop(subsystem, + nvmf_tgt_subsystem_stop_next, + cb_arg); + return; + } + + fprintf(stdout, "all subsystems of target stopped\n"); + + g_target_state = NVMF_FINI_POLL_GROUPS; + nvmf_target_advance_state(); +} + +static void +nvmf_tgt_stop_subsystems(struct nvmf_target *nvmf_tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + + subsystem = spdk_nvmf_subsystem_get_first(nvmf_tgt->tgt); + if (spdk_likely(subsystem)) { + spdk_nvmf_subsystem_stop(subsystem, + nvmf_tgt_subsystem_stop_next, + NULL); + } else { + g_target_state = NVMF_FINI_POLL_GROUPS; + } +} + +static int +nvmf_tgt_acceptor_poll(void *arg) +{ + struct nvmf_target *nvmf_tgt = arg; + + spdk_nvmf_tgt_accept(nvmf_tgt->tgt); + + return -1; +} + +static void +nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) +{ + subsystem = spdk_nvmf_subsystem_get_next(subsystem); + if (subsystem) { + spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_start_next, + cb_arg); + return; + } + + fprintf(stdout, "all subsystems of target started\n"); + + g_target_state = NVMF_INIT_START_ACCEPTOR; + nvmf_target_advance_state(); +} + +static void +nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt) +{ + struct spdk_nvmf_subsystem *subsystem; + + /* Subsystem is 
the NVM subsystem which is a combine of namespaces + * except the discovery subsystem which is used for discovery service. + * It also controls the hosts that means the subsystem determines whether + * the host can access this subsystem. + */ + subsystem = spdk_nvmf_subsystem_get_first(nvmf_tgt->tgt); + if (spdk_likely(subsystem)) { + /* In SPDK there are three states in subsystem: Inactive, Active, Paused. + * Start subsystem means make it from inactive to active that means + * subsystem start to work or it can be accessed. + */ + spdk_nvmf_subsystem_start(subsystem, + nvmf_tgt_subsystem_start_next, + NULL); + } else { + g_target_state = NVMF_INIT_START_ACCEPTOR; + } +} + +static void +nvmf_tgt_create_poll_groups_done(void *ctx) +{ + struct nvmf_target_poll_group *pg = ctx; + + if (!g_next_pg) { + g_next_pg = pg; + } + + TAILQ_INSERT_TAIL(&g_poll_groups, pg, link); + + assert(g_num_poll_groups < spdk_env_get_core_count()); + + if (++g_num_poll_groups == spdk_env_get_core_count()) { + fprintf(stdout, "create targets's poll groups done\n"); + + g_target_state = NVMF_INIT_START_SUBSYSTEMS; + nvmf_target_advance_state(); + } +} + +static void +nvmf_tgt_create_poll_group(void *ctx) +{ + struct nvmf_target_poll_group *pg; + + pg = calloc(1, sizeof(struct nvmf_target_poll_group)); + if (!pg) { + fprintf(stderr, "failed to allocate poll group\n"); + assert(false); + return; + } + + pg->thread = spdk_get_thread(); + pg->group = spdk_nvmf_poll_group_create(g_nvmf_tgt.tgt); + if (!pg->group) { + fprintf(stderr, "failed to create poll group of the target\n"); + free(pg); + assert(false); + return; + } + + spdk_thread_send_msg(g_init_thread, nvmf_tgt_create_poll_groups_done, pg); +} + +/* Create a lightweight thread per poll group instead of assuming a pool of lightweight + * threads already exist at start up time. A poll group is a collection of unrelated NVMe-oF + * connections. Each poll group is only accessed from the associated lightweight thread. 
+ */ +static void +nvmf_poll_groups_create(void) +{ + struct spdk_cpuset tmp_cpumask = {}; + uint32_t i; + char thread_name[32]; + struct spdk_thread *thread; + + assert(g_init_thread != NULL); + + SPDK_ENV_FOREACH_CORE(i) { + spdk_cpuset_zero(&tmp_cpumask); + spdk_cpuset_set_cpu(&tmp_cpumask, i, true); + snprintf(thread_name, sizeof(thread_name), "nvmf_tgt_poll_group_%u", i); + + thread = spdk_thread_create(thread_name, &tmp_cpumask); + assert(thread != NULL); + + spdk_thread_send_msg(thread, nvmf_tgt_create_poll_group, NULL); + } +} + +static void +_nvmf_tgt_destroy_poll_groups_done(void *ctx) +{ + assert(g_num_poll_groups > 0); + + if (--g_num_poll_groups == 0) { + fprintf(stdout, "destroy targets's poll groups done\n"); + + g_target_state = NVMF_FINI_STOP_ACCEPTOR; + nvmf_target_advance_state(); + } +} + +static void +nvmf_tgt_destroy_poll_groups_done(void *cb_arg, int status) +{ + struct nvmf_target_poll_group *pg = cb_arg; + + free(pg); + + spdk_thread_send_msg(g_fini_thread, _nvmf_tgt_destroy_poll_groups_done, NULL); + + spdk_thread_exit(spdk_get_thread()); +} + +static void +nvmf_tgt_destroy_poll_group(void *ctx) +{ + struct nvmf_target_poll_group *pg = ctx; + + spdk_nvmf_poll_group_destroy(pg->group, nvmf_tgt_destroy_poll_groups_done, pg); +} + +static void +nvmf_poll_groups_destroy(void) +{ + struct nvmf_target_poll_group *pg, *tmp; + + g_fini_thread = spdk_get_thread(); + assert(g_fini_thread != NULL); + + TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tmp) { + TAILQ_REMOVE(&g_poll_groups, pg, link); + spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg); + } +} + +static void +nvmf_subsystem_fini_done(void *cb_arg) +{ + fprintf(stdout, "bdev subsystem finish successfully\n"); + spdk_rpc_finish(); + g_reactors_exit = true; +} + +static void +nvmf_subsystem_init_done(int rc, void *cb_arg) +{ + fprintf(stdout, "bdev subsystem init successfully\n"); + spdk_rpc_initialize(g_rpc_addr); + spdk_rpc_set_state(SPDK_RPC_RUNTIME); + + g_target_state = 
NVMF_INIT_TARGET; + nvmf_target_advance_state(); +} + +static void +migrate_poll_group_by_rr(void *ctx) +{ + uint32_t current_core, next_core; + struct spdk_cpuset cpumask = {}; + + current_core = spdk_env_get_current_core(); + next_core = spdk_env_get_next_core(current_core); + if (next_core == UINT32_MAX) { + next_core = spdk_env_get_first_core(); + } + + spdk_cpuset_set_cpu(&cpumask, next_core, true); + + spdk_thread_set_cpumask(&cpumask); +} + +static int +migrate_poll_groups_by_rr(void *ctx) +{ + struct nvmf_target_poll_group *pg; + + TAILQ_FOREACH(pg, &g_poll_groups, link) { + spdk_thread_send_msg(pg->thread, migrate_poll_group_by_rr, NULL); + } + + return 1; +} + +static void +nvmf_target_advance_state(void) +{ + enum nvmf_target_state prev_state; + + do { + prev_state = g_target_state; + + switch (g_target_state) { + case NVMF_INIT_SUBSYSTEM: + /* initlize the bdev layer */ + spdk_subsystem_init(nvmf_subsystem_init_done, NULL); + return; + case NVMF_INIT_TARGET: + nvmf_create_nvmf_tgt(); + break; + case NVMF_INIT_POLL_GROUPS: + nvmf_poll_groups_create(); + break; + case NVMF_INIT_START_SUBSYSTEMS: + nvmf_tgt_start_subsystems(&g_nvmf_tgt); + break; + case NVMF_INIT_START_ACCEPTOR: + g_acceptor_poller = SPDK_POLLER_REGISTER(nvmf_tgt_acceptor_poll, &g_nvmf_tgt, + g_acceptor_poll_rate); + fprintf(stdout, "Acceptor running\n"); + g_target_state = NVMF_RUNNING; + break; + case NVMF_RUNNING: + fprintf(stdout, "nvmf target is running\n"); + if (g_migrate_pg_period_us != 0) { + g_migrate_pg_poller = SPDK_POLLER_REGISTER(migrate_poll_groups_by_rr, NULL, + g_migrate_pg_period_us); + } + break; + case NVMF_FINI_STOP_SUBSYSTEMS: + spdk_poller_unregister(&g_migrate_pg_poller); + nvmf_tgt_stop_subsystems(&g_nvmf_tgt); + break; + case NVMF_FINI_POLL_GROUPS: + nvmf_poll_groups_destroy(); + break; + case NVMF_FINI_STOP_ACCEPTOR: + spdk_poller_unregister(&g_acceptor_poller); + g_target_state = NVMF_FINI_TARGET; + break; + case NVMF_FINI_TARGET: + nvmf_destroy_nvmf_tgt(); + 
break; + case NVMF_FINI_SUBSYSTEM: + spdk_subsystem_fini(nvmf_subsystem_fini_done, NULL); + break; + } + } while (g_target_state != prev_state); +} + +static void +nvmf_target_app_start(void *arg) +{ + g_target_state = NVMF_INIT_SUBSYSTEM; + nvmf_target_advance_state(); +} + +static void +_nvmf_shutdown_cb(void *ctx) +{ + /* Still in initialization state, defer shutdown operation */ + if (g_target_state < NVMF_RUNNING) { + spdk_thread_send_msg(spdk_get_thread(), _nvmf_shutdown_cb, NULL); + return; + } else if (g_target_state > NVMF_RUNNING) { + /* Already in Shutdown status, ignore the signal */ + return; + } + + g_target_state = NVMF_FINI_STOP_SUBSYSTEMS; + nvmf_target_advance_state(); +} + +static void +nvmf_shutdown_cb(int signo) +{ + if (!g_intr_received) { + g_intr_received = true; + spdk_thread_send_msg(g_init_thread, _nvmf_shutdown_cb, NULL); + } +} + +static int +nvmf_setup_signal_handlers(void) +{ + struct sigaction sigact; + sigset_t sigmask; + int signals[] = {SIGINT, SIGTERM}; + int num_signals = sizeof(signals) / sizeof(int); + int rc, i; + + rc = sigemptyset(&sigmask); + if (rc) { + fprintf(stderr, "errno:%d--failed to empty signal set\n", errno); + return rc; + } + memset(&sigact, 0, sizeof(sigact)); + rc = sigemptyset(&sigact.sa_mask); + if (rc) { + fprintf(stderr, "errno:%d--failed to empty signal set\n", errno); + return rc; + } + + /* Install the same handler for SIGINT and SIGTERM */ + sigact.sa_handler = nvmf_shutdown_cb; + + for (i = 0; i < num_signals; i++) { + rc = sigaction(signals[i], &sigact, NULL); + if (rc < 0) { + fprintf(stderr, "errno:%d--sigaction() failed\n", errno); + return rc; + } + rc = sigaddset(&sigmask, signals[i]); + if (rc) { + fprintf(stderr, "errno:%d--failed to add set\n", errno); + return rc; + } + } + + pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL); + + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "nvmf-example"; + + rc = 
parse_args(argc, argv, &opts); + if (rc != 0) { + return rc; + } + + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "unable to initialize SPDK env\n"); + return -EINVAL; + } + + /* Initialize the threads */ + rc = nvmf_init_threads(); + assert(rc == 0); + + /* Send a message to the thread assigned to the master reactor + * that continues initialization. This is how we bootstrap the + * program so that all code from here on is running on an SPDK thread. + */ + assert(g_init_thread != NULL); + + rc = nvmf_setup_signal_handlers(); + assert(rc == 0); + + spdk_thread_send_msg(g_init_thread, nvmf_target_app_start, NULL); + + nvmf_reactor_run(g_master_reactor); + + spdk_env_thread_wait_all(); + nvmf_destroy_threads(); + return rc; +} diff --git a/src/spdk/examples/sock/Makefile b/src/spdk/examples/sock/Makefile new file mode 100644 index 000000000..097061fd1 --- /dev/null +++ b/src/spdk/examples/sock/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) + @: + +clean: $(DIRS-y) + @: + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/sock/hello_world/.gitignore b/src/spdk/examples/sock/hello_world/.gitignore new file mode 100644 index 000000000..95ffb143c --- /dev/null +++ b/src/spdk/examples/sock/hello_world/.gitignore @@ -0,0 +1 @@ +hello_sock diff --git a/src/spdk/examples/sock/hello_world/Makefile b/src/spdk/examples/sock/hello_world/Makefile new file mode 100644 index 000000000..e326bf26b --- /dev/null +++ b/src/spdk/examples/sock/hello_world/Makefile @@ -0,0 +1,43 @@ +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = hello_sock + +C_SRCS := hello_sock.c + +SPDK_LIB_LIST = $(SOCK_MODULES_LIST) +SPDK_LIB_LIST += event_net net event thread util conf trace log jsonrpc json rpc sock notify + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/sock/hello_world/hello_sock.c b/src/spdk/examples/sock/hello_world/hello_sock.c new file mode 100644 index 000000000..75ae4e631 --- /dev/null +++ b/src/spdk/examples/sock/hello_world/hello_sock.c @@ -0,0 +1,442 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/string.h" + +#include "spdk/sock.h" +#include "spdk/net.h" + +#define ACCEPT_TIMEOUT_US 1000 +#define CLOSE_TIMEOUT_US 1000000 +#define BUFFER_SIZE 1024 +#define ADDR_STR_LEN INET6_ADDRSTRLEN + +static bool g_is_running; + +static char *g_host; +static char *g_sock_impl_name; +static int g_port; +static bool g_is_server; +static bool g_verbose; + +/* + * We'll use this struct to gather housekeeping hello_context to pass between + * our events and callbacks. 
+ */ +struct hello_context_t { + bool is_server; + char *host; + char *sock_impl_name; + int port; + + bool verbose; + int bytes_in; + int bytes_out; + + struct spdk_sock *sock; + + struct spdk_sock_group *group; + struct spdk_poller *poller_in; + struct spdk_poller *poller_out; + struct spdk_poller *time_out; + + /* Status passed to spdk_app_stop() by hello_sock_close_timeout_poll(). */ + int rc; +}; + +/* + * Usage function for printing parameters that are specific to this application + */ +static void +hello_sock_usage(void) +{ + printf(" -H host_addr host address\n"); + printf(" -P port port number\n"); + printf(" -N sock_impl socket implementation, e.g., -N posix or -N vpp\n"); + printf(" -S start in server mode\n"); + /* Terminate the last line as well, so whatever is printed next starts on a new line. */ + printf(" -V print out additional information\n"); +} + +/* + * This function is called to parse the parameters that are specific to this application. + * Returns 0 on success, or a negative value for an invalid port or an unknown option. + */ +static int hello_sock_parse_arg(int ch, char *arg) +{ + switch (ch) { + case 'H': + g_host = arg; + break; + case 'N': + g_sock_impl_name = arg; + break; + case 'P': + g_port = spdk_strtol(arg, 10); + if (g_port < 0) { + fprintf(stderr, "Invalid port ID\n"); + return g_port; + } + break; + case 'S': + g_is_server = true; + break; + case 'V': + g_verbose = true; + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * Close timer callback: unregisters itself and the input poller, closes the + * socket and the sock group, then stops the application with ctx->rc. + */ +static int +hello_sock_close_timeout_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + SPDK_NOTICELOG("Connection closed\n"); + + spdk_poller_unregister(&ctx->time_out); + spdk_poller_unregister(&ctx->poller_in); + spdk_sock_close(&ctx->sock); + spdk_sock_group_close(&ctx->group); + + spdk_app_stop(ctx->rc); + return 0; +} + +/* + * Record the exit status, stop the output poller and schedule the close timer. + * The timer is registered only once, even if this is called repeatedly. + */ +static int +hello_sock_quit(struct hello_context_t *ctx, int rc) +{ + ctx->rc = rc; + spdk_poller_unregister(&ctx->poller_out); + if (!ctx->time_out) { + ctx->time_out = SPDK_POLLER_REGISTER(hello_sock_close_timeout_poll, ctx, + CLOSE_TIMEOUT_US); + } + return 0; +} + +static int +hello_sock_recv_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc; + char buf_in[BUFFER_SIZE]; + + /* + * Get response + */ + rc =
spdk_sock_recv(ctx->sock, buf_in, sizeof(buf_in) - 1); + + if (rc <= 0) { /* NOTE(review): errno is also consulted when rc == 0; confirm spdk_sock_recv() sets it in that case */ + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + return -1; + } + + if (rc > 0) { + ctx->bytes_in += rc; + buf_in[rc] = '\0'; /* the recv length above is sizeof(buf_in) - 1, so index rc stays inside buf_in */ + printf("%s", buf_in); + } + + return 0; +} + +static int +hello_sock_writev_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc = 0; + char buf_out[BUFFER_SIZE]; + struct iovec iov; + ssize_t n; + + n = read(STDIN_FILENO, buf_out, sizeof(buf_out)); + if (n == 0 || !g_is_running) { + /* EOF on stdin, or g_is_running has been cleared */ + SPDK_NOTICELOG("Closing connection...\n"); + hello_sock_quit(ctx, 0); + return 0; + } + if (n > 0) { + /* + * Send message to the server + */ + iov.iov_base = buf_out; + iov.iov_len = n; + rc = spdk_sock_writev(ctx->sock, &iov, 1); + if (rc > 0) { + ctx->bytes_out += rc; + } + } + return rc; /* 0 when nothing was read from stdin, otherwise the spdk_sock_writev() result */ +} + +static int +hello_sock_connect(struct hello_context_t *ctx) +{ + int rc; + char saddr[ADDR_STR_LEN], caddr[ADDR_STR_LEN]; + uint16_t cport, sport; + + SPDK_NOTICELOG("Connecting to the server on %s:%d with sock_impl(%s)\n", ctx->host, ctx->port, + ctx->sock_impl_name); + + ctx->sock = spdk_sock_connect(ctx->host, ctx->port, ctx->sock_impl_name); + if (ctx->sock == NULL) { + SPDK_ERRLOG("connect error(%d): %s\n", errno, spdk_strerror(errno)); + return -1; + } + + rc = spdk_sock_getaddr(ctx->sock, saddr, sizeof(saddr), &sport, caddr, sizeof(caddr), &cport); + if (rc < 0) { + SPDK_ERRLOG("Cannot get connection addresses\n"); + spdk_sock_close(&ctx->sock); + return -1; + } + + SPDK_NOTICELOG("Connection accepted from (%s, %hu) to (%s, %hu)\n", caddr, cport, saddr, sport); + + fcntl(STDIN_FILENO, F_SETFL, fcntl(STDIN_FILENO, F_GETFL) | O_NONBLOCK); /* stdin becomes non-blocking; hello_sock_writev_poll() read()s it from a poller */ + + g_is_running = true; + ctx->poller_in = SPDK_POLLER_REGISTER(hello_sock_recv_poll, ctx, 0); + ctx->poller_out = SPDK_POLLER_REGISTER(hello_sock_writev_poll, ctx, 0); + + return 0; +} + +static void +hello_sock_cb(void
*arg, struct spdk_sock_group *group, struct spdk_sock *sock) +{ + ssize_t n; + char buf[BUFFER_SIZE]; + struct iovec iov; + struct hello_context_t *ctx = arg; + + n = spdk_sock_recv(sock, buf, sizeof(buf)); + if (n < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + return; + } + + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + + if (n > 0) { + ctx->bytes_in += n; + iov.iov_base = buf; + iov.iov_len = n; + n = spdk_sock_writev(sock, &iov, 1); + if (n > 0) { + ctx->bytes_out += n; + } + return; + } + + /* Connection closed */ + SPDK_NOTICELOG("Connection closed\n"); + spdk_sock_group_remove_sock(group, sock); + spdk_sock_close(&sock); +} + +static int +hello_sock_accept_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + struct spdk_sock *sock; + int rc; + int count = 0; + char saddr[ADDR_STR_LEN], caddr[ADDR_STR_LEN]; + uint16_t cport, sport; + + if (!g_is_running) { + hello_sock_quit(ctx, 0); + return 0; + } + + while (1) { + sock = spdk_sock_accept(ctx->sock); + if (sock != NULL) { + rc = spdk_sock_getaddr(sock, saddr, sizeof(saddr), &sport, caddr, sizeof(caddr), &cport); + if (rc < 0) { + SPDK_ERRLOG("Cannot get connection addresses\n"); + spdk_sock_close(&ctx->sock); + return -1; + } + + SPDK_NOTICELOG("Accepting a new connection from (%s, %hu) to (%s, %hu)\n", + caddr, cport, saddr, sport); + + rc = spdk_sock_group_add_sock(ctx->group, sock, + hello_sock_cb, ctx); + + if (rc < 0) { + spdk_sock_close(&sock); + SPDK_ERRLOG("failed\n"); + break; + } + + count++; + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("accept error(%d): %s\n", errno, spdk_strerror(errno)); + } + break; + } + } + + return count; +} + +static int +hello_sock_group_poll(void *arg) +{ + struct hello_context_t *ctx = arg; + int rc; + + rc = spdk_sock_group_poll(ctx->group); + if (rc < 0) { + SPDK_ERRLOG("Failed to poll 
sock_group=%p\n", ctx->group); + } + + return -1; +} + +/* + * Server-side setup: opens the listening socket, creates the sock group that + * accepted connections are added to, and registers the accept and group pollers. + * Returns 0 on success, or -1 if the listening socket or the sock group cannot + * be created. + */ +static int +hello_sock_listen(struct hello_context_t *ctx) +{ + ctx->sock = spdk_sock_listen(ctx->host, ctx->port, ctx->sock_impl_name); + if (ctx->sock == NULL) { + SPDK_ERRLOG("Cannot create server socket\n"); + return -1; + } + + SPDK_NOTICELOG("Listening connection on %s:%d with sock_impl(%s)\n", ctx->host, ctx->port, + ctx->sock_impl_name); + + /* + * Create sock group for server socket + */ + ctx->group = spdk_sock_group_create(NULL); + if (ctx->group == NULL) { + /* Do not start pollers that would use a NULL group; release the listener instead. */ + SPDK_ERRLOG("Cannot create sock group\n"); + spdk_sock_close(&ctx->sock); + return -1; + } + + g_is_running = true; + + /* + * Start acceptor and group poller + */ + ctx->poller_in = SPDK_POLLER_REGISTER(hello_sock_accept_poll, ctx, + ACCEPT_TIMEOUT_US); + ctx->poller_out = SPDK_POLLER_REGISTER(hello_sock_group_poll, ctx, 0); + + return 0; +} + +/* + * Shutdown callback installed in main(): only clears g_is_running; the pollers + * check that flag and start the actual teardown. + */ +static void +hello_sock_shutdown_cb(void) +{ + g_is_running = false; +} + +/* + * Our initial event that kicks off everything from main(). + */ +static void +hello_start(void *arg1) +{ + struct hello_context_t *ctx = arg1; + int rc; + + SPDK_NOTICELOG("Successfully started the application\n"); + + if (ctx->is_server) { + rc = hello_sock_listen(ctx); + } else { + rc = hello_sock_connect(ctx); + } + + if (rc) { + spdk_app_stop(-1); + return; + } +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc = 0; + struct hello_context_t hello_context = {}; + + /* Set default values in opts structure.
*/ + spdk_app_opts_init(&opts); + opts.name = "hello_sock"; + opts.shutdown_cb = hello_sock_shutdown_cb; + + if ((rc = spdk_app_parse_args(argc, argv, &opts, "H:N:P:SV", NULL, hello_sock_parse_arg, + hello_sock_usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { + exit(rc); + } + hello_context.is_server = g_is_server; + hello_context.host = g_host; + hello_context.sock_impl_name = g_sock_impl_name; + hello_context.port = g_port; + hello_context.verbose = g_verbose; + + rc = spdk_app_start(&opts, hello_start, &hello_context); + if (rc) { + SPDK_ERRLOG("ERROR starting application\n"); + } + + SPDK_NOTICELOG("Exiting from application\n"); + + if (hello_context.verbose) { + printf("** %d bytes received, %d bytes sent **\n", + hello_context.bytes_in, hello_context.bytes_out); + } + + /* Gracefully close out all of the SPDK subsystems. */ + spdk_app_fini(); + return rc; +} diff --git a/src/spdk/examples/vmd/Makefile b/src/spdk/examples/vmd/Makefile new file mode 100644 index 000000000..ca8fb6980 --- /dev/null +++ b/src/spdk/examples/vmd/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += lsvmd led + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/vmd/led/.gitignore b/src/spdk/examples/vmd/led/.gitignore new file mode 100644 index 000000000..727c8bd27 --- /dev/null +++ b/src/spdk/examples/vmd/led/.gitignore @@ -0,0 +1 @@ +led diff --git a/src/spdk/examples/vmd/led/Makefile b/src/spdk/examples/vmd/led/Makefile new file mode 100644 index 000000000..0fa807bbc --- /dev/null +++ b/src/spdk/examples/vmd/led/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = led + +C_SRCS := led.c + +SPDK_LIB_LIST = vmd log + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/vmd/led/led.c b/src/spdk/examples/vmd/led/led.c new file mode 100644 index 000000000..e014de704 --- /dev/null +++ b/src/spdk/examples/vmd/led/led.c @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/log.h" +#include "spdk/likely.h" +#include "spdk/env.h" +#include "spdk/vmd.h" + +enum app_action { + APP_ACTION_SET, + APP_ACTION_GET, + APP_ACTION_NOP, +}; + +struct app_opts { + const char *app_name; + struct spdk_pci_addr pci_addr; + bool all_devices; + enum app_action action; + enum spdk_vmd_led_state led_state; +}; + +struct app_opts g_opts = { + .all_devices = true, + .action = APP_ACTION_GET, + .led_state = SPDK_VMD_LED_STATE_UNKNOWN, +}; + +static const char *g_led_states[] = { + [SPDK_VMD_LED_STATE_OFF] = "off", + [SPDK_VMD_LED_STATE_IDENTIFY] = "identify", + [SPDK_VMD_LED_STATE_FAULT] = "fault", + [SPDK_VMD_LED_STATE_REBUILD] = "rebuild", + [SPDK_VMD_LED_STATE_UNKNOWN] = "unknown", +}; + +static void +usage(void) +{ + printf("Usage: %s [-d] [-s STATE] [-r TRADDR]\n", g_opts.app_name); + printf("\n"); + printf("Options:\n"); + printf(" -d enables debug logs from the VMD module\n"); + printf(" -s STATE sets the state of the LEDs. 
Available states are:\n"); + printf(" off, identify, fault, rebuild\n"); + printf(" -r TRADDR uses device identified by TRADDR\n"); + printf(" -h shows this help\n"); +} + +/* + * Parse the command line into g_opts. + * Returns 0 on success and -EINVAL for an unparsable PCI address, an unknown + * LED state, an unknown option, or -d on a build without DEBUG. + */ +static int +parse_args(int argc, char **argv) +{ + int led_state; + int op; + + g_opts.app_name = argv[0]; + + while ((op = getopt(argc, argv, "dhr:s:")) != -1) { + switch (op) { + case 'r': + if (spdk_pci_addr_parse(&g_opts.pci_addr, optarg)) { + fprintf(stderr, "Unable to parse PCI address: %s\n", optarg); + return -EINVAL; + } + + g_opts.all_devices = false; + break; + + case 'd': +#ifdef DEBUG + spdk_log_set_print_level(SPDK_LOG_DEBUG); + spdk_log_set_flag("vmd"); + break; +#else + fprintf(stderr, "%s must be rebuilt with --enable-debug for the -d flag\n", + argv[0]); + return -EINVAL; +#endif + case 's': + /* + * Forget any state set by an earlier -s, so that an invalid value + * is rejected instead of silently keeping the previous one. + */ + g_opts.led_state = SPDK_VMD_LED_STATE_UNKNOWN; + + for (led_state = SPDK_VMD_LED_STATE_OFF; + led_state <= SPDK_VMD_LED_STATE_REBUILD; + led_state++) { + if (strcmp(optarg, g_led_states[led_state]) == 0) { + g_opts.led_state = (enum spdk_vmd_led_state)led_state; + break; + } + } + + if (g_opts.led_state == SPDK_VMD_LED_STATE_UNKNOWN) { + fprintf(stderr, "Invalid LED state\n"); + return -EINVAL; + } + + g_opts.action = APP_ACTION_SET; + break; + + case 'h': + g_opts.action = APP_ACTION_NOP; + usage(); + break; + + default: + return -EINVAL; + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + struct spdk_env_opts opts; + struct spdk_pci_device *pci_device; + enum spdk_vmd_led_state led_state; + char addr_buf[128]; + int rc, status = 0; + + if (parse_args(argc, argv) != 0) { + usage(); + return 1; + } + + if (g_opts.action == APP_ACTION_NOP) { + return 0; + } + + spdk_env_opts_init(&opts); + opts.name = "led"; + + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK environment\n"); + return 1; + } + + rc = spdk_vmd_init(); + if (rc) { + fprintf(stderr, "Unable to initialize VMD subsystem\n"); + return 1; + } + + for (pci_device = spdk_pci_get_first_device(); pci_device != NULL; + pci_device =
spdk_pci_get_next_device(pci_device)) { + if (strcmp(spdk_pci_device_get_type(pci_device), "vmd") != 0) { + continue; + } + + if (!g_opts.all_devices && + spdk_pci_addr_compare(&g_opts.pci_addr, &pci_device->addr) != 0) { + continue; + } + + rc = spdk_pci_addr_fmt(addr_buf, sizeof(addr_buf), &pci_device->addr); + if (rc != 0) { + fprintf(stderr, "Failed to format VMD's PCI address\n"); + status = 1; + break; + } + + if (g_opts.action == APP_ACTION_GET) { + rc = spdk_vmd_get_led_state(pci_device, &led_state); + if (spdk_unlikely(rc != 0)) { + fprintf(stderr, "Failed to retrieve the state of the LED on %s\n", + addr_buf); + status = 1; + break; + } + + printf("%s: %s\n", addr_buf, g_led_states[led_state]); + } else { + rc = spdk_vmd_set_led_state(pci_device, g_opts.led_state); + if (spdk_unlikely(rc != 0)) { + fprintf(stderr, "Failed to set LED state on %s\n", addr_buf); + status = 1; + break; + } + } + } + + spdk_vmd_fini(); + + return status; +} diff --git a/src/spdk/examples/vmd/lsvmd/.gitignore b/src/spdk/examples/vmd/lsvmd/.gitignore new file mode 100644 index 000000000..3956616f7 --- /dev/null +++ b/src/spdk/examples/vmd/lsvmd/.gitignore @@ -0,0 +1 @@ +lsvmd diff --git a/src/spdk/examples/vmd/lsvmd/Makefile b/src/spdk/examples/vmd/lsvmd/Makefile new file mode 100644 index 000000000..96fde0773 --- /dev/null +++ b/src/spdk/examples/vmd/lsvmd/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = lsvmd + +C_SRCS := lsvmd.c + +SPDK_LIB_LIST = vmd log + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/examples/vmd/lsvmd/lsvmd.c b/src/spdk/examples/vmd/lsvmd/lsvmd.c new file mode 100644 index 000000000..b083bc049 --- /dev/null +++ b/src/spdk/examples/vmd/lsvmd/lsvmd.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/log.h" +#include "spdk/env.h" +#include "spdk/vmd.h" + +struct spdk_pci_addr g_probe_addr; + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "r:d")) != -1) { + switch (op) { + case 'r': + if (spdk_pci_addr_parse(&g_probe_addr, optarg)) { + SPDK_ERRLOG("Error parsing PCI address\n"); + return 1; + } + + break; + + case 'd': + spdk_log_set_print_level(SPDK_LOG_DEBUG); + spdk_log_set_flag("vmd"); + break; + + default: + return 1; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + struct spdk_env_opts opts; + struct spdk_pci_device *pci_device; + char addr_buf[128]; + int rc; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "lsvmd"; + + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + return 1; + } + + rc = spdk_vmd_init(); + if (rc) { + SPDK_ERRLOG("No VMD Controllers found\n"); + } + + for (pci_device = spdk_pci_get_first_device(); pci_device != NULL; + pci_device = spdk_pci_get_next_device(pci_device)) { + if (strcmp(spdk_pci_device_get_type(pci_device), "vmd") == 0) { + rc = spdk_pci_addr_fmt(addr_buf, sizeof(addr_buf), &pci_device->addr); + if (rc != 0) { + fprintf(stderr, "Failed to format VMD's PCI address\n"); + continue; + } + + printf("%s\n", addr_buf); + } + } + + spdk_vmd_fini(); + + return rc; +} |