diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/examples/nvme | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21. (refs: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/examples/nvme')
32 files changed, 8752 insertions, 0 deletions
diff --git a/src/spdk/examples/nvme/Makefile b/src/spdk/examples/nvme/Makefile new file mode 100644 index 00000000..f7066626 --- /dev/null +++ b/src/spdk/examples/nvme/Makefile @@ -0,0 +1,47 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y += hello_world identify perf reserve nvme_manage arbitration \ + hotplug cmb_copy + +DIRS-$(CONFIG_FIO_PLUGIN) += fio_plugin + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/examples/nvme/arbitration/.gitignore b/src/spdk/examples/nvme/arbitration/.gitignore new file mode 100644 index 00000000..f1d6e38d --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/.gitignore @@ -0,0 +1 @@ +arbitration diff --git a/src/spdk/examples/nvme/arbitration/Makefile b/src/spdk/examples/nvme/arbitration/Makefile new file mode 100644 index 00000000..3affeb80 --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(CURDIR)/../../.. +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = arbitration + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/arbitration/arbitration.c b/src/spdk/examples/nvme/arbitration/arbitration.c new file mode 100644 index 00000000..8065b1ba --- /dev/null +++ b/src/spdk/examples/nvme/arbitration/arbitration.c @@ -0,0 +1,1167 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/nvme_intel.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_intel_rw_latency_page latency_page; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + } nvme; + + struct ns_entry *next; + uint32_t io_size_blocks; + uint64_t size_in_ios; + char name[1024]; +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + uint64_t io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + struct spdk_nvme_qpair *qpair; + struct ns_worker_ctx *next; +}; + +struct arb_task { + struct ns_worker_ctx *ns_ctx; + void *buf; +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; + enum spdk_nvme_qprio qprio; +}; + +struct arb_context { + int shm_id; + int outstanding_commands; + int num_namespaces; + int num_workers; + int rw_percentage; + int is_random; + int queue_depth; + int time_in_sec; + int io_count; + uint8_t latency_tracking_enable; + uint8_t 
arbitration_mechanism; + uint8_t arbitration_config; + uint32_t io_size_bytes; + uint32_t max_completions; + uint64_t tsc_rate; + const char *core_mask; + const char *workload_type; +}; + +struct feature { + uint32_t result; + bool valid; +}; + +static struct spdk_mempool *task_pool = NULL; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; +static struct worker_thread *g_workers = NULL; + +static struct feature features[256]; + +static struct arb_context g_arbitration = { + .shm_id = -1, + .outstanding_commands = 0, + .num_workers = 0, + .num_namespaces = 0, + .rw_percentage = 50, + .queue_depth = 64, + .time_in_sec = 60, + .io_count = 100000, + .latency_tracking_enable = 0, + .arbitration_mechanism = SPDK_NVME_CC_AMS_RR, + .arbitration_config = 0, + .io_size_bytes = 131072, + .max_completions = 0, + /* Default 4 cores for urgent/high/medium/low */ + .core_mask = "0xf", + .workload_type = "randrw", +}; + +/* + * For the weighted round robin arbitration mechanism, the smaller value between + * weight and burst will be picked to execute the commands in one queue. + */ +#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32 +#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16 +#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8 +#define USER_SPECIFIED_ARBITRATION_BURST 7 /* No limit */ + +/* + * Description of dword for priority weight and arbitration burst + * ------------------------------------------------------------------------------ + * 31 : 24 | 23 : 16 | 15 : 08 | 07 : 03 | 02 : 00 + * ------------------------------------------------------------------------------ + * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst + * ------------------------------------------------------------------------------ + * + * The priority weights are zero-based values. 
+ */ +#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT 24 +#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT 16 +#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT 8 +#define SPDK_NVME_PRIO_WEIGHT_MASK 0xFF +#define SPDK_NVME_ARB_BURST_MASK 0x7 + +#define SPDK_NVME_QPRIO_MAX (SPDK_NVME_QPRIO_LOW + 1) + +static void task_complete(struct arb_task *task); + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr); + +static const char *print_qprio(enum spdk_nvme_qprio); + + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes || + spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), + g_arbitration.io_size_bytes); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->nvme.ctrlr = ctrlr; + entry->nvme.ns = ns; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes; + entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_arbitration.num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if 
(spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_arbitration.outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + int nsid, num_ns; + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry)); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + if ((g_arbitration.latency_tracking_enable != 0) && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { + get_arb_feature(ctrlr); + + if (g_arbitration.arbitration_config != 0) { + set_arb_feature(ctrlr); + get_arb_feature(ctrlr); + } + } +} + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct 
ns_worker_ctx *ns_ctx) +{ + struct arb_task *task = NULL; + uint64_t offset_in_ios; + int rc; + struct ns_entry *entry = ns_ctx->entry; + + task = spdk_mempool_get(task_pool); + if (!task) { + fprintf(stderr, "Failed to get task from task_pool\n"); + exit(1); + } + + task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL); + if (!task->buf) { + spdk_mempool_put(task_pool, task); + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + + task->ns_ctx = ns_ctx; + + if (g_arbitration.is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + if ((g_arbitration.rw_percentage == 100) || + (g_arbitration.rw_percentage != 0 && + ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) { + rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } else { + rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } + + ns_ctx->current_queue_depth++; +} + +static void +task_complete(struct arb_task *task) +{ + struct ns_worker_ctx *ns_ctx; + + ns_ctx = task->ns_ctx; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + + spdk_dma_free(task->buf); + spdk_mempool_put(task_pool, task); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+ */ + if (!ns_ctx->is_draining) { + submit_single_io(ns_ctx); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct arb_task *)ctx); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ + spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions); +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + while (queue_depth-- > 0) { + submit_single_io(ns_ctx); + } +} + +static void +drain_io(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->is_draining = true; + while (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + } +} + +static int +init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio) +{ + struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr; + struct spdk_nvme_io_qpair_opts opts; + + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + opts.qprio = qprio; + + ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); + if (!ns_ctx->qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return 1; + } + + return 0; +} + +static void +cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); +} + +static void +cleanup(uint32_t task_count) +{ + struct ns_entry *entry = g_namespaces; + struct ns_entry *next_entry = NULL; + struct worker_thread *worker = g_workers; + struct worker_thread *next_worker = NULL; + + while (entry) { + next_entry = entry->next; + free(entry); + entry = next_entry; + }; + + while (worker) { + next_worker = worker->next; + free(worker->ns_ctx); + free(worker); + worker = next_worker; + }; + + if (spdk_mempool_count(task_pool) != (size_t)task_count) { + fprintf(stderr, "task_pool count is %zu but should be %u\n", + spdk_mempool_count(task_pool), task_count); + } + spdk_mempool_free(task_pool); +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct 
worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + + printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio)); + + /* Allocate a queue pair for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_arbitration.queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + drain_io(ns_ctx); + cleanup_ns_worker_ctx(ns_ctx); + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void +usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-s io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-l enable latency tracking, default: disabled]\n"); + printf("\t\t(0 - disabled; 1 - enabled)\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 0xf - 4 cores)]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-a arbitration mechanism, must be one of below]\n"); + printf("\t\t(0, 1, 2)]\n"); + printf("\t\t(0: default round 
robin mechanism)]\n"); + printf("\t\t(1: weighted round robin mechanism)]\n"); + printf("\t\t(2: vendor specific mechanism)]\n"); + printf("\t[-b enable arbitration user configuration, default: disabled]\n"); + printf("\t\t(0 - disabled; 1 - enabled)\n"); + printf("\t[-n subjected IOs for performance comparison]\n"); + printf("\t[-i shared memory group ID]\n"); +} + +static const char * +print_qprio(enum spdk_nvme_qprio qprio) +{ + switch (qprio) { + case SPDK_NVME_QPRIO_URGENT: + return "urgent priority queue"; + case SPDK_NVME_QPRIO_HIGH: + return "high priority queue"; + case SPDK_NVME_QPRIO_MEDIUM: + return "medium priority queue"; + case SPDK_NVME_QPRIO_LOW: + return "low priority queue"; + default: + return "invalid priority queue"; + } +} + + +static void +print_configuration(char *program_name) +{ + printf("%s run with configuration:\n", program_name); + printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n", + program_name, + g_arbitration.queue_depth, + g_arbitration.io_size_bytes, + g_arbitration.workload_type, + g_arbitration.rw_percentage, + g_arbitration.latency_tracking_enable, + g_arbitration.time_in_sec, + g_arbitration.core_mask, + g_arbitration.max_completions, + g_arbitration.arbitration_mechanism, + g_arbitration.arbitration_config, + g_arbitration.io_count, + g_arbitration.shm_id); +} + + +static void +print_performance(void) +{ + float io_per_second, sent_all_io_in_secs; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec; + sent_all_io_in_secs = g_arbitration.io_count / io_per_second; + printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n", + ns_ctx->entry->name, worker->lcore, + io_per_second, sent_all_io_in_secs, g_arbitration.io_count); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + 
printf("========================================================\n"); + + printf("\n"); +} + +static void +print_latency_page(struct ctrlr_entry *entry) +{ + int i; + + printf("\n"); + printf("%s\n", entry->name); + printf("--------------------------------------------------------\n"); + + for (i = 0; i < 32; i++) { + if (entry->latency_page.buckets_32us[i]) + printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32, + entry->latency_page.buckets_32us[i]); + } + for (i = 0; i < 31; i++) { + if (entry->latency_page.buckets_1ms[i]) + printf("Bucket %dms - %dms: %d\n", i + 1, i + 2, + entry->latency_page.buckets_1ms[i]); + } + for (i = 0; i < 31; i++) { + if (entry->latency_page.buckets_32ms[i]) + printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32, + entry->latency_page.buckets_32ms[i]); + } +} + +static void +print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page) +{ + struct ctrlr_entry *ctrlr; + + printf("%s Latency Statistics:\n", op_name); + printf("========================================================\n"); + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + if (spdk_nvme_ctrlr_cmd_get_log_page( + ctrlr->ctrlr, log_page, + SPDK_NVME_GLOBAL_NS_TAG, + &ctrlr->latency_page, + sizeof(struct spdk_nvme_intel_rw_latency_page), + 0, + enable_latency_tracking_complete, + NULL)) { + printf("nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + g_arbitration.outstanding_commands++; + } else { + printf("Controller %s: %s latency statistics not supported\n", + ctrlr->name, op_name); + } + ctrlr = ctrlr->next; + } + + while (g_arbitration.outstanding_commands) { + ctrlr = g_controllers; + while (ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr); + ctrlr = ctrlr->next; + } + } + + ctrlr = g_controllers; + while (ctrlr) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) { + print_latency_page(ctrlr); + } + ctrlr = ctrlr->next; + } 
+ printf("\n"); +} + +static void +print_stats(void) +{ + print_performance(); + if (g_arbitration.latency_tracking_enable) { + if (g_arbitration.rw_percentage != 0) { + print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY); + } + if (g_arbitration.rw_percentage != 100) { + print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY); + } + } +} + +static int +parse_args(int argc, char **argv) +{ + const char *workload_type = NULL; + int op = 0; + bool mix_specified = false; + + while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) { + switch (op) { + case 'c': + g_arbitration.core_mask = optarg; + break; + case 'i': + g_arbitration.shm_id = atoi(optarg); + break; + case 'l': + g_arbitration.latency_tracking_enable = atoi(optarg); + break; + case 'm': + g_arbitration.max_completions = atoi(optarg); + break; + case 'q': + g_arbitration.queue_depth = atoi(optarg); + break; + case 's': + g_arbitration.io_size_bytes = atoi(optarg); + break; + case 't': + g_arbitration.time_in_sec = atoi(optarg); + break; + case 'w': + g_arbitration.workload_type = optarg; + break; + case 'M': + g_arbitration.rw_percentage = atoi(optarg); + mix_specified = true; + break; + case 'a': + g_arbitration.arbitration_mechanism = atoi(optarg); + break; + case 'b': + g_arbitration.arbitration_config = atoi(optarg); + break; + case 'n': + g_arbitration.io_count = atoi(optarg); + break; + case 'h': + default: + usage(argv[0]); + return 1; + } + } + + workload_type = g_arbitration.workload_type; + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_arbitration.rw_percentage = 
100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_arbitration.rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_arbitration.is_random = 0; + } else { + g_arbitration.is_random = 1; + } + + if (g_arbitration.latency_tracking_enable != 0 && + g_arbitration.latency_tracking_enable != 1) { + fprintf(stderr, + "-l must be specified to value 0 or 1.\n"); + return 1; + } + + switch (g_arbitration.arbitration_mechanism) { + case SPDK_NVME_CC_AMS_RR: + case SPDK_NVME_CC_AMS_WRR: + case SPDK_NVME_CC_AMS_VS: + break; + default: + fprintf(stderr, + "-a must be specified to value 0, 1, or 7.\n"); + return 1; + } + + if (g_arbitration.arbitration_config != 0 && + g_arbitration.arbitration_config != 1) { + fprintf(stderr, + "-b must be specified to value 0 or 1.\n"); + return 1; + } else if (g_arbitration.arbitration_config == 1 && + g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) { + fprintf(stderr, + "-a must be specified to 1 (WRR) together.\n"); + return 1; + } + + return 0; +} + +static int +register_workers(void) +{ + uint32_t i; + struct worker_thread *worker; + enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT; + + g_workers = NULL; + g_arbitration.num_workers = 0; + + SPDK_ENV_FOREACH_CORE(i) { + worker = calloc(1, sizeof(*worker)); + if (worker == NULL) { + 
fprintf(stderr, "Unable to allocate worker\n"); + return -1; + } + + worker->lcore = i; + worker->next = g_workers; + g_workers = worker; + g_arbitration.num_workers++; + + if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) { + qprio++; + } + + worker->qprio = qprio % SPDK_NVME_QPRIO_MAX; + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* Update with user specified arbitration configuration */ + opts->arb_mechanism = g_arbitration.arbitration_mechanism; + + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attached to %s\n", trid->traddr); + + /* Update with actual arbitration configuration in use */ + g_arbitration.arbitration_mechanism = opts->arb_mechanism; + + register_ctrlr(ctrlr); +} + +static int +register_controllers(void) +{ + printf("Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_arbitration.num_namespaces == 0) { + fprintf(stderr, "No valid namespaces to continue IO testing\n"); + return 1; + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + if (g_arbitration.latency_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(entry->ctrlr, false); + } + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = 
g_arbitration.num_namespaces > g_arbitration.num_workers ? + g_arbitration.num_namespaces : g_arbitration.num_workers; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + + ns_ctx = malloc(sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return 1; + } + memset(ns_ctx, 0, sizeof(*ns_ctx)); + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = worker->next; + if (worker == NULL) { + worker = g_workers; + } + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + + } + + return 0; +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + + g_arbitration.outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10 = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]); +} + +static void +get_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION); + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; + unsigned ab, lpw, mpw, hpw; + + ab = arb & SPDK_NVME_ARB_BURST_MASK; + lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1; + + 
printf("Current Arbitration Configuration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (ab == SPDK_NVME_ARB_BURST_MASK) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << ab); + } + + printf("Low Priority Weight: %u\n", lpw); + printf("Medium Priority Weight: %u\n", mpw); + printf("High Priority Weight: %u\n", hpw); + printf("\n"); + } +} + +static void +set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("set_feature(0x%02X) failed\n", fid); + feature->valid = false; + } else { + printf("Set Arbitration Feature Successfully\n"); + } + + g_arbitration.outstanding_commands--; +} + +static int +set_arb_feature(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + struct spdk_nvme_cmd cmd = {}; + uint32_t arb = 0; + unsigned ab, lpw, mpw, hpw; + + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION; + + g_arbitration.outstanding_commands = 0; + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK; + hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT; + mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT; + lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT; + arb = hpw | mpw | lpw | ab; + cmd.cdw11 = arb; + } + + ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, + set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]); + if (ret) { + printf("Set Arbitration Feature: Failed 0x%x\n", ret); + return 1; + } + + g_arbitration.outstanding_commands++; + + while (g_arbitration.outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) { + printf("Set Arbitration Feature failed and use default configuration\n"); + } + + return 0; +} + +int +main(int argc, 
char **argv) +{ + int rc; + struct worker_thread *worker, *master_worker; + unsigned master_core; + char task_pool_name[30]; + uint32_t task_count; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "arb"; + opts.core_mask = g_arbitration.core_mask; + opts.shm_id = g_arbitration.shm_id; + if (spdk_env_init(&opts) < 0) { + return 1; + } + + g_arbitration.tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + return 1; + } + + if (register_controllers() != 0) { + return 1; + } + + if (associate_workers_with_ns() != 0) { + return 1; + } + + snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid()); + + /* + * The task_count will be dynamically calculated based on the + * number of attached active namespaces, queue depth and number + * of cores (workers) involved in the IO perations. + */ + task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ? + g_arbitration.num_namespaces : g_arbitration.num_workers; + task_count *= g_arbitration.queue_depth; + + task_pool = spdk_mempool_create(task_pool_name, task_count, + sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY); + if (task_pool == NULL) { + fprintf(stderr, "could not initialize task pool\n"); + return 1; + } + + print_configuration(argv[0]); + + printf("Initialization complete. 
Launching workers.\n"); + + /* Launch all of the slave workers */ + master_core = spdk_env_get_current_core(); + master_worker = NULL; + worker = g_workers; + while (worker != NULL) { + if (worker->lcore != master_core) { + spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker); + } else { + assert(master_worker == NULL); + master_worker = worker; + } + worker = worker->next; + } + + assert(master_worker != NULL); + rc = work_fn(master_worker); + + spdk_env_thread_wait_all(); + + print_stats(); + + unregister_controllers(); + + cleanup(task_count); + + if (rc != 0) { + fprintf(stderr, "%s: errors occured\n", argv[0]); + } + + return rc; +} diff --git a/src/spdk/examples/nvme/cmb_copy/.gitignore b/src/spdk/examples/nvme/cmb_copy/.gitignore new file mode 100644 index 00000000..fce73803 --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/.gitignore @@ -0,0 +1 @@ +cmb_copy diff --git a/src/spdk/examples/nvme/cmb_copy/Makefile b/src/spdk/examples/nvme/cmb_copy/Makefile new file mode 100644 index 00000000..86c18143 --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Eideticom Inc +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Eideticom Inc nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = cmb_copy + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/cmb_copy/cmb_copy.c b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c new file mode 100644 index 00000000..223133ca --- /dev/null +++ b/src/spdk/examples/nvme/cmb_copy/cmb_copy.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Eideticom Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Eideticom Inc, nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/env.h" +#include "spdk/nvme.h" + +#define CMB_COPY_DELIM "-" +#define CMB_COPY_READ 0 +#define CMB_COPY_WRITE 1 + +struct nvme_io { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_transport_id trid; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + unsigned nsid; + unsigned slba; + unsigned nlbas; + uint32_t lba_size; + unsigned done; +}; + +struct cmb_t { + struct spdk_nvme_transport_id trid; + struct spdk_nvme_ctrlr *ctrlr; +}; + +struct config { + struct nvme_io read; + struct nvme_io write; + struct cmb_t cmb; + size_t copy_size; +}; + +static struct config g_config; + +/* Namespaces index from 1. 
Return 0 to invoke an error */ +static unsigned get_nsid(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return g_config.read.nsid; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return g_config.write.nsid; + } + return 0; +} + +static int get_rw(const struct spdk_nvme_transport_id *trid) +{ + if (!strcmp(trid->traddr, g_config.read.trid.traddr)) { + return CMB_COPY_READ; + } + if (!strcmp(trid->traddr, g_config.write.trid.traddr)) { + return CMB_COPY_WRITE; + } + return -1; +} + +static void +check_io(void *arg, const struct spdk_nvme_cpl *completion) +{ + int *rw = (unsigned *)arg; + + if (*rw == CMB_COPY_READ) { + g_config.read.done = 1; + } else { + g_config.write.done = 1; + } +} + +static int +cmb_copy(void) +{ + int rc = 0, rw; + void *buf; + + /* Allocate QPs for the read and write controllers */ + g_config.read.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.read.ctrlr, NULL, 0); + g_config.write.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_config.write.ctrlr, NULL, 0); + if (g_config.read.qpair == NULL || g_config.read.qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return -ENOMEM; + } + + /* Allocate a buffer from our CMB */ + buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(g_config.cmb.ctrlr, g_config.copy_size); + if (buf == NULL) { + printf("ERROR: buffer allocation failed\n"); + printf("Are you sure %s has a valid CMB?\n", + g_config.cmb.trid.traddr); + return -ENOMEM; + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + rw = CMB_COPY_READ; + /* Do the read to the CMB IO buffer */ + rc = spdk_nvme_ns_cmd_read(g_config.read.ns, g_config.read.qpair, buf, + g_config.read.slba, g_config.read.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + return -EIO; + } + while (!g_config.read.done) { + spdk_nvme_qpair_process_completions(g_config.read.qpair, 0); + } + + /* Do the write 
from the CMB IO buffer */ + rw = CMB_COPY_WRITE; + rc = spdk_nvme_ns_cmd_write(g_config.write.ns, g_config.write.qpair, buf, + g_config.write.slba, g_config.write.nlbas, + check_io, &rw, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + return -EIO; + } + while (!g_config.write.done) { + spdk_nvme_qpair_process_completions(g_config.write.qpair, 0); + } + + /* Clear the done flags */ + g_config.read.done = 0; + g_config.write.done = 0; + + /* Free CMB buffer */ + spdk_nvme_ctrlr_free_cmb_io_buffer(g_config.cmb.ctrlr, buf, + g_config.copy_size); + + /* Free the queues */ + spdk_nvme_ctrlr_free_io_qpair(g_config.read.qpair); + spdk_nvme_ctrlr_free_io_qpair(g_config.write.qpair); + + return rc; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* We will only attach to the read or write controller */ + if (strcmp(trid->traddr, g_config.read.trid.traddr) && + strcmp(trid->traddr, g_config.write.trid.traddr)) { + printf("%s - not probed %s!\n", __func__, trid->traddr); + return 0; + } + + printf("%s - probed %s!\n", __func__, trid->traddr); + return 1; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, get_nsid(trid)); + if (ns == NULL) { + fprintf(stderr, "Could not locate namespace %d on controller %s.\n", + get_nsid(trid), trid->traddr); + exit(-1); + } + if (get_rw(trid) == CMB_COPY_READ) { + g_config.read.ctrlr = ctrlr; + g_config.read.ns = ns; + g_config.read.lba_size = spdk_nvme_ns_get_sector_size(ns); + } else { + g_config.write.ctrlr = ctrlr; + g_config.write.ns = ns; + g_config.write.lba_size = spdk_nvme_ns_get_sector_size(ns); + } + printf("%s - attached %s!\n", __func__, trid->traddr); + + return; +} + +static void +usage(char *program_name) +{ + printf("%s options (all mandatory)", 
program_name); + printf("\n"); + printf("\t[-r NVMe read parameters]\n"); + printf("\t[-w NVMe write parameters]\n"); + printf("\t[-c CMB to use for data buffers]\n"); + printf("\n"); + printf("Read/Write params:\n"); + printf(" <pci id>-<namespace>-<start LBA>-<number of LBAs>\n"); +} + +static void +parse(char *in, struct nvme_io *io) +{ + char *tok = NULL; + + tok = strtok(in, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + snprintf(&io->trid.traddr[0], SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->nsid = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->slba = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok == NULL) { + goto err; + } + io->nlbas = atoi(tok); + + tok = strtok(NULL, CMB_COPY_DELIM); + if (tok != NULL) { + goto err; + } + return; + +err: + fprintf(stderr, "%s: error parsing %s\n", __func__, in); + exit(-1); + +} + +static int +parse_args(int argc, char **argv) +{ + int op; + unsigned read = 0, write = 0, cmb = 0; + + while ((op = getopt(argc, argv, "r:w:c:")) != -1) { + switch (op) { + case 'r': + parse(optarg, &g_config.read); + read = 1; + break; + case 'w': + parse(optarg, &g_config.write); + write = 1; + break; + case 'c': + snprintf(g_config.cmb.trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1, + "%s", optarg); + cmb = 1; + break; + default: + usage(argv[0]); + return 1; + } + } + + if ((!read || !write || !cmb)) { + usage(argv[0]); + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc = 0; + struct spdk_env_opts opts; + + /* + * Parse the input arguments. 
For now we use the following + * format list: + * + * <pci id>-<namespace>-<start LBA>-<number of LBAs> + * + */ + rc = parse_args(argc, argv); + if (rc) { + fprintf(stderr, "Error in parse_args(): %d\n", + rc); + return -1; + } + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "cmb_copy"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + /* + * CMBs only apply to PCIe attached NVMe controllers so we + * only probe the PCIe bus. This is the default when we pass + * in NULL for the first argument. + */ + + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc) { + fprintf(stderr, "Error in spdk_nvme_probe(): %d\n", + rc); + return -1; + } + + /* + * For now enforce that the read and write controller are not + * the same. This avoids an internal only DMA. + */ + if (!strcmp(g_config.write.trid.traddr, g_config.read.trid.traddr)) { + fprintf(stderr, "Read and Write controllers must differ!\n"); + return -1; + } + + /* + * Perform a few sanity checks and set the buffer size for the + * CMB. + */ + if (g_config.read.nlbas * g_config.read.lba_size != + g_config.write.nlbas * g_config.write.lba_size) { + fprintf(stderr, "Read and write sizes do not match!\n"); + return -1; + } + g_config.copy_size = g_config.read.nlbas * g_config.read.lba_size; + + /* + * Get the ctrlr pointer for the CMB. For now we assume this + * is either the read or write NVMe controller though in + * theory that is not a necessary condition. 
+ */ + + if (!strcmp(g_config.cmb.trid.traddr, g_config.read.trid.traddr)) { + g_config.cmb.ctrlr = g_config.read.ctrlr; + } + if (!strcmp(g_config.cmb.trid.traddr, g_config.write.trid.traddr)) { + g_config.cmb.ctrlr = g_config.write.ctrlr; + } + + /* + * Call the cmb_copy() function which performs the CMB + * based copy or returns an error code if it fails. + */ + rc = cmb_copy(); + if (rc) { + fprintf(stderr, "Error in spdk_cmb_copy(): %d\n", + rc); + return -1; + } + + return rc; +} diff --git a/src/spdk/examples/nvme/fio_plugin/.gitignore b/src/spdk/examples/nvme/fio_plugin/.gitignore new file mode 100644 index 00000000..1b0b36ac --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/src/spdk/examples/nvme/fio_plugin/Makefile b/src/spdk/examples/nvme/fio_plugin/Makefile new file mode 100644 index 00000000..dfd7917d --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(CURDIR)/../../.. + +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP := fio_plugin + +CFLAGS += -I$(CONFIG_FIO_SOURCE_DIR) +LDFLAGS += -shared -rdynamic + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/fio_plugin/README.md b/src/spdk/examples/nvme/fio_plugin/README.md new file mode 100644 index 00000000..2c533282 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/README.md @@ -0,0 +1,97 @@ +# Compiling fio + +First, clone the fio source repository from https://github.com/axboe/fio + + git clone https://github.com/axboe/fio + +Then check out the fio 3.3: + + cd fio && git checkout fio-3.3 + +Finally, compile the code: + + make + +# Compiling SPDK + +First, clone the SPDK source repository from https://github.com/spdk/spdk + + git clone https://github.com/spdk/spdk + git submodule update --init + +Then, run the SPDK configure script to enable fio (point it to the root of the fio repository): + + cd spdk + ./configure --with-fio=/path/to/fio/repo <other configuration options> + +Finally, build SPDK: + + make + +**Note to advanced users**: These steps assume you're using the DPDK submodule. 
If you are using your +own version of DPDK, the fio plugin requires that DPDK be compiled with -fPIC. You can compile DPDK +with -fPIC by modifying your DPDK configuration file and adding the line: + + EXTRA_CFLAGS=-fPIC + +# Usage + +To use the SPDK fio plugin with fio, specify the plugin binary using LD_PRELOAD when running +fio and set ioengine=spdk in the fio configuration file (see example_config.fio in the same +directory as this README). + + LD_PRELOAD=<path to spdk repo>/examples/nvme/fio_plugin/fio_plugin fio + +To select NVMe devices, you pass an SPDK Transport Identifier string as the filename. These are in the +form: + + filename=key=value [key=value] ... ns=value + +Specifically, for local PCIe NVMe devices it will look like this: + + filename=trtype=PCIe traddr=0000.04.00.0 ns=1 + +And remote devices accessed via NVMe over Fabrics will look like this: + + filename=trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1 + + +**Note**: The specification of the PCIe address should not use the normal ':' +and instead only use '.'. This is a limitation in fio - it splits filenames on +':'. Also, the NVMe namespaces start at 1, not 0, and the namespace must be +specified at the end of the string. + +Currently the SPDK fio plugin is limited to the thread usage model, so fio jobs must also specify thread=1 +when using the SPDK fio plugin. + +fio also currently has a race condition on shutdown if dynamically loading the ioengine by specifying the +engine's full path via the ioengine parameter - LD_PRELOAD is recommended to avoid this race condition. + +When testing random workloads, it is recommended to set norandommap=1. fio's random map +processing consumes extra CPU cycles which will degrade performance over time with +the fio_plugin since all I/O are submitted and completed on a single CPU core. + +When testing FIO on multiple NVMe SSDs with the SPDK plugin, it is recommended to use multiple jobs in the FIO configuration. 
+It has been observed that there is some performance gap between FIO (with SPDK plugin enabled) and SPDK perf +(examples/nvme/perf/perf) on testing multiple NVMe SSDs. If you use one job (i.e., use one CPU core) configured for +FIO test, the performance is worse than SPDK perf (also using one CPU core) against many NVMe SSDs. But if you use +multiple jobs for FIO test, the performance of FIO is similar to SPDK perf. After analyzing this phenomenon, we +think that is caused by the FIO architecture. Mainly FIO can scale with multiple threads (i.e., using CPU cores), +but it is not good to use one thread against many I/O devices. + +# End-to-end Data Protection (Optional) + +To run with PI enabled, the following setup steps are required. +First, format device namespace with proper PI setting. For example: + + nvme format /dev/nvme0n1 -l 1 -i 1 -p 0 -m 1 + +In the fio configuration file, add PRACT and set PRCHK by flags(GUARD|REFTAG|APPTAG) properly. For example: + + pi_act=0 + pi_chk=GUARD + +Blocksize should be set as the sum of data and metadata. For example, if data blocksize is 512 Byte, host generated +PI metadata is 8 Byte, then blocksize in the fio configuration file should be 520 Byte: + + bs=520 diff --git a/src/spdk/examples/nvme/fio_plugin/example_config.fio b/src/spdk/examples/nvme/fio_plugin/example_config.fio new file mode 100644 index 00000000..a8e62ccb --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/example_config.fio @@ -0,0 +1,15 @@ +[global] +ioengine=spdk +thread=1 +group_reporting=1 +direct=1 +verify=0 +time_based=1 +ramp_time=0 +runtime=2 +iodepth=128 +rw=randrw +bs=4k + +[test] +numjobs=1 diff --git a/src/spdk/examples/nvme/fio_plugin/fio_plugin.c b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c new file mode 100644 index 00000000..7785c399 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/fio_plugin.c @@ -0,0 +1,943 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/crc16.h" + +#include "config-host.h" +#include "fio.h" +#include "optgroup.h" + +#define NVME_IO_ALIGN 4096 +#define FIO_NVME_PI_APPTAG 0x1234 + +static bool spdk_env_initialized; +static int spdk_enable_sgl = 0; +static uint32_t spdk_pract_flag; +static uint32_t spdk_prchk_flags; + +struct spdk_fio_options { + void *pad; /* off1 used in option descriptions may not be 0 */ + int mem_size; + int shm_id; + int enable_sgl; + char *hostnqn; + int pi_act; + char *pi_chk; +}; + +struct spdk_fio_request { + struct io_u *io; + /** Offset in current iovec, fio only uses 1 vector */ + uint32_t iov_offset; + + /** Application tag and its mask for NVMe PI */ + uint16_t appmask; + uint16_t apptag; + + struct spdk_fio_thread *fio_thread; +}; + +struct spdk_fio_ctrlr { + struct spdk_nvme_transport_id tr_id; + struct spdk_nvme_ctrlr_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_fio_ctrlr *next; +}; + +static struct spdk_fio_ctrlr *ctrlr_g; +static int td_count; +static pthread_t g_ctrlr_thread_id = 0; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static bool g_error; + +struct spdk_fio_qpair { + struct fio_file *f; + struct spdk_nvme_qpair *qpair; + struct spdk_nvme_ns *ns; + uint32_t io_flags; + bool do_nvme_pi; + struct spdk_fio_qpair *next; + struct spdk_fio_ctrlr *fio_ctrlr; +}; + +struct spdk_fio_thread { + struct thread_data *td; + + struct spdk_fio_qpair *fio_qpair; + struct spdk_fio_qpair *fio_qpair_current; // the current fio_qpair to be handled. 
+ + struct io_u **iocq; // io completion queue + unsigned int iocq_count; // number of iocq entries filled by last getevents + unsigned int iocq_size; // number of iocq entries allocated + struct fio_file *current_f; // fio_file given by user + +}; + +static void * +spdk_fio_poll_ctrlrs(void *arg) +{ + struct spdk_fio_ctrlr *fio_ctrlr; + int oldstate; + int rc; + + /* Loop until the thread is cancelled */ + while (true) { + rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + pthread_mutex_lock(&mutex); + fio_ctrlr = ctrlr_g; + + while (fio_ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr); + fio_ctrlr = fio_ctrlr->next; + } + + pthread_mutex_unlock(&mutex); + + rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); + if (rc != 0) { + SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", + rc, spdk_strerror(rc)); + } + + /* This is a pthread cancellation point and cannot be removed. 
*/ + sleep(1); + } + + return NULL; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_options *fio_options = td->eo; + + if (fio_options->hostnqn) { + snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn); + } + + return true; +} + +static struct spdk_fio_ctrlr * +get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) +{ + struct spdk_fio_ctrlr *fio_ctrlr = ctrlr_g; + while (fio_ctrlr) { + if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) { + return fio_ctrlr; + } + + fio_ctrlr = fio_ctrlr->next; + } + + return NULL; +} + +static bool +fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair) +{ + struct spdk_nvme_ns *ns = NULL; + const struct spdk_nvme_ns_data *nsdata; + + ns = fio_qpair->ns; + nsdata = spdk_nvme_ns_get_data(ns); + + if (!spdk_nvme_ns_supports_extended_lba(ns)) { + return false; + } + + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return false; + } + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (nsdata->dps.md_start) { + return false; + } + + /* Controller performs PI setup and check */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return false; + } + + /* Type3 don't support REFTAG */ + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { + return false; + } + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct thread_data *td = cb_ctx; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_ctrlr *fio_ctrlr; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns; + struct fio_file *f = fio_thread->current_f; + uint32_t ns_id; + char *p; + + p = strstr(f->file_name, "ns="); + assert(p != NULL); + ns_id = atoi(p + 3); + if 
(!ns_id) { + SPDK_ERRLOG("namespace id should be >=1, but current value=0\n"); + g_error = true; + return; + } + + fio_ctrlr = get_fio_ctrlr(trid); + /* it is a new ctrlr and needs to be added */ + if (!fio_ctrlr) { + /* Create an fio_ctrlr and add it to the list */ + fio_ctrlr = calloc(1, sizeof(*fio_ctrlr)); + if (!fio_ctrlr) { + SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n"); + g_error = true; + return; + } + fio_ctrlr->opts = *opts; + fio_ctrlr->ctrlr = ctrlr; + fio_ctrlr->tr_id = *trid; + fio_ctrlr->next = ctrlr_g; + ctrlr_g = fio_ctrlr; + } + + ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id); + if (ns == NULL) { + SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + + if (!spdk_nvme_ns_is_active(ns)) { + SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id); + g_error = true; + return; + } + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if ((fio_qpair->f == f) || + ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) && + (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) { + /* Not the error case. 
Avoid duplicated connection */ + return; + } + fio_qpair = fio_qpair->next; + } + + /* create a new qpair */ + fio_qpair = calloc(1, sizeof(*fio_qpair)); + if (!fio_qpair) { + g_error = true; + SPDK_ERRLOG("Cannot allocate space for fio_qpair\n"); + return; + } + + fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, NULL, 0); + if (!fio_qpair->qpair) { + SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n"); + g_error = true; + free(fio_qpair); + return; + } + + fio_qpair->ns = ns; + fio_qpair->f = f; + fio_qpair->fio_ctrlr = fio_ctrlr; + fio_qpair->next = fio_thread->fio_qpair; + fio_thread->fio_qpair = fio_qpair; + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + fio_qpair->io_flags = spdk_pract_flag | spdk_prchk_flags; + } + + fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair); + + f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); + if (f->real_file_size <= 0) { + g_error = true; + SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns); + return; + } + + f->filetype = FIO_TYPE_BLOCK; + fio_file_set_size_known(f); +} + +static void parse_prchk_flags(const char *prchk_str) +{ + if (!prchk_str) { + return; + } + + if (strstr(prchk_str, "GUARD") != NULL) { + spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(prchk_str, "REFTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(prchk_str, "APPTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } +} + +/* Called once at initialization. This is responsible for gathering the size of + * each "file", which in our case are in the form + * 'key=value [key=value] ... 
ns=value' + * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1' + * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */ +static int spdk_fio_setup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread; + struct spdk_fio_options *fio_options = td->eo; + struct spdk_env_opts opts; + struct fio_file *f; + char *p; + int rc = 0; + struct spdk_nvme_transport_id trid; + struct spdk_fio_ctrlr *fio_ctrlr; + char *trid_info; + unsigned int i; + + if (!td->o.use_thread) { + log_err("spdk: must set thread=1 when using spdk plugin\n"); + return 1; + } + + pthread_mutex_lock(&mutex); + + fio_thread = calloc(1, sizeof(*fio_thread)); + assert(fio_thread != NULL); + + td->io_ops_data = fio_thread; + fio_thread->td = td; + + fio_thread->iocq_size = td->o.iodepth; + fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); + assert(fio_thread->iocq != NULL); + + if (!spdk_env_initialized) { + spdk_env_opts_init(&opts); + opts.name = "fio"; + opts.mem_size = fio_options->mem_size; + opts.shm_id = fio_options->shm_id; + spdk_enable_sgl = fio_options->enable_sgl; + spdk_pract_flag = fio_options->pi_act; + parse_prchk_flags(fio_options->pi_chk); + if (spdk_env_init(&opts) < 0) { + SPDK_ERRLOG("Unable to initialize SPDK env\n"); + free(fio_thread->iocq); + free(fio_thread); + fio_thread = NULL; + pthread_mutex_unlock(&mutex); + return 1; + } + spdk_env_initialized = true; + spdk_unaffinitize_thread(); + + /* Spawn a thread to continue polling the controllers */ + rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL); + if (rc != 0) { + SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. 
They won't be polled.\n"); + } + } + + for_each_file(td, f, i) { + memset(&trid, 0, sizeof(trid)); + + trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + + p = strstr(f->file_name, " ns="); + if (p == NULL) { + SPDK_ERRLOG("Failed to find namespace 'ns=X'\n"); + continue; + } + + trid_info = strndup(f->file_name, p - f->file_name); + if (!trid_info) { + SPDK_ERRLOG("Failed to allocate space for trid_info\n"); + continue; + } + + rc = spdk_nvme_transport_id_parse(&trid, trid_info); + if (rc < 0) { + SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info); + free(trid_info); + continue; + } + free(trid_info); + + if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_addr pci_addr; + if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) { + SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr); + continue; + } + spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); + } else { + if (trid.subnqn[0] == '\0') { + snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", + SPDK_NVMF_DISCOVERY_NQN); + } + } + + fio_thread->current_f = f; + + fio_ctrlr = get_fio_ctrlr(&trid); + if (fio_ctrlr) { + attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts); + } else { + /* Enumerate all of the controllers */ + if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) { + SPDK_ERRLOG("spdk_nvme_probe() failed\n"); + continue; + } + } + + if (g_error) { + log_err("Failed to initialize spdk fio plugin\n"); + rc = 1; + break; + } + } + + td_count++; + + pthread_mutex_unlock(&mutex); + + return rc; +} + +static int spdk_fio_open(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_close(struct thread_data *td, struct fio_file *f) +{ + return 0; +} + +static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) +{ + td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL); + return td->orig_buffer == NULL; +} + +static void spdk_fio_iomem_free(struct thread_data *td) +{ + spdk_dma_free(td->orig_buffer); +} + +static int 
spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req; + + fio_req = calloc(1, sizeof(*fio_req)); + if (fio_req == NULL) { + return 1; + } + fio_req->io = io_u; + fio_req->fio_thread = fio_thread; + + io_u->engine_data = fio_req; + + return 0; +} + +static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct spdk_fio_request *fio_req = io_u->engine_data; + + if (fio_req) { + assert(fio_req->io == io_u); + free(fio_req); + io_u->engine_data = NULL; + } +} + +static void +fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_nvme_protection_info *pi; + uint16_t crc16; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + fio_req->appmask = 0xffff; + fio_req->apptag = FIO_NVME_PI_APPTAG; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + memset(pi, 0, sizeof(*pi)); + + if (io_u->ddir == DDIR_WRITE) { + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&pi->guard, crc16); + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + to_be16(&pi->app_tag, FIO_NVME_PI_APPTAG); + } + if 
(fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&pi->ref_tag, (uint32_t)lba + i); + } + } + } +} + +static void +fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_nvme_protection_info *pi; + uint16_t crc16, guard, app_tag; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, ref_tag, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include last 8 bytes of PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&guard, crc16); + if (pi->guard != guard) { + fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, guard, pi->guard); + } + + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Previously we used the number of lbas as + * application tag for writes + */ + to_be16(&app_tag, FIO_NVME_PI_APPTAG); + if (pi->app_tag != app_tag) { + fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, app_tag, pi->app_tag); + } + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&ref_tag, (uint32_t)lba + i); + if (pi->ref_tag != ref_tag) { + fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%08x but returned with 0x%08x," + " may read the LBA without write 
it first\n", + lba + i, ref_tag, pi->ref_tag); + } + } + } +} + +/* NVMe completion callback: optionally verify protection information, then + * append the finished io_u to the thread's completion array for getevents. */ +static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_fio_request *fio_req = ctx; + struct spdk_fio_thread *fio_thread = fio_req->fio_thread; + struct spdk_fio_qpair *fio_qpair; + + /* Use the qpair of the file this request targeted (same lookup as + * spdk_fio_queue); the list head is only right for single-file jobs. */ + for (fio_qpair = fio_thread->fio_qpair; fio_qpair != NULL; fio_qpair = fio_qpair->next) { + if (fio_qpair->f == fio_req->io->file) { + break; + } + } + + if (fio_qpair != NULL && fio_qpair->do_nvme_pi) { + fio_extended_lba_verify_pi(fio_qpair, fio_req->io); + } + + assert(fio_thread->iocq_count < fio_thread->iocq_size); + fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; +} + +static void +spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + + fio_req->iov_offset = sgl_offset; +} + +static int +spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length) +{ + struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; + struct io_u *io_u = fio_req->io; + + *address = io_u->buf; + *length = io_u->xfer_buflen; + + if (fio_req->iov_offset) { + assert(fio_req->iov_offset <= io_u->xfer_buflen); + *address += fio_req->iov_offset; + *length -= fio_req->iov_offset; + } + + return 0; +} + +#if FIO_IOOPS_VERSION >= 24 +typedef enum fio_q_status fio_q_status_t; +#else +typedef int fio_q_status_t; +#endif + +static fio_q_status_t +spdk_fio_queue(struct thread_data *td, struct io_u *io_u) +{ + int rc = 1; + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_fio_qpair *fio_qpair; + struct spdk_nvme_ns *ns = NULL; + uint32_t block_size; + uint64_t lba; + uint32_t lba_count; + + /* Find the namespace that corresponds to the file in the io_u */ + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + if (fio_qpair->f == io_u->file) { + ns = fio_qpair->ns; + break; + } + fio_qpair = fio_qpair->next; + } + if (fio_qpair == NULL || ns == NULL) { + return -ENXIO; + } + + block_size = spdk_nvme_ns_get_extended_sector_size(ns); + + lba = io_u->offset / block_size; + lba_count = io_u->xfer_buflen / block_size; 
+ + // TODO: considering situations that fio will randomize and verify io_u + if (fio_qpair->do_nvme_pi) { + fio_extended_lba_setup_pi(fio_qpair, io_u); + } + + switch (io_u->ddir) { + case DDIR_READ: + if (!spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); + } else { + rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); + } + break; + case DDIR_WRITE: + if (!spdk_enable_sgl) { + rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); + } else { + rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); + } + break; + default: + assert(false); + break; + } + + /* NVMe read/write functions return -ENOMEM if there are no free requests. 
*/ + if (rc == -ENOMEM) { + return FIO_Q_BUSY; + } + + if (rc != 0) { + return -abs(rc); + } + + return FIO_Q_QUEUED; +} + +/* Return the io_u stored at index 'event' of the completion array filled + * during the preceding getevents call. */ +static struct io_u *spdk_fio_event(struct thread_data *td, int event) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + + assert(event >= 0); + assert((unsigned)event < fio_thread->iocq_count); + return fio_thread->iocq[event]; +} + +/* Poll this thread's qpairs in turn until at least 'min' completions have been + * collected or, when 't' is given, the elapsed time exceeds it. + * Returns the number of completions gathered (iocq_count). */ +static int spdk_fio_getevents(struct thread_data *td, unsigned int min, + unsigned int max, const struct timespec *t) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair = NULL; + struct timespec t0, t1; + uint64_t timeout = 0; + + if (t) { + /* timeout is kept in nanoseconds */ + timeout = t->tv_sec * 1000000000L + t->tv_nsec; + clock_gettime(CLOCK_MONOTONIC_RAW, &t0); + } + + fio_thread->iocq_count = 0; + + /* resume with the qpair after the one handled last time */ + if (fio_thread->fio_qpair_current) { + fio_qpair = fio_thread->fio_qpair_current->next; + } + + for (;;) { + /* wrap around to the head of the list */ + if (fio_qpair == NULL) { + fio_qpair = fio_thread->fio_qpair; + } + + while (fio_qpair != NULL) { + spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count); + + if (fio_thread->iocq_count >= min) { + /* remember the qpair being handled so the next call resumes after it */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; + } + + fio_qpair = fio_qpair->next; + } + + if (t) { + uint64_t elapse; + + clock_gettime(CLOCK_MONOTONIC_RAW, &t1); + elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) + + t1.tv_nsec - t0.tv_nsec; + if (elapse > timeout) { + break; + } + } + } + + /* timed out: record the current position in the qpair list */ + fio_thread->fio_qpair_current = fio_qpair; + return fio_thread->iocq_count; +} + +static int spdk_fio_invalidate(struct thread_data *td, struct fio_file *f) +{ + /* TODO: This should probably send a flush to the device, but for now just return successful. 
*/ + return 0; +} + +/* Per-thread teardown. Frees this thread's I/O qpairs and completion array; + * the last thread to leave also detaches every controller and stops the + * controller polling thread. */ +static void spdk_fio_cleanup(struct thread_data *td) +{ + struct spdk_fio_thread *fio_thread = td->io_ops_data; + struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp; + bool last_thread = false; + + fio_qpair = fio_thread->fio_qpair; + while (fio_qpair != NULL) { + spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair); + fio_qpair_tmp = fio_qpair->next; + free(fio_qpair); + fio_qpair = fio_qpair_tmp; + } + + /* iocq is a separate allocation made in spdk_fio_setup(); release it with its owner */ + free(fio_thread->iocq); + free(fio_thread); + + pthread_mutex_lock(&mutex); + td_count--; + if (td_count == 0) { + struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp; + + fio_ctrlr = ctrlr_g; + while (fio_ctrlr != NULL) { + spdk_nvme_detach(fio_ctrlr->ctrlr); + fio_ctrlr_tmp = fio_ctrlr->next; + free(fio_ctrlr); + fio_ctrlr = fio_ctrlr_tmp; + } + ctrlr_g = NULL; + last_thread = true; + } + pthread_mutex_unlock(&mutex); + + /* Decide from state sampled under the mutex, so the poller is cancelled + * and joined exactly once rather than by every thread that sees ctrlr_g == NULL */ + if (last_thread) { + if (pthread_cancel(g_ctrlr_thread_id) == 0) { + pthread_join(g_ctrlr_thread_id, NULL); + } + } +} + +/* This function enables addition of SPDK parameters to the fio config + * Adding new parameters by defining them here and defining a callback + * function to read the parameter value. 
*/ +static struct fio_option options[] = { + { + .name = "mem_size_mb", + .lname = "Memory size in MB", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, mem_size), + .def = "512", + .help = "Memory Size for SPDK (MB)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "shm_id", + .lname = "shared memory ID", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, shm_id), + .def = "-1", + .help = "Shared Memory ID", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "enable_sgl", + .lname = "SGL used for I/O commands", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, enable_sgl), + .def = "0", + .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "hostnqn", + .lname = "Host NQN to use when connecting to controllers.", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, hostnqn), + .help = "Host NQN", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_act", + .lname = "Protection Information Action", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, pi_act), + .def = "1", + .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_chk", + .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, pi_chk), + .def = NULL, + .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = NULL, + }, +}; + +/* FIO imports this structure using dlsym */ +struct ioengine_ops ioengine = { + .name = "spdk", + .version = FIO_IOOPS_VERSION, + .queue = spdk_fio_queue, + .getevents = spdk_fio_getevents, + .event = spdk_fio_event, + .cleanup = 
spdk_fio_cleanup, + .open_file = spdk_fio_open, + .close_file = spdk_fio_close, + .invalidate = spdk_fio_invalidate, + .iomem_alloc = spdk_fio_iomem_alloc, + .iomem_free = spdk_fio_iomem_free, + .setup = spdk_fio_setup, + .io_u_init = spdk_fio_io_u_init, + .io_u_free = spdk_fio_io_u_free, + .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN, + .options = options, + .option_struct_size = sizeof(struct spdk_fio_options), +}; + +static void fio_init fio_spdk_register(void) +{ + register_ioengine(&ioengine); +} + +static void fio_exit fio_spdk_unregister(void) +{ + unregister_ioengine(&ioengine); +} diff --git a/src/spdk/examples/nvme/fio_plugin/full_bench.fio b/src/spdk/examples/nvme/fio_plugin/full_bench.fio new file mode 100644 index 00000000..4dea21d1 --- /dev/null +++ b/src/spdk/examples/nvme/fio_plugin/full_bench.fio @@ -0,0 +1,40 @@ +[global] +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +disable_slat=1 +disable_bw=1 +lat_percentiles=1 +clat_percentiles=0 +percentile_list=50:99:99.999 + +[precondition-sequential] +stonewall +description="Sequentially write to the device twice" +rw=write +iodepth=128 +bs=128k +loops=2 + +[4k_randwrite_qd1] +stonewall +description="4KiB Random Write QD=1" +bs=4k +rw=randwrite +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 + +[4k_randread_qd1] +stonewall +description="4KiB Random Read QD=1" +bs=4k +rw=randread +iodepth=1 +time_based=1 +ramp_time=60 +runtime=240 diff --git a/src/spdk/examples/nvme/hello_world/.gitignore b/src/spdk/examples/nvme/hello_world/.gitignore new file mode 100644 index 00000000..242c034c --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/.gitignore @@ -0,0 +1 @@ +hello_world diff --git a/src/spdk/examples/nvme/hello_world/Makefile b/src/spdk/examples/nvme/hello_world/Makefile new file mode 100644 index 00000000..890d761a --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = hello_world + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hello_world/hello_world.c b/src/spdk/examples/nvme/hello_world/hello_world.c new file mode 100644 index 00000000..34913073 --- /dev/null +++ b/src/spdk/examples/nvme/hello_world/hello_world.c @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct ns_entry *next; + struct spdk_nvme_qpair *qpair; +}; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + /* + * spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe + * controller. During initialization, the IDENTIFY data for the + * controller is read using an NVMe admin command, and that data + * can be retrieved using spdk_nvme_ctrlr_get_data() to get + * detailed information on the controller. Refer to the NVMe + * specification for more details on IDENTIFY for NVMe controllers. + */ + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->next = g_namespaces; + g_namespaces = entry; + + printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns) / 1000000000); +} + +struct hello_world_sequence { + struct ns_entry *ns_entry; + char *buf; + unsigned using_cmb_io; + int is_completed; +}; + +static void +read_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + + /* + * The read I/O has completed. Print the contents of the + * buffer, free the buffer, then mark the sequence as + * completed. This will trigger the hello_world() function + * to exit its polling loop. 
+ */ + printf("%s", sequence->buf); + spdk_free(sequence->buf); + sequence->is_completed = 1; +} + +/* Completion callback for the write: swaps the write buffer for a fresh zeroed + * one and submits a read of LBA 0, completed by read_complete(). */ +static void +write_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct hello_world_sequence *sequence = arg; + struct ns_entry *ns_entry = sequence->ns_entry; + int rc; + + /* + * The write I/O has completed. Free the buffer associated with + * the write I/O and allocate a new zeroed buffer for reading + * the data back from the NVMe namespace. + */ + if (sequence->using_cmb_io) { + spdk_nvme_ctrlr_free_cmb_io_buffer(ns_entry->ctrlr, sequence->buf, 0x1000); + } else { + spdk_free(sequence->buf); + } + sequence->buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (sequence->buf == NULL) { + /* Do not hand a NULL buffer to the read command */ + fprintf(stderr, "read buffer allocation failed\n"); + exit(1); + } + + rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + read_complete, (void *)sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting read I/O failed\n"); + exit(1); + } +} + +static void +hello_world(void) +{ + struct ns_entry *ns_entry; + struct hello_world_sequence sequence; + int rc; + + ns_entry = g_namespaces; + while (ns_entry != NULL) { + /* + * Allocate an I/O qpair that we can use to submit read/write requests + * to namespaces on the controller. NVMe controllers typically support + * many qpairs per controller. Any I/O qpair allocated for a controller + * can submit I/O to any namespace on that controller. + * + * The SPDK NVMe driver provides no synchronization for qpair accesses - + * the application must ensure only a single thread submits I/O to a + * qpair, and that same thread must also check for completions on that + * qpair. This enables extremely efficient I/O processing by making all + * I/O operations completely lockless. 
+ */ + ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, NULL, 0); + if (ns_entry->qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return; + } + + /* + * Use spdk_dma_zmalloc to allocate a 4KB zeroed buffer. This memory + * will be pinned, which is required for data buffers used for SPDK NVMe + * I/O operations. + */ + sequence.using_cmb_io = 1; + sequence.buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(ns_entry->ctrlr, 0x1000); + if (sequence.buf == NULL) { + sequence.using_cmb_io = 0; + sequence.buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + } + if (sequence.buf == NULL) { + printf("ERROR: write buffer allocation failed\n"); + return; + } + if (sequence.using_cmb_io) { + printf("INFO: using controller memory buffer for IO\n"); + } else { + printf("INFO: using host memory buffer for IO\n"); + } + sequence.is_completed = 0; + sequence.ns_entry = ns_entry; + + /* + * Print "Hello world!" to sequence.buf. We will write this data to LBA + * 0 on the namespace, and then later read it back into a separate buffer + * to demonstrate the full I/O path. + */ + snprintf(sequence.buf, 0x1000, "%s", "Hello world!\n"); + + /* + * Write the data buffer to LBA 0 of this namespace. "write_complete" and + * "&sequence" are specified as the completion callback function and + * argument respectively. write_complete() will be called with the + * value of &sequence as a parameter when the write I/O is completed. + * This allows users to potentially specify different completion + * callback routines for each I/O, as well as pass a unique handle + * as an argument so the application knows which I/O has completed. + * + * Note that the SPDK NVMe driver will only check for completions + * when the application calls spdk_nvme_qpair_process_completions(). + * It is the responsibility of the application to trigger the polling + * process. 
+ */ + rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf, + 0, /* LBA start */ + 1, /* number of LBAs */ + write_complete, &sequence, 0); + if (rc != 0) { + fprintf(stderr, "starting write I/O failed\n"); + exit(1); + } + + /* + * Poll for completions. 0 here means process all available completions. + * In certain usage models, the caller may specify a positive integer + * instead of 0 to signify the maximum number of completions it should + * process. This function will never block - if there are no + * completions pending on the specified qpair, it will return immediately. + * + * When the write I/O completes, write_complete() will submit a new I/O + * to read LBA 0 into a separate buffer, specifying read_complete() as its + * completion routine. When the read I/O completes, read_complete() will + * print the buffer contents and set sequence.is_completed = 1. That will + * break this loop and then exit the program. + */ + while (!sequence.is_completed) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + /* + * Free the I/O qpair. This typically is done when an application exits. + * But SPDK does support freeing and then reallocating qpairs during + * operation. It is the responsibility of the caller to ensure all + * pending I/O are completed before trying to free the qpair. 
+ */ + spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair); + ns_entry = ns_entry->next; + } +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + int nsid, num_ns; + struct ctrlr_entry *entry; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + entry = malloc(sizeof(struct ctrlr_entry)); + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + printf("Attached to %s\n", trid->traddr); + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + /* + * Each controller has one or more namespaces. An NVMe namespace is basically + * equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how + * many namespaces exist on the controller. For Intel(R) P3X00 controllers, + * it will just be one namespace. + * + * Note that in NVMe, namespace IDs start at 1, not 0. 
+ */ + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + printf("Using controller %s with %d namespaces.\n", entry->name, num_ns); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } +} + +static void +cleanup(void) +{ + struct ns_entry *ns_entry = g_namespaces; + struct ctrlr_entry *ctrlr_entry = g_controllers; + + while (ns_entry) { + struct ns_entry *next = ns_entry->next; + free(ns_entry); + ns_entry = next; + } + + while (ctrlr_entry) { + struct ctrlr_entry *next = ctrlr_entry->next; + + spdk_nvme_detach(ctrlr_entry->ctrlr); + free(ctrlr_entry); + ctrlr_entry = next; + } +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "hello_world"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Initializing NVMe Controllers\n"); + + /* + * Start the SPDK NVMe enumeration process. probe_cb will be called + * for each NVMe controller found, giving our application a choice on + * whether to attach to each controller. attach_cb will then be + * called for each controller after the SPDK NVMe driver has completed + * initializing the controller we chose to attach. 
+ */ + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + cleanup(); + return 1; + } + + if (g_controllers == NULL) { + fprintf(stderr, "no NVMe controllers found\n"); + cleanup(); + return 1; + } + + printf("Initialization complete.\n"); + hello_world(); + cleanup(); + return 0; +} diff --git a/src/spdk/examples/nvme/hotplug/.gitignore b/src/spdk/examples/nvme/hotplug/.gitignore new file mode 100644 index 00000000..e6ff5380 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/.gitignore @@ -0,0 +1 @@ +hotplug diff --git a/src/spdk/examples/nvme/hotplug/Makefile b/src/spdk/examples/nvme/hotplug/Makefile new file mode 100644 index 00000000..0dcdda9a --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = hotplug + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/hotplug/hotplug.c b/src/spdk/examples/nvme/hotplug/hotplug.c new file mode 100644 index 00000000..66d93a71 --- /dev/null +++ b/src/spdk/examples/nvme/hotplug/hotplug.c @@ -0,0 +1,491 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/queue.h" + +struct dev_ctx { + TAILQ_ENTRY(dev_ctx) tailq; + bool is_new; + bool is_removed; + bool is_draining; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + uint32_t io_size_blocks; + uint64_t size_in_ios; + uint64_t io_completed; + uint64_t prev_io_completed; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + char name[1024]; +}; + +struct perf_task { + struct dev_ctx *dev; + void *buf; +}; + +static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs); + +static uint64_t g_tsc_rate; + +static uint32_t g_io_size_bytes = 4096; +static int g_queue_depth = 4; +static int g_time_in_sec; +static int g_expected_insert_times = -1; +static int g_expected_removal_times = -1; +static int g_insert_times; +static int g_removal_times; +static int g_shm_id = -1; + +static void +task_complete(struct perf_task *task); + +static void +register_dev(struct spdk_nvme_ctrlr *ctrlr) +{ + struct dev_ctx *dev; + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) { + perror("dev_ctx malloc"); + exit(1); + } + + snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + dev->ctrlr = ctrlr; + dev->is_new = true; + dev->is_removed = false; + dev->is_draining = false; + + dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 
1); + + if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) { + fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name); + goto skip; + } + + if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes || + spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) { + fprintf(stderr, "Controller %s: Invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + dev->name, + spdk_nvme_ns_get_size(dev->ns), + spdk_nvme_ns_get_sector_size(dev->ns), + g_io_size_bytes); + goto skip; + } + + dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes; + dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns); + + dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (!dev->qpair) { + fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + goto skip; + } + g_insert_times++; + TAILQ_INSERT_TAIL(&g_devs, dev, tailq); + return; + +skip: + free(dev); +} + +static void +unregister_dev(struct dev_ctx *dev) +{ + fprintf(stderr, "unregister_dev: %s\n", dev->name); + + spdk_nvme_ctrlr_free_io_qpair(dev->qpair); + spdk_nvme_detach(dev->ctrlr); + + TAILQ_REMOVE(&g_devs, dev, tailq); + free(dev); +} + +static struct perf_task * +alloc_task(struct dev_ctx *dev) +{ + struct perf_task *task; + + task = calloc(1, sizeof(*task)); + if (task == NULL) { + return NULL; + } + + task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL); + if (task->buf == NULL) { + free(task); + return NULL; + } + + task->dev = dev; + + return task; +} + +static void +free_task(struct perf_task *task) +{ + spdk_dma_free(task->buf); + free(task); +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static void +submit_single_io(struct perf_task *task) +{ + struct dev_ctx *dev = task->dev; + uint64_t offset_in_ios; + int rc; + + offset_in_ios = dev->offset_in_ios++; + if (dev->offset_in_ios == dev->size_in_ios) { + dev->offset_in_ios = 0; + } + + rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf, 
+ offset_in_ios * dev->io_size_blocks, + dev->io_size_blocks, io_complete, task, 0); + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + free_task(task); + } else { + dev->current_queue_depth++; + } +} + +static void +task_complete(struct perf_task *task) +{ + struct dev_ctx *dev; + + dev = task->dev; + dev->current_queue_depth--; + dev->io_completed++; + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (!dev->is_draining && !dev->is_removed) { + submit_single_io(task); + } else { + free_task(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct perf_task *)ctx); +} + +static void +check_io(struct dev_ctx *dev) +{ + spdk_nvme_qpair_process_completions(dev->qpair, 0); +} + +static void +submit_io(struct dev_ctx *dev, int queue_depth) +{ + struct perf_task *task; + + while (queue_depth-- > 0) { + task = alloc_task(dev); + if (task == NULL) { + fprintf(stderr, "task allocation failed\n"); + exit(1); + } + + submit_single_io(task); + } +} + +static void +drain_io(struct dev_ctx *dev) +{ + dev->is_draining = true; + while (dev->current_queue_depth > 0) { + check_io(dev); + } +} + +static void +print_stats(void) +{ + struct dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n", + dev->name, + dev->io_completed, + dev->io_completed - dev->prev_io_completed); + dev->prev_io_completed = dev->io_completed; + } + + fprintf(stderr, "\n"); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr 
*ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + fprintf(stderr, "Attached to %s\n", trid->traddr); + + register_dev(ctrlr); +} + +static void +remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) +{ + struct dev_ctx *dev; + + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->ctrlr == ctrlr) { + /* + * Mark the device as removed, but don't detach yet. + * + * The I/O handling code will detach once it sees that + * is_removed is true and all outstanding I/O have been completed. + */ + dev->is_removed = true; + fprintf(stderr, "Controller removed: %s\n", dev->name); + return; + } + } + + /* + * If we get here, this remove_cb is for a controller that we are not tracking + * in g_devs (for example, because we skipped it during register_dev), + * so immediately detach it. + */ + spdk_nvme_detach(ctrlr); +} + +static void +io_loop(void) +{ + struct dev_ctx *dev, *dev_tmp; + uint64_t tsc_end; + uint64_t next_stats_tsc; + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + next_stats_tsc = spdk_get_ticks(); + + while (1) { + uint64_t now; + + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + TAILQ_FOREACH(dev, &g_devs, tailq) { + if (dev->is_new) { + /* Submit initial I/O for this controller. */ + submit_io(dev, g_queue_depth); + dev->is_new = false; + } + + check_io(dev); + } + + /* + * Check for hotplug events. + */ + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + break; + } + + /* + * Check for devices which were hot-removed and have finished + * processing outstanding I/Os. + * + * unregister_dev() may remove devs from the list, so use the + * removal-safe iterator. 
+ */ + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + if (dev->is_removed && dev->current_queue_depth == 0) { + g_removal_times++; + unregister_dev(dev); + } + } + + now = spdk_get_ticks(); + if (now > tsc_end) { + break; + } + if (now > next_stats_tsc) { + print_stats(); + next_stats_tsc += g_tsc_rate; + } + + if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) { + break; + } + } + + TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { + drain_io(dev); + unregister_dev(dev); + } +} + +/* Print the supported command-line options to stdout. */ +static void usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-i shm id (optional)]\n"); + printf("\t[-n expected hot insert times]\n"); + printf("\t[-r expected hot removal times]\n"); + printf("\t[-t time in seconds]\n"); +} + +/* + * Parse the command-line options into the g_* globals. + * + * -t (run time in seconds) is mandatory and must be positive: io_loop() + * multiplies it by the unsigned tick rate, so a negative value would wrap + * instead of describing a run time. + * + * Returns 0 on success, or 1 after printing usage when an unknown option is + * seen or when -t is missing, zero or negative. + */ +static int +parse_args(int argc, char **argv) +{ + int op; + + /* default value */ + g_time_in_sec = 0; + + while ((op = getopt(argc, argv, "i:n:r:t:")) != -1) { + switch (op) { + case 'i': + g_shm_id = atoi(optarg); + break; + case 'n': + g_expected_insert_times = atoi(optarg); + break; + case 'r': + g_expected_removal_times = atoi(optarg); + break; + case 't': + g_time_in_sec = atoi(optarg); + break; + default: + usage(argv[0]); + return 1; + } + } + + /* Reject a missing, zero or negative run time. */ + if (g_time_in_sec <= 0) { + usage(argv[0]); + return 1; + } + + return 0; +} + + +static int +register_controllers(void) +{ + fprintf(stderr, "Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + /* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. 
*/ + g_insert_times = 0; + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "hotplug"; + opts.core_mask = "0x1"; + if (g_shm_id > -1) { + opts.shm_id = g_shm_id; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + /* Detect the controllers that are plugged in at startup. */ + if (register_controllers() != 0) { + return 1; + } + + fprintf(stderr, "Initialization complete. Starting I/O...\n"); + io_loop(); + + if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) { + fprintf(stderr, "Expected inserts %d != actual inserts %d\n", + g_expected_insert_times, g_insert_times); + return 1; + } + + if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) { + fprintf(stderr, "Expected removals %d != actual removals %d\n", + g_expected_removal_times, g_removal_times); + return 1; + } + + return 0; +} diff --git a/src/spdk/examples/nvme/identify/.gitignore b/src/spdk/examples/nvme/identify/.gitignore new file mode 100644 index 00000000..5c5444c1 --- /dev/null +++ b/src/spdk/examples/nvme/identify/.gitignore @@ -0,0 +1 @@ +identify diff --git a/src/spdk/examples/nvme/identify/Makefile b/src/spdk/examples/nvme/identify/Makefile new file mode 100644 index 00000000..0aa5e52b --- /dev/null +++ b/src/spdk/examples/nvme/identify/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = identify + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/identify/identify.c b/src/spdk/examples/nvme/identify/identify.c new file mode 100644 index 00000000..3958483b --- /dev/null +++ b/src/spdk/examples/nvme/identify/identify.c @@ -0,0 +1,1723 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/nvme_ocssd.h" +#include "spdk/env.h" +#include "spdk/nvme_intel.h" +#include "spdk/nvmf_spec.h" +#include "spdk/pci_ids.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/uuid.h" + +#define MAX_DISCOVERY_LOG_ENTRIES ((uint64_t)1000) + +#define NUM_CHUNK_INFO_ENTRIES 8 + +static int outstanding_commands; + +struct feature { + uint32_t result; + bool valid; +}; + +static struct feature features[256]; + +static struct spdk_nvme_error_information_entry error_page[256]; + +static struct spdk_nvme_health_information_page health_page; + +static struct spdk_nvme_firmware_page firmware_page; + +static struct spdk_nvme_cmds_and_effect_log_page cmd_effects_log_page; + +static struct spdk_nvme_intel_smart_information_page intel_smart_page; + +static struct spdk_nvme_intel_temperature_page intel_temperature_page; + +static struct spdk_nvme_intel_marketing_description_page intel_md_page; + +static struct spdk_nvmf_discovery_log_page *g_discovery_page; +static size_t g_discovery_page_size; +static uint64_t g_discovery_page_numrec; + +static struct spdk_ocssd_geometry_data geometry_data; + +static struct spdk_ocssd_chunk_information_entry g_ocssd_chunk_info_page[NUM_CHUNK_INFO_ENTRIES ]; + +static bool g_hex_dump = false; + +static int g_shm_id = -1; + +static int g_dpdk_mem = 64; + +static int g_master_core = 0; + +static char g_core_mask[16] = "0x1"; + +static struct spdk_nvme_transport_id g_trid; + +static int g_controllers_found = 0; + +static void +hex_dump(const void *data, size_t size) +{ + size_t offset = 0, i; + const uint8_t *bytes = data; + + while (size) { + printf("%08zX:", offset); + + for (i = 0; i < 16; i++) { + if (i == 8) { + printf("-"); + } else { + printf(" "); + } + + if (i < size) { + printf("%02X", bytes[offset + i]); + } else { + printf(" "); + } + } + + printf(" "); + + for (i = 0; i < 16; i++) { + if (i < size) { + 
if (bytes[offset + i] > 0x20 && bytes[offset + i] < 0x7F) { + printf("%c", bytes[offset + i]); + } else { + printf("."); + } + } + } + + printf("\n"); + + offset += 16; + if (size > 16) { + size -= 16; + } else { + break; + } + } +} + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct feature *feature = cb_arg; + int fid = feature - features; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get_feature(0x%02X) failed\n", fid); + } else { + feature->result = cpl->cdw0; + feature->valid = true; + } + outstanding_commands--; +} + +static void +get_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get log page failed\n"); + } + outstanding_commands--; +} + +static void +get_ocssd_geometry_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("get ocssd geometry failed\n"); + } + outstanding_commands--; +} + +static int +get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid) +{ + struct spdk_nvme_cmd cmd = {}; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10 = fid; + + return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]); +} + +static void +get_features(struct spdk_nvme_ctrlr *ctrlr) +{ + size_t i; + + uint8_t features_to_get[] = { + SPDK_NVME_FEAT_ARBITRATION, + SPDK_NVME_FEAT_POWER_MANAGEMENT, + SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD, + SPDK_NVME_FEAT_ERROR_RECOVERY, + SPDK_NVME_FEAT_NUMBER_OF_QUEUES, + SPDK_OCSSD_FEAT_MEDIA_FEEDBACK, + }; + + /* Submit several GET FEATURES commands and wait for them to complete */ + outstanding_commands = 0; + for (i = 0; i < SPDK_COUNTOF(features_to_get); i++) { + if (!spdk_nvme_ctrlr_is_ocssd_supported(ctrlr) && + features_to_get[i] == SPDK_OCSSD_FEAT_MEDIA_FEEDBACK) { + continue; + } + if (get_feature(ctrlr, features_to_get[i]) == 0) { + outstanding_commands++; + } else { + printf("get_feature(0x%02X) failed to submit 
command\n", features_to_get[i]); + } + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_error_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ERROR, + SPDK_NVME_GLOBAL_NS_TAG, error_page, + sizeof(*error_page) * (cdata->elpe + 1), + 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_health_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, + SPDK_NVME_GLOBAL_NS_TAG, &health_page, sizeof(health_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_firmware_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_FIRMWARE_SLOT, + SPDK_NVME_GLOBAL_NS_TAG, &firmware_page, sizeof(firmware_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_cmd_effects_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG, + SPDK_NVME_GLOBAL_NS_TAG, &cmd_effects_log_page, sizeof(cmd_effects_log_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int +get_intel_smart_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_SMART, SPDK_NVME_GLOBAL_NS_TAG, + &intel_smart_page, sizeof(intel_smart_page), 0, get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static int 
+get_intel_temperature_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE, + SPDK_NVME_GLOBAL_NS_TAG, &intel_temperature_page, sizeof(intel_temperature_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +/* Submit a Get Log Page for the Intel marketing description page into intel_md_page; exits the process if the command cannot be submitted. */ +static int +get_intel_md_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION, + SPDK_NVME_GLOBAL_NS_TAG, &intel_md_page, sizeof(intel_md_page), 0, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + return 0; +} + +/* + * Completion callback for the initial (header) read of the discovery log page. + * + * cb_arg is the controller the request was submitted on. On a failed + * completion the header buffer is released and g_discovery_page is cleared, + * since the controller may simply not be a discovery controller. Otherwise the + * buffer is grown to hold numrec entries (capped at MAX_DISCOVERY_LOG_ENTRIES) + * and the remainder of the log is requested in pieces of up to 4 KB, each one + * counted in outstanding_commands. + */ +static void +get_discovery_log_page_header_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvmf_discovery_log_page *new_discovery_page; + struct spdk_nvme_ctrlr *ctrlr = cb_arg; + uint16_t recfmt; + uint64_t remaining; + uint64_t offset; + + outstanding_commands--; + if (spdk_nvme_cpl_is_error(cpl)) { + /* Return without printing anything - this may not be a discovery controller */ + free(g_discovery_page); + g_discovery_page = NULL; + return; + } + + /* Got the first 4K of the discovery log page */ + recfmt = from_le16(&g_discovery_page->recfmt); + if (recfmt != 0) { + printf("Unrecognized discovery log record format %" PRIu16 "\n", recfmt); + return; + } + + g_discovery_page_numrec = from_le64(&g_discovery_page->numrec); + + /* Pick an arbitrary limit to avoid ridiculously large buffer size. */ + if (g_discovery_page_numrec > MAX_DISCOVERY_LOG_ENTRIES) { + printf("Discovery log has %" PRIu64 " entries - limiting to %" PRIu64 ".\n", + g_discovery_page_numrec, MAX_DISCOVERY_LOG_ENTRIES); + g_discovery_page_numrec = MAX_DISCOVERY_LOG_ENTRIES; + } + + /* + * Now that we know how many entries should be in the log page, we can allocate + * the full log page buffer. 
+ */ + g_discovery_page_size += g_discovery_page_numrec * sizeof(struct + spdk_nvmf_discovery_log_page_entry); + new_discovery_page = realloc(g_discovery_page, g_discovery_page_size); + if (new_discovery_page == NULL) { + free(g_discovery_page); + /* Clear the global so no later code sees a dangling pointer. */ + g_discovery_page = NULL; + printf("Discovery page allocation failed!\n"); + return; + } + + g_discovery_page = new_discovery_page; + + /* Retrieve the rest of the discovery log page */ + offset = offsetof(struct spdk_nvmf_discovery_log_page, entries); + remaining = g_discovery_page_size - offset; + while (remaining) { + uint32_t size; + + /* Retrieve up to 4 KB at a time */ + size = spdk_min(remaining, 4096); + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, (char *)g_discovery_page + offset, size, offset, + get_log_page_completion, NULL)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + offset += size; + remaining -= size; + outstanding_commands++; + } +} + +static int +get_discovery_log_page(struct spdk_nvme_ctrlr *ctrlr) +{ + /* Allocate the initial discovery log page buffer - this will be resized later. 
*/ + g_discovery_page_size = sizeof(*g_discovery_page); + g_discovery_page = calloc(1, g_discovery_page_size); + if (g_discovery_page == NULL) { + printf("Discovery log page allocation failed!\n"); + exit(1); + } + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_DISCOVERY, + 0, g_discovery_page, g_discovery_page_size, 0, + get_discovery_log_page_header_completion, ctrlr)) { + printf("spdk_nvme_ctrlr_cmd_get_log_page() failed\n"); + exit(1); + } + + return 0; +} + +static void +get_log_pages(struct spdk_nvme_ctrlr *ctrlr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + outstanding_commands = 0; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (get_error_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Error Log Page failed\n"); + } + + if (get_health_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (SMART/health) failed\n"); + } + + if (get_firmware_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Firmware Slot Information) failed\n"); + } + + if (cdata->lpa.celp) { + if (get_cmd_effects_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Commands Supported and Effects) failed\n"); + } + } + + if (cdata->vid == SPDK_PCI_VID_INTEL) { + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + if (get_intel_smart_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel SMART/health) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + if (get_intel_temperature_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel temperature) failed\n"); + } + } + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + if (get_intel_md_log_page(ctrlr) == 0) { + outstanding_commands++; + } else { + printf("Get Log Page (Intel Marketing Description) 
failed\n"); + } + } + + } + + if (get_discovery_log_page(ctrlr) == 0) { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static int +get_ocssd_chunk_info_log_page(struct spdk_nvme_ns *ns) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, + nsid, &g_ocssd_chunk_info_page, sizeof(g_ocssd_chunk_info_page), 0, + get_log_page_completion, NULL) == 0) { + outstanding_commands++; + } else { + printf("get_ocssd_chunk_info_log_page() failed\n"); + return -1; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + return 0; +} + +static void +get_ocssd_geometry(struct spdk_nvme_ns *ns, struct spdk_ocssd_geometry_data *geometry_data) +{ + struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns); + int nsid = spdk_nvme_ns_get_id(ns); + outstanding_commands = 0; + + if (spdk_nvme_ocssd_ctrlr_cmd_geometry(ctrlr, nsid, geometry_data, + sizeof(*geometry_data), get_ocssd_geometry_completion, NULL)) { + printf("Get OpenChannel SSD geometry failed\n"); + exit(1); + } else { + outstanding_commands++; + } + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +print_hex_be(const void *v, size_t size) +{ + const uint8_t *buf = v; + + while (size--) { + printf("%02X", *buf++); + } +} + +static void +print_uint128_hex(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + printf("0x%llX%016llX", hi, lo); + } else { + printf("0x%llX", lo); + } +} + +static void +print_uint128_dec(uint64_t *v) +{ + unsigned long long lo = v[0], hi = v[1]; + if (hi) { + /* can't handle large (>64-bit) decimal values for now, so fall back to hex */ + print_uint128_hex(v); + } else { + printf("%llu", (unsigned long long)lo); + } +} + +/* The len should be <= 8. 
*/ +/* Print a little-endian byte array (array[0] is the least significant byte) as an unsigned decimal number. */ +static void +print_uint_var_dec(uint8_t *array, unsigned int len) +{ + uint64_t result = 0; + int i = len; + + while (i > 0) { + result += (uint64_t)array[i - 1] << (8 * (i - 1)); + i--; + } + /* result is uint64_t, so use the matching fixed-width conversion. */ + printf("%" PRIu64, result); +} + +/* Print ASCII string as defined by the NVMe spec */ +static void +print_ascii_string(const void *buf, size_t size) +{ + const uint8_t *str = buf; + + /* Trim trailing spaces */ + while (size > 0 && str[size - 1] == ' ') { + size--; + } + + while (size--) { + if (*str >= 0x20 && *str <= 0x7E) { + printf("%c", *str); + } else { + printf("."); + } + str++; + } +} + +/* Print state, type, wear-level index, starting LBA, block count and write pointer for the first chk_num entries of chk_info. */ +static void +print_ocssd_chunk_info(struct spdk_ocssd_chunk_information_entry *chk_info, int chk_num) +{ + int i; + char *cs_str, *ct_str; + + printf("OCSSD Chunk Info Glance\n"); + printf("======================\n"); + + for (i = 0; i < chk_num; i++) { + cs_str = chk_info[i].cs.free ? "Free" : + chk_info[i].cs.closed ? "Closed" : + chk_info[i].cs.open ? "Open" : + chk_info[i].cs.offline ? "Offline" : "Unknown"; + ct_str = chk_info[i].ct.seq_write ? "Sequential Write" : + chk_info[i].ct.rnd_write ? "Random Write" : "Unknown"; + + printf("------------\n"); + printf("Chunk index: %d\n", i); + printf("Chunk state: %s(0x%x)\n", cs_str, *(uint8_t *) & (chk_info[i].cs)); + printf("Chunk type (write mode): %s\n", ct_str); + printf("Chunk type (size_deviate): %s\n", chk_info[i].ct.size_deviate ? 
"Yes" : "No"); + printf("Wear-level Index: %d\n", chk_info[i].wli); + /* slba, cnlb and wp are unsigned 64-bit fields; print them with PRIu64 rather than %ld. */ + printf("Starting LBA: %" PRIu64 "\n", chk_info[i].slba); + printf("Number of blocks in chunk: %" PRIu64 "\n", chk_info[i].cnlb); + printf("Write Pointer: %" PRIu64 "\n", chk_info[i].wp); + } +} + +static void +print_ocssd_geometry(struct spdk_ocssd_geometry_data *geometry_data) +{ + printf("Namespace OCSSD Geometry\n"); + printf("=======================\n"); + + if (geometry_data->mjr < 2) { + printf("Open-Channel Spec version is less than 2.0\n"); + printf("OC version: maj:%d\n", geometry_data->mjr); + return; + } + + printf("OC version: maj:%d min:%d\n", geometry_data->mjr, geometry_data->mnr); + printf("LBA format:\n"); + printf(" Group bits: %d\n", geometry_data->lbaf.grp_len); + printf(" PU bits: %d\n", geometry_data->lbaf.pu_len); + printf(" Chunk bits: %d\n", geometry_data->lbaf.chk_len); + printf(" Logical block bits: %d\n", geometry_data->lbaf.lbk_len); + + printf("Media and Controller Capabilities:\n"); + printf(" Namespace supports Vector Chunk Copy: %s\n", + geometry_data->mccap.vec_chk_cpy ? "Supported" : "Not Supported"); + printf(" Namespace supports multiple resets a free chunk: %s\n", + geometry_data->mccap.multi_reset ? 
"Supported" : "Not Supported"); + + printf("Wear-level Index Delta Threshold: %d\n", geometry_data->wit); + printf("Groups (channels): %d\n", geometry_data->num_grp); + printf("PUs (LUNs) per group: %d\n", geometry_data->num_pu); + printf("Chunks per LUN: %d\n", geometry_data->num_chk); + printf("Logical blks per chunk: %d\n", geometry_data->clba); + printf("MIN write size: %d\n", geometry_data->ws_min); + printf("OPT write size: %d\n", geometry_data->ws_opt); + printf("Cache min write size: %d\n", geometry_data->mw_cunits); + printf("Max open chunks: %d\n", geometry_data->maxoc); + printf("Max open chunks per PU: %d\n", geometry_data->maxocpu); + printf("\n"); +} + +static void +print_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + const struct spdk_uuid *uuid; + uint32_t i; + uint32_t flags; + char uuid_str[SPDK_UUID_STRING_LEN]; + + nsdata = spdk_nvme_ns_get_data(ns); + flags = spdk_nvme_ns_get_flags(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + if (g_hex_dump) { + hex_dump(nsdata, sizeof(*nsdata)); + printf("\n"); + } + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Inactive namespace ID\n\n"); + return; + } + + printf("Deallocate: %s\n", + (flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? "Supported" : "Not Supported"); + printf("Deallocated/Unwritten Error: %s\n", + nsdata->nsfeat.dealloc_or_unwritten_error ? "Supported" : "Not Supported"); + printf("Deallocated Read Value: %s\n", + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_00 ? "All 0x00" : + nsdata->dlfeat.bits.read_value == SPDK_NVME_DEALLOC_READ_FF ? "All 0xFF" : + "Unknown"); + printf("Deallocate in Write Zeroes: %s\n", + nsdata->dlfeat.bits.write_zero_deallocate ? "Supported" : "Not Supported"); + printf("Deallocated Guard Field: %s\n", + nsdata->dlfeat.bits.guard_value ? "CRC for Read Value" : "0xFFFF"); + printf("Flush: %s\n", + (flags & SPDK_NVME_NS_FLUSH_SUPPORTED) ? 
"Supported" : "Not Supported"); + printf("Reservation: %s\n", + (flags & SPDK_NVME_NS_RESERVATION_SUPPORTED) ? "Supported" : "Not Supported"); + if (flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + printf("End-to-End Data Protection: Supported\n"); + printf("Protection Type: Type%d\n", nsdata->dps.pit); + printf("Metadata Transfered as: %s\n", + nsdata->flbas.extended ? "Extended Data LBA" : "Separate Metadata Buffer"); + printf("Metadata Location: %s\n", + nsdata->dps.md_start ? "First 8 Bytes" : "Last 8 Bytes"); + } + printf("Namespace Sharing Capabilities: %s\n", + nsdata->nmic.can_share ? "Multiple Controllers" : "Private"); + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, + (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + if (nsdata->noiob) { + printf("Optimal I/O Boundary: %u blocks\n", nsdata->noiob); + } + if (!spdk_mem_all_zero(nsdata->nguid, sizeof(nsdata->nguid))) { + printf("NGUID: "); + print_hex_be(nsdata->nguid, sizeof(nsdata->nguid)); + printf("\n"); + } + if (!spdk_mem_all_zero(&nsdata->eui64, sizeof(nsdata->eui64))) { + printf("EUI64: "); + print_hex_be(&nsdata->eui64, sizeof(nsdata->eui64)); + printf("\n"); + } + uuid = spdk_nvme_ns_get_uuid(ns); + if (uuid) { + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), uuid); + printf("UUID: %s\n", uuid_str); + } + printf("Thin Provisioning: %s\n", + nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + printf("Per-NS Atomic Units: %s\n", + nsdata->nsfeat.ns_atomic_write_unit ? "Yes" : "No"); + if (nsdata->nawun) { + printf("Atomic Write Unit (Normal): %d\n", nsdata->nawun + 1); + } + if (nsdata->nawupf) { + printf("Atomic Write Unit (PFail): %d\n", nsdata->nawupf + 1); + } + + printf("NGUID/EUI64 Never Reused: %s\n", + nsdata->nsfeat.guid_never_reused ? 
"Yes" : "No"); + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("\n"); + + if (spdk_nvme_ctrlr_is_ocssd_supported(spdk_nvme_ns_get_ctrlr(ns))) { + get_ocssd_geometry(ns, &geometry_data); + print_ocssd_geometry(&geometry_data); + get_ocssd_chunk_info_log_page(ns); + print_ocssd_chunk_info(g_ocssd_chunk_info_page, NUM_CHUNK_INFO_ENTRIES); + } +} + +static const char * +admin_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_DELETE_IO_SQ: + return "Delete I/O Submission Queue"; + case SPDK_NVME_OPC_CREATE_IO_SQ: + return "Create I/O Submission Queue"; + case SPDK_NVME_OPC_GET_LOG_PAGE: + return "Get Log Page"; + case SPDK_NVME_OPC_DELETE_IO_CQ: + return "Delete I/O Completion Queue"; + case SPDK_NVME_OPC_CREATE_IO_CQ: + return "Create I/O Completion Queue"; + case SPDK_NVME_OPC_IDENTIFY: + return "Identify"; + case SPDK_NVME_OPC_ABORT: + return "Abort"; + case SPDK_NVME_OPC_SET_FEATURES: + return "Set Features"; + case SPDK_NVME_OPC_GET_FEATURES: + return "Get Features"; + case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST: + return "Asynchronous Event Request"; + case SPDK_NVME_OPC_NS_MANAGEMENT: + return "Namespace Management"; + case SPDK_NVME_OPC_FIRMWARE_COMMIT: + return "Firmware Commit"; + case SPDK_NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD: + return "Firmware Image Download"; + case SPDK_NVME_OPC_DEVICE_SELF_TEST: + return "Device Self-test"; + case SPDK_NVME_OPC_NS_ATTACHMENT: + return "Namespace Attachment"; + case SPDK_NVME_OPC_KEEP_ALIVE: + return "Keep Alive"; + case SPDK_NVME_OPC_DIRECTIVE_SEND: + return "Directive Send"; + case SPDK_NVME_OPC_DIRECTIVE_RECEIVE: + return "Directive Receive"; + case SPDK_NVME_OPC_VIRTUALIZATION_MANAGEMENT: + return "Virtualization Management"; + case SPDK_NVME_OPC_NVME_MI_SEND: + 
return "NVMe-MI Send"; + case SPDK_NVME_OPC_NVME_MI_RECEIVE: + return "NVMe-MI Receive"; + case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG: + return "Doorbell Buffer Config"; + case SPDK_NVME_OPC_FORMAT_NVM: + return "Format NVM"; + case SPDK_NVME_OPC_SECURITY_SEND: + return "Security Send"; + case SPDK_NVME_OPC_SECURITY_RECEIVE: + return "Security Receive"; + case SPDK_NVME_OPC_SANITIZE: + return "Sanitize"; + default: + if (opc >= 0xC0) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static const char * +io_opc_name(uint8_t opc) +{ + switch (opc) { + case SPDK_NVME_OPC_FLUSH: + return "Flush"; + case SPDK_NVME_OPC_WRITE: + return "Write"; + case SPDK_NVME_OPC_READ: + return "Read"; + case SPDK_NVME_OPC_WRITE_UNCORRECTABLE: + return "Write Uncorrectable"; + case SPDK_NVME_OPC_COMPARE: + return "Compare"; + case SPDK_NVME_OPC_WRITE_ZEROES: + return "Write Zeroes"; + case SPDK_NVME_OPC_DATASET_MANAGEMENT: + return "Dataset Management"; + case SPDK_NVME_OPC_RESERVATION_REGISTER: + return "Reservation Register"; + case SPDK_NVME_OPC_RESERVATION_REPORT: + return "Reservation Report"; + case SPDK_NVME_OPC_RESERVATION_ACQUIRE: + return "Reservation Acquire"; + case SPDK_NVME_OPC_RESERVATION_RELEASE: + return "Reservation Release"; + default: + if (opc >= 0x80) { + return "Vendor specific"; + } + return "Unknown"; + } +} + +static void +print_controller(struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_transport_id *trid) +{ + const struct spdk_nvme_ctrlr_data *cdata; + union spdk_nvme_cap_register cap; + union spdk_nvme_vs_register vs; + uint8_t str[512]; + uint32_t i; + struct spdk_nvme_error_information_entry *error_entry; + struct spdk_pci_addr pci_addr; + struct spdk_pci_device *pci_dev; + struct spdk_pci_id pci_id; + uint32_t nsid; + + cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr); + vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr); + + get_features(ctrlr); + get_log_pages(ctrlr); + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + 
printf("=====================================================\n"); + if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) { + printf("NVMe over Fabrics controller at %s:%s: %s\n", + trid->traddr, trid->trsvcid, trid->subnqn); + } else { + if (spdk_pci_addr_parse(&pci_addr, trid->traddr) != 0) { + return; + } + + pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + if (!pci_dev) { + return; + } + + pci_id = spdk_pci_device_get_id(pci_dev); + + printf("NVMe Controller at %04x:%02x:%02x.%x [%04x:%04x]\n", + pci_addr.domain, pci_addr.bus, + pci_addr.dev, pci_addr.func, + pci_id.vendor_id, pci_id.device_id); + } + printf("=====================================================\n"); + + if (g_hex_dump) { + hex_dump(cdata, sizeof(*cdata)); + printf("\n"); + } + + printf("Controller Capabilities/Features\n"); + printf("================================\n"); + printf("Vendor ID: %04x\n", cdata->vid); + printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); + printf("Serial Number: "); + print_ascii_string(cdata->sn, sizeof(cdata->sn)); + printf("\n"); + printf("Model Number: "); + print_ascii_string(cdata->mn, sizeof(cdata->mn)); + printf("\n"); + printf("Firmware Version: "); + print_ascii_string(cdata->fr, sizeof(cdata->fr)); + printf("\n"); + printf("Recommended Arb Burst: %d\n", cdata->rab); + printf("IEEE OUI Identifier: %02x %02x %02x\n", + cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); + printf("Multi-path I/O\n"); + printf(" May have multiple subsystem ports: %s\n", cdata->cmic.multi_port ? "Yes" : "No"); + printf(" May be connected to multiple hosts: %s\n", cdata->cmic.multi_host ? "Yes" : "No"); + printf(" Associated with SR-IOV VF: %s\n", cdata->cmic.sr_iov ? 
"Yes" : "No"); + printf("Max Data Transfer Size: "); + if (cdata->mdts == 0) { + printf("Unlimited\n"); + } else { + printf("%" PRIu64 "\n", (uint64_t)1 << (12 + cap.bits.mpsmin + cdata->mdts)); + } + if (features[SPDK_NVME_FEAT_ERROR_RECOVERY].valid) { + unsigned tler = features[SPDK_NVME_FEAT_ERROR_RECOVERY].result & 0xFFFF; + printf("Error Recovery Timeout: "); + if (tler == 0) { + printf("Unlimited\n"); + } else { + printf("%u milliseconds\n", tler * 100); + } + } + printf("NVMe Specification Version (VS): %u.%u", vs.bits.mjr, vs.bits.mnr); + if (vs.bits.ter) { + printf(".%u", vs.bits.ter); + } + printf("\n"); + if (cdata->ver.raw != 0) { + printf("NVMe Specification Version (Identify): %u.%u", cdata->ver.bits.mjr, cdata->ver.bits.mnr); + if (cdata->ver.bits.ter) { + printf(".%u", cdata->ver.bits.ter); + } + printf("\n"); + } + + printf("Maximum Queue Entries: %u\n", cap.bits.mqes + 1); + printf("Contiguous Queues Required: %s\n", cap.bits.cqr ? "Yes" : "No"); + printf("Arbitration Mechanisms Supported\n"); + printf(" Weighted Round Robin: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_WRR ? "Supported" : "Not Supported"); + printf(" Vendor Specific: %s\n", + cap.bits.ams & SPDK_NVME_CAP_AMS_VS ? "Supported" : "Not Supported"); + printf("Reset Timeout: %" PRIu64 " ms\n", (uint64_t)500 * cap.bits.to); + printf("Doorbell Stride: %" PRIu64 " bytes\n", + (uint64_t)1 << (2 + cap.bits.dstrd)); + printf("NVM Subsystem Reset: %s\n", + cap.bits.nssrs ? "Supported" : "Not Supported"); + printf("Command Sets Supported\n"); + printf(" NVM Command Set: %s\n", + cap.bits.css & SPDK_NVME_CAP_CSS_NVM ? "Supported" : "Not Supported"); + printf("Boot Partition: %s\n", + cap.bits.bps ? 
"Supported" : "Not Supported"); + printf("Memory Page Size Minimum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmin)); + printf("Memory Page Size Maximum: %" PRIu64 " bytes\n", + (uint64_t)1 << (12 + cap.bits.mpsmax)); + printf("Optional Asynchronous Events Supported\n"); + printf(" Namespace Attribute Notices: %s\n", + cdata->oaes.ns_attribute_notices ? "Supported" : "Not Supported"); + printf(" Firmware Activation Notices: %s\n", + cdata->oaes.fw_activation_notices ? "Supported" : "Not Supported"); + + printf("128-bit Host Identifier: %s\n", + cdata->ctratt.host_id_exhid_supported ? "Supported" : "Not Supported"); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Security Send/Receive: %s\n", + cdata->oacs.security ? "Supported" : "Not Supported"); + printf("Format NVM: %s\n", + cdata->oacs.format ? "Supported" : "Not Supported"); + printf("Firmware Activate/Download: %s\n", + cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Namespace Management: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Device Self-Test: %s\n", + cdata->oacs.device_self_test ? "Supported" : "Not Supported"); + printf("Directives: %s\n", + cdata->oacs.directives ? "Supported" : "Not Supported"); + printf("NVMe-MI: %s\n", + cdata->oacs.nvme_mi ? "Supported" : "Not Supported"); + printf("Virtualization Management: %s\n", + cdata->oacs.virtualization_management ? "Supported" : "Not Supported"); + printf("Doorbell Buffer Config: %s\n", + cdata->oacs.doorbell_buffer_config ? 
"Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl + 1); + printf("Async Event Request Limit: %d\n", cdata->aerl + 1); + printf("Number of Firmware Slots: "); + if (cdata->oacs.firmware != 0) { + printf("%d\n", cdata->frmw.num_slots); + } else { + printf("N/A\n"); + } + printf("Firmware Slot 1 Read-Only: "); + if (cdata->oacs.firmware != 0) { + printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + } else { + printf("N/A\n"); + } + if (cdata->fwug == 0x00) { + printf("Firmware Update Granularity: No Information Provided\n"); + } else if (cdata->fwug == 0xFF) { + printf("Firmware Update Granularity: No Restriction\n"); + } else { + printf("Firmware Update Granularity: %u KiB\n", + cdata->fwug * 4); + } + printf("Per-Namespace SMART Log: %s\n", + cdata->lpa.ns_smart ? "Yes" : "No"); + printf("Command Effects Log Page: %s\n", + cdata->lpa.celp ? "Supported" : "Not Supported"); + printf("Get Log Page Extended Data: %s\n", + cdata->lpa.edlp ? "Supported" : "Not Supported"); + printf("Telemetry Log Pages: %s\n", + cdata->lpa.telemetry ? "Supported" : "Not Supported"); + printf("Error Log Page Entries Supported: %d\n", cdata->elpe + 1); + if (cdata->kas == 0) { + printf("Keep Alive: Not Supported\n"); + } else { + printf("Keep Alive: Supported\n"); + printf("Keep Alive Granularity: %u ms\n", + cdata->kas * 100); + } + printf("\n"); + + printf("NVM Command Set Attributes\n"); + printf("==========================\n"); + printf("Submission Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->sqes.max); + printf(" Min: %d\n", 1 << cdata->sqes.min); + printf("Completion Queue Entry Size\n"); + printf(" Max: %d\n", 1 << cdata->cqes.max); + printf(" Min: %d\n", 1 << cdata->cqes.min); + printf("Number of Namespaces: %d\n", cdata->nn); + printf("Compare Command: %s\n", + cdata->oncs.compare ? "Supported" : "Not Supported"); + printf("Write Uncorrectable Command: %s\n", + cdata->oncs.write_unc ? 
"Supported" : "Not Supported"); + printf("Dataset Management Command: %s\n", + cdata->oncs.dsm ? "Supported" : "Not Supported"); + printf("Write Zeroes Command: %s\n", + cdata->oncs.write_zeroes ? "Supported" : "Not Supported"); + printf("Set Features Save Field: %s\n", + cdata->oncs.set_features_save ? "Supported" : "Not Supported"); + printf("Reservations: %s\n", + cdata->oncs.reservations ? "Supported" : "Not Supported"); + printf("Timestamp: %s\n", + cdata->oncs.timestamp ? "Supported" : "Not Supported"); + printf("Volatile Write Cache: %s\n", + cdata->vwc.present ? "Present" : "Not Present"); + printf("Atomic Write Unit (Normal): %d\n", cdata->awun + 1); + printf("Atomic Write Unit (PFail): %d\n", cdata->awupf + 1); + printf("Scatter-Gather List\n"); + printf(" SGL Command Set: %s\n", + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED ? "Supported" : + cdata->sgls.supported == SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED ? "Supported (Dword aligned)" : + "Not Supported"); + printf(" SGL Keyed: %s\n", + cdata->sgls.keyed_sgl ? "Supported" : "Not Supported"); + printf(" SGL Bit Bucket Descriptor: %s\n", + cdata->sgls.bit_bucket_descriptor ? "Supported" : "Not Supported"); + printf(" SGL Metadata Pointer: %s\n", + cdata->sgls.metadata_pointer ? "Supported" : "Not Supported"); + printf(" Oversized SGL: %s\n", + cdata->sgls.oversized_sgl ? "Supported" : "Not Supported"); + printf(" SGL Metadata Address: %s\n", + cdata->sgls.metadata_address ? "Supported" : "Not Supported"); + printf(" SGL Offset: %s\n", + cdata->sgls.sgl_offset ? "Supported" : "Not Supported"); + printf(" Transport SGL Data Block: %s\n", + cdata->sgls.transport_sgl ? 
"Supported" : "Not Supported"); + printf("\n"); + + printf("Firmware Slot Information\n"); + printf("=========================\n"); + if (g_hex_dump) { + hex_dump(&firmware_page, sizeof(firmware_page)); + printf("\n"); + } + printf("Active slot: %u\n", firmware_page.afi.active_slot); + if (firmware_page.afi.next_reset_slot) { + printf("Next controller reset slot: %u\n", firmware_page.afi.next_reset_slot); + } + for (i = 0; i < 7; i++) { + if (!spdk_mem_all_zero(firmware_page.revision[i], sizeof(firmware_page.revision[i]))) { + printf("Slot %u Firmware Revision: ", i + 1); + print_ascii_string(firmware_page.revision[i], sizeof(firmware_page.revision[i])); + printf("\n"); + } + } + printf("\n"); + + if (cdata->lpa.celp) { + printf("Commands Supported and Effects\n"); + printf("==============================\n"); + + if (g_hex_dump) { + hex_dump(&cmd_effects_log_page, sizeof(cmd_effects_log_page)); + printf("\n"); + } + + printf("Admin Commands\n"); + printf("--------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.admin_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.admin_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + admin_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? "Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? "All-NS-Exclusive" : ""); + } + } + + printf("I/O Commands\n"); + printf("------------\n"); + for (i = 0; i < SPDK_COUNTOF(cmd_effects_log_page.io_cmds_supported); i++) { + struct spdk_nvme_cmds_and_effect_entry *cmd = &cmd_effects_log_page.io_cmds_supported[i]; + if (cmd->csupp) { + printf("%30s (%02Xh): Supported %s%s%s%s%s\n", + io_opc_name(i), i, + cmd->lbcc ? "LBA-Change " : "", + cmd->ncc ? "NS-Cap-Change " : "", + cmd->nic ? "NS-Inventory-Change " : "", + cmd->ccc ? 
"Ctrlr-Cap-Change " : "", + cmd->cse == 0 ? "" : cmd->cse == 1 ? "Per-NS-Exclusive" : cmd->cse == 2 ? "All-NS-Exclusive" : ""); + } + } + printf("\n"); + } + + printf("Error Log\n"); + printf("=========\n"); + for (i = 0; i <= cdata->elpe; i++) { + error_entry = &error_page[i]; + if (error_entry->error_count == 0) { + continue; + } + if (i != 0) { + printf("-----------\n"); + } + + printf("Entry: %u\n", i); + printf("Error Count: 0x%"PRIx64"\n", error_entry->error_count); + printf("Submission Queue Id: 0x%x\n", error_entry->sqid); + printf("Command Id: 0x%x\n", error_entry->cid); + printf("Phase Bit: %x\n", error_entry->status.p); + printf("Status Code: 0x%x\n", error_entry->status.sc); + printf("Status Code Type: 0x%x\n", error_entry->status.sct); + printf("Do Not Retry: %x\n", error_entry->status.dnr); + printf("Error Location: 0x%x\n", error_entry->error_location); + printf("LBA: 0x%"PRIx64"\n", error_entry->lba); + printf("Namespace: 0x%x\n", error_entry->nsid); + printf("Vendor Log Page: 0x%x\n", error_entry->vendor_specific); + + } + printf("\n"); + + if (features[SPDK_NVME_FEAT_ARBITRATION].valid) { + uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result; + unsigned ab, lpw, mpw, hpw; + + ab = arb & 0x7; + lpw = ((arb >> 8) & 0xFF) + 1; + mpw = ((arb >> 16) & 0xFF) + 1; + hpw = ((arb >> 24) & 0xFF) + 1; + + printf("Arbitration\n"); + printf("===========\n"); + printf("Arbitration Burst: "); + if (ab == 0x7) { + printf("no limit\n"); + } else { + printf("%u\n", 1u << ab); + } + printf("Low Priority Weight: %u\n", lpw); + printf("Medium Priority Weight: %u\n", mpw); + printf("High Priority Weight: %u\n", hpw); + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_POWER_MANAGEMENT].valid) { + unsigned ps = features[SPDK_NVME_FEAT_POWER_MANAGEMENT].result & 0x1F; + printf("Power Management\n"); + printf("================\n"); + printf("Number of Power States: %u\n", cdata->npss + 1); + printf("Current Power State: Power State #%u\n", ps); + for (i = 0; i <= 
cdata->npss; i++) { + const struct spdk_nvme_power_state *psd = &cdata->psd[i]; + printf("Power State #%u: ", i); + if (psd->mps) { + /* MP scale is 0.0001 W */ + printf("Max Power: %u.%04u W\n", + psd->mp / 10000, + psd->mp % 10000); + } else { + /* MP scale is 0.01 W */ + printf("Max Power: %3u.%02u W\n", + psd->mp / 100, + psd->mp % 100); + } + /* TODO: print other power state descriptor fields */ + } + printf("Non-Operational Permissive Mode: %s\n", + cdata->ctratt.non_operational_power_state_permissive_mode ? "Supported" : "Not Supported"); + printf("\n"); + } + + if (features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].valid) { + printf("Health Information\n"); + printf("==================\n"); + + if (g_hex_dump) { + hex_dump(&health_page, sizeof(health_page)); + printf("\n"); + } + + printf("Critical Warnings:\n"); + printf(" Available Spare Space: %s\n", + health_page.critical_warning.bits.available_spare ? "WARNING" : "OK"); + printf(" Temperature: %s\n", + health_page.critical_warning.bits.temperature ? "WARNING" : "OK"); + printf(" Device Reliability: %s\n", + health_page.critical_warning.bits.device_reliability ? "WARNING" : "OK"); + printf(" Read Only: %s\n", + health_page.critical_warning.bits.read_only ? "Yes" : "No"); + printf(" Volatile Memory Backup: %s\n", + health_page.critical_warning.bits.volatile_memory_backup ? 
"WARNING" : "OK"); + printf("Current Temperature: %u Kelvin (%d Celsius)\n", + health_page.temperature, + (int)health_page.temperature - 273); + printf("Temperature Threshold: %u Kelvin (%d Celsius)\n", + features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result, + (int)features[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD].result - 273); + printf("Available Spare: %u%%\n", health_page.available_spare); + printf("Available Spare Threshold: %u%%\n", health_page.available_spare_threshold); + printf("Life Percentage Used: %u%%\n", health_page.percentage_used); + printf("Data Units Read: "); + print_uint128_dec(health_page.data_units_read); + printf("\n"); + printf("Data Units Written: "); + print_uint128_dec(health_page.data_units_written); + printf("\n"); + printf("Host Read Commands: "); + print_uint128_dec(health_page.host_read_commands); + printf("\n"); + printf("Host Write Commands: "); + print_uint128_dec(health_page.host_write_commands); + printf("\n"); + printf("Controller Busy Time: "); + print_uint128_dec(health_page.controller_busy_time); + printf(" minutes\n"); + printf("Power Cycles: "); + print_uint128_dec(health_page.power_cycles); + printf("\n"); + printf("Power On Hours: "); + print_uint128_dec(health_page.power_on_hours); + printf(" hours\n"); + printf("Unsafe Shutdowns: "); + print_uint128_dec(health_page.unsafe_shutdowns); + printf("\n"); + printf("Unrecoverable Media Errors: "); + print_uint128_dec(health_page.media_errors); + printf("\n"); + printf("Lifetime Error Log Entries: "); + print_uint128_dec(health_page.num_error_info_log_entries); + printf("\n"); + printf("Warning Temperature Time: %u minutes\n", health_page.warning_temp_time); + printf("Critical Temperature Time: %u minutes\n", health_page.critical_temp_time); + for (i = 0; i < 8; i++) { + if (health_page.temp_sensor[i] != 0) { + printf("Temperature Sensor %d: %u Kelvin (%d Celsius)\n", + i + 1, health_page.temp_sensor[i], + (int)health_page.temp_sensor[i] - 273); + } + } + printf("\n"); + } + + 
if (features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].valid) { + uint32_t result = features[SPDK_NVME_FEAT_NUMBER_OF_QUEUES].result; + + printf("Number of Queues\n"); + printf("================\n"); + printf("Number of I/O Submission Queues: %u\n", (result & 0xFFFF) + 1); + printf("Number of I/O Completion Queues: %u\n", (result & 0xFFFF0000 >> 16) + 1); + printf("\n"); + } + + if (features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].valid) { + uint32_t result = features[SPDK_OCSSD_FEAT_MEDIA_FEEDBACK].result; + + printf("OCSSD Media Feedback\n"); + printf("=======================\n"); + printf("High ECC status: %u\n", (result & 0x1)); + printf("Vector High ECC status: %u\n", (result & 0x2 >> 1)); + printf("\n"); + } + + if (cdata->hctma.bits.supported) { + printf("Host Controlled Thermal Management\n"); + printf("==================================\n"); + printf("Minimum Thermal Management Temperature: "); + if (cdata->mntmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mntmt, (int)cdata->mntmt - 273); + } else { + printf("Not Reported\n"); + } + printf("Maximum Thermal Managment Temperature: "); + if (cdata->mxtmt) { + printf("%u Kelvin (%d Celsius)\n", cdata->mxtmt, (int)cdata->mxtmt - 273); + } else { + printf("Not Reported\n"); + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_SMART)) { + size_t i = 0; + + printf("Intel Health Information\n"); + printf("==================\n"); + for (i = 0; + i < SPDK_COUNTOF(intel_smart_page.attributes); i++) { + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PROGRAM_FAIL_COUNT) { + printf("Program Fail Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_ERASE_FAIL_COUNT) { + printf("Erase Fail Count:\n"); + printf(" Normalized Value : %d\n", + 
intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_WEAR_LEVELING_COUNT) { + printf("Wear Leveling Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Min: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[0], 2); + printf("\n"); + printf(" Max: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[2], 2); + printf("\n"); + printf(" Avg: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[4], 2); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_E2E_ERROR_COUNT) { + printf("End to End Error Detection Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_CRC_ERROR_COUNT) { + printf("CRC Error Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_MEDIA_WEAR) { + printf("Timed Workload, Media Wear:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_HOST_READ_PERCENTAGE) { + printf("Timed Workload, Host Read/Write Ratio:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" 
Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("%%"); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_TIMER) { + printf("Timed Workload, Timer:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS) { + printf("Thermal Throttle Status:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value:\n"); + printf(" Percentage: %d%%\n", intel_smart_page.attributes[i].raw_value[0]); + printf(" Throttling Event Count: "); + print_uint_var_dec(&intel_smart_page.attributes[i].raw_value[1], 4); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER) { + printf("Retry Buffer Overflow Counter:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT) { + printf("PLL Lock Loss Count:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == SPDK_NVME_INTEL_SMART_NAND_BYTES_WRITTEN) { + printf("NAND Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + if (intel_smart_page.attributes[i].code == 
SPDK_NVME_INTEL_SMART_HOST_BYTES_WRITTEN) { + printf("Host Bytes Written:\n"); + printf(" Normalized Value : %d\n", + intel_smart_page.attributes[i].normalized_value); + printf(" Current Raw Value: "); + print_uint_var_dec(intel_smart_page.attributes[i].raw_value, 6); + printf("\n"); + } + } + printf("\n"); + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_LOG_TEMPERATURE)) { + printf("Intel Temperature Information\n"); + printf("==================\n"); + printf("Current Temperature: %lu\n", intel_temperature_page.current_temperature); + printf("Overtemp shutdown Flag for last critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_last); + printf("Overtemp shutdown Flag for life critical component temperature: %lu\n", + intel_temperature_page.shutdown_flag_life); + printf("Highest temperature: %lu\n", intel_temperature_page.highest_temperature); + printf("Lowest temperature: %lu\n", intel_temperature_page.lowest_temperature); + printf("Specified Maximum Operating Temperature: %lu\n", + intel_temperature_page.specified_max_op_temperature); + printf("Specified Minimum Operating Temperature: %lu\n", + intel_temperature_page.specified_min_op_temperature); + printf("Estimated offset: %ld\n", intel_temperature_page.estimated_offset); + printf("\n"); + printf("\n"); + + } + + if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr, SPDK_NVME_INTEL_MARKETING_DESCRIPTION)) { + printf("Intel Marketing Information\n"); + printf("==================\n"); + snprintf(str, sizeof(intel_md_page.marketing_product), "%s", intel_md_page.marketing_product); + printf("Marketing Product Information: %s\n", str); + printf("\n"); + printf("\n"); + } + + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + print_namespace(spdk_nvme_ctrlr_get_ns(ctrlr, nsid)); + } + + if (g_discovery_page) { + printf("Discovery Log Page\n"); + printf("==================\n"); + + if (g_hex_dump) 
{ + hex_dump(g_discovery_page, g_discovery_page_size); + printf("\n"); + } + + printf("Generation Counter: %" PRIu64 "\n", + from_le64(&g_discovery_page->genctr)); + printf("Number of Records: %" PRIu64 "\n", + from_le64(&g_discovery_page->numrec)); + printf("Record Format: %" PRIu16 "\n", + from_le16(&g_discovery_page->recfmt)); + printf("\n"); + + for (i = 0; i < g_discovery_page_numrec; i++) { + struct spdk_nvmf_discovery_log_page_entry *entry = &g_discovery_page->entries[i]; + + printf("Discovery Log Entry %u\n", i); + printf("----------------------\n"); + printf("Transport Type: %u (%s)\n", + entry->trtype, spdk_nvme_transport_id_trtype_str(entry->trtype)); + printf("Address Family: %u (%s)\n", + entry->adrfam, spdk_nvme_transport_id_adrfam_str(entry->adrfam)); + printf("Subsystem Type: %u (%s)\n", + entry->subtype, + entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY ? "Discovery Service" : + entry->subtype == SPDK_NVMF_SUBTYPE_NVME ? "NVM Subsystem" : + "Unknown"); + printf("Transport Requirements:\n"); + printf(" Secure Channel: %s\n", + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED ? "Not Specified" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_REQUIRED ? "Required" : + entry->treq.secure_channel == SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED ? 
"Not Required" : + "Reserved"); + printf("Port ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->portid), from_le16(&entry->portid)); + printf("Controller ID: %" PRIu16 " (0x%04" PRIx16 ")\n", + from_le16(&entry->cntlid), from_le16(&entry->cntlid)); + printf("Admin Max SQ Size: %" PRIu16 "\n", + from_le16(&entry->asqsz)); + snprintf(str, sizeof(entry->trsvcid) + 1, "%s", entry->trsvcid); + printf("Transport Service Identifier: %s\n", str); + snprintf(str, sizeof(entry->subnqn) + 1, "%s", entry->subnqn); + printf("NVM Subsystem Qualified Name: %s\n", str); + snprintf(str, sizeof(entry->traddr) + 1, "%s", entry->traddr); + printf("Transport Address: %s\n", str); + + if (entry->trtype == SPDK_NVMF_TRTYPE_RDMA) { + printf("Transport Specific Address Subtype - RDMA\n"); + printf(" RDMA QP Service Type: %u (%s)\n", + entry->tsas.rdma.rdma_qptype, + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED ? "Reliable Connected" : + entry->tsas.rdma.rdma_qptype == SPDK_NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM ? "Reliable Datagram" : + "Unknown"); + printf(" RDMA Provider Type: %u (%s)\n", + entry->tsas.rdma.rdma_prtype, + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_NONE ? "No provider specified" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IB ? "InfiniBand" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE ? "InfiniBand RoCE" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_ROCE2 ? "InfiniBand RoCE v2" : + entry->tsas.rdma.rdma_prtype == SPDK_NVMF_RDMA_PRTYPE_IWARP ? "iWARP" : + "Unknown"); + printf(" RDMA CM Service: %u (%s)\n", + entry->tsas.rdma.rdma_cms, + entry->tsas.rdma.rdma_cms == SPDK_NVMF_RDMA_CMS_RDMA_CM ? 
"RDMA_CM" : + "Unknown"); + if (entry->adrfam == SPDK_NVMF_ADRFAM_IB) { + printf(" RDMA Partition Key: %" PRIu32 "\n", + from_le32(&entry->tsas.rdma.rdma_pkey)); + } + } + } + free(g_discovery_page); + g_discovery_page = NULL; + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -r trid remote NVMe over Fabrics target address\n"); + printf(" Format: 'key:value [key:value] ...'\n"); + printf(" Keys:\n"); + printf(" trtype Transport type (e.g. RDMA)\n"); + printf(" adrfam Address family (e.g. IPv4, IPv6)\n"); + printf(" traddr Transport address (e.g. 192.168.100.8)\n"); + printf(" trsvcid Transport service identifier (e.g. 4420)\n"); + printf(" subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf(" Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n"); + + spdk_tracelog_usage(stdout, "-L"); + + printf(" -i shared memory group ID\n"); + printf(" -p core number in decimal to run this application which started from 0\n"); + printf(" -d DPDK huge memory size in MB\n"); + printf(" -x print hex dump of raw data\n"); + printf(" -v verbose (enable warnings)\n"); + printf(" -H show this usage\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op, rc; + + g_trid.trtype = SPDK_NVME_TRANSPORT_PCIE; + snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + while ((op = getopt(argc, argv, "d:i:p:r:xHL:")) != -1) { + switch (op) { + case 'd': + g_dpdk_mem = atoi(optarg); + break; + case 'i': + g_shm_id = atoi(optarg); + break; + case 'p': + g_master_core = atoi(optarg); + if (g_master_core < 0) { + fprintf(stderr, "Invalid core number\n"); + return 1; + } + snprintf(g_core_mask, sizeof(g_core_mask), "0x%llx", 1ULL << g_master_core); + break; + case 'r': + if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { + fprintf(stderr, "Error parsing transport address\n"); + return 1; + } + break; + case 'x': 
+ g_hex_dump = true; + break; + case 'L': + rc = spdk_log_set_trace_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -L flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + + case 'H': + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + g_controllers_found++; + print_controller(ctrlr, trid); + spdk_nvme_detach(ctrlr); +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + struct spdk_nvme_ctrlr *ctrlr; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "identify"; + opts.shm_id = g_shm_id; + opts.mem_size = g_dpdk_mem; + opts.mem_channel = 1; + opts.master_core = g_master_core; + opts.core_mask = g_core_mask; + if (g_trid.trtype != SPDK_NVME_TRANSPORT_PCIE) { + opts.no_pci = true; + } + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + /* A specific trid is required. 
*/ + if (strlen(g_trid.traddr) != 0) { + ctrlr = spdk_nvme_connect(&g_trid, NULL, 0); + if (!ctrlr) { + fprintf(stderr, "spdk_nvme_connect() failed\n"); + return 1; + } + + g_controllers_found++; + print_controller(ctrlr, &g_trid); + spdk_nvme_detach(ctrlr); + } else if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_controllers_found == 0) { + fprintf(stderr, "No NVMe controllers found.\n"); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/nvme_manage/.gitignore b/src/spdk/examples/nvme/nvme_manage/.gitignore new file mode 100644 index 00000000..cdc78a1a --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/.gitignore @@ -0,0 +1 @@ +nvme_manage diff --git a/src/spdk/examples/nvme/nvme_manage/Makefile b/src/spdk/examples/nvme/nvme_manage/Makefile new file mode 100644 index 00000000..4f49872e --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = nvme_manage + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/nvme_manage/nvme_manage.c b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c new file mode 100644 index 00000000..360cbaac --- /dev/null +++ b/src/spdk/examples/nvme/nvme_manage/nvme_manage.c @@ -0,0 +1,970 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns_data *common_ns_data; + int outstanding_admin_cmds; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; +static int g_shm_id = -1; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +enum controller_display_model { + CONTROLLER_DISPLAY_ALL = 0x0, + CONTROLLER_DISPLAY_SIMPLISTIC = 0x1, +}; + +static int +cmp_devs(const void *ap, const void *bp) +{ + const struct dev *a = ap, *b = bp; + + return spdk_pci_addr_compare(&a->pci_addr, &b->pci_addr); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +identify_common_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + if (cpl->status.sc != SPDK_NVME_SC_SUCCESS) { + /* Identify Namespace for NSID = FFFFFFFFh is optional, so failure is not fatal. */ + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + dev->outstanding_admin_cmds--; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + struct spdk_nvme_cmd cmd; + + /* add to dev list */ + dev = &devs[num_devs++]; + spdk_pci_addr_parse(&dev->pci_addr, trid->traddr); + dev->ctrlr = ctrlr; + + /* Retrieve controller data */ + dev->cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + dev->common_ns_data = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (dev->common_ns_data == NULL) { + fprintf(stderr, "common_ns_data allocation failure\n"); + return; + } + + /* Identify Namespace with NSID set to FFFFFFFFh to get common namespace capabilities. 
*/ + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10 = 0; /* CNS = 0 (Identify Namespace) */ + cmd.nsid = SPDK_NVME_GLOBAL_NS_TAG; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, dev->common_ns_data, + sizeof(struct spdk_nvme_ns_data), identify_common_ns_cb, dev) != 0) { + dev->outstanding_admin_cmds--; + spdk_dma_free(dev->common_ns_data); + dev->common_ns_data = NULL; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void usage(void) +{ + printf("NVMe Management Options"); + printf("\n"); + printf("\t[1: list controllers]\n"); + printf("\t[2: create namespace]\n"); + printf("\t[3: delete namespace]\n"); + printf("\t[4: attach namespace to controller]\n"); + printf("\t[5: detach namespace from controller]\n"); + printf("\t[6: format namespace or controller]\n"); + printf("\t[7: firmware update]\n"); + printf("\t[8: quit]\n"); +} + +static void +display_namespace_dpc(const struct spdk_nvme_ns_data *nsdata) +{ + if (nsdata->dpc.pit1 || nsdata->dpc.pit2 || nsdata->dpc.pit3) { + if (nsdata->dpc.pit1) { + printf("PIT1 "); + } + + if (nsdata->dpc.pit2) { + printf("PIT2 "); + } + + if (nsdata->dpc.pit3) { + printf("PIT3 "); + } + } else { + printf("Not Supported\n"); + return; + } + + if (nsdata->dpc.md_start && nsdata->dpc.md_end) { + printf("Location: Head or Tail\n"); + } else if (nsdata->dpc.md_start) { + printf("Location: Head\n"); + } else if (nsdata->dpc.md_end) { + printf("Location: Tail\n"); + } else { + printf("Not Supported\n"); + } +} + +static void +display_namespace(struct spdk_nvme_ns *ns) +{ + const struct spdk_nvme_ns_data *nsdata; + uint32_t i; + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Namespace ID:%d\n", spdk_nvme_ns_get_id(ns)); + + printf("Size (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nsze, + (long long)nsdata->nsze / 1024 / 1024); + printf("Capacity (in LBAs): %lld (%lldM)\n", + (long long)nsdata->ncap, 
+ (long long)nsdata->ncap / 1024 / 1024); + printf("Utilization (in LBAs): %lld (%lldM)\n", + (long long)nsdata->nuse, + (long long)nsdata->nuse / 1024 / 1024); + printf("Format Progress Indicator: %s\n", + nsdata->fpi.fpi_supported ? "Supported" : "Not Supported"); + if (nsdata->fpi.fpi_supported && nsdata->fpi.percentage_remaining) { + printf("Formatted Percentage: %d%%\n", 100 - nsdata->fpi.percentage_remaining); + } + printf("Number of LBA Formats: %d\n", nsdata->nlbaf + 1); + printf("Current LBA Format: LBA Format #%02d\n", + nsdata->flbas.format); + for (i = 0; i <= nsdata->nlbaf; i++) + printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", + i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + printf("Data Protection Capabilities:"); + display_namespace_dpc(nsdata); + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE == nsdata->dps.pit) { + printf("Data Protection Setting: N/A\n"); + } else { + printf("Data Protection Setting: PIT%d Location: %s\n", + nsdata->dps.pit, nsdata->dps.md_start ? "Head" : "Tail"); + } + printf("Multipath IO and Sharing: %s\n", + nsdata->nmic.can_share ? 
"Supported" : "Not Supported"); + printf("\n"); +} + +static void +display_controller(struct dev *dev, int model) +{ + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ctrlr_data *cdata; + uint8_t str[128]; + uint32_t i; + + cdata = spdk_nvme_ctrlr_get_data(dev->ctrlr); + + if (model == CONTROLLER_DISPLAY_SIMPLISTIC) { + printf("%04x:%02x:%02x.%02x ", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("%-40.40s %-20.20s ", + cdata->mn, cdata->sn); + printf("%5d ", cdata->cntlid); + printf("\n"); + return; + } + + printf("=====================================================\n"); + printf("NVMe Controller: %04x:%02x:%02x.%02x\n", + dev->pci_addr.domain, dev->pci_addr.bus, dev->pci_addr.dev, dev->pci_addr.func); + printf("============================\n"); + printf("Controller Capabilities/Features\n"); + printf("Controller ID: %d\n", cdata->cntlid); + snprintf(str, sizeof(cdata->sn) + 1, "%s", cdata->sn); + printf("Serial Number: %s\n", str); + printf("\n"); + + printf("Admin Command Set Attributes\n"); + printf("============================\n"); + printf("Namespace Manage And Attach: %s\n", + cdata->oacs.ns_manage ? "Supported" : "Not Supported"); + printf("Namespace Format: %s\n", + cdata->oacs.format ? 
"Supported" : "Not Supported"); + printf("\n"); + printf("NVM Command Set Attributes\n"); + printf("============================\n"); + if (cdata->fna.format_all_ns) { + printf("Namespace format operation applies to all namespaces\n"); + } else { + printf("Namespace format operation applies to per namespace\n"); + } + printf("\n"); + printf("Namespace Attributes\n"); + printf("============================\n"); + for (i = 1; i <= spdk_nvme_ctrlr_get_num_ns(dev->ctrlr); i++) { + ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, i); + if (ns == NULL) { + continue; + } + display_namespace(ns); + } +} + +static void +display_controller_list(void) +{ + struct dev *iter; + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_ALL); + } +} + +static char * +get_line(char *buf, int buf_size, FILE *f) +{ + char *ret; + size_t len; + + ret = fgets(buf, buf_size, f); + if (ret == NULL) { + return NULL; + } + + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') { + buf[len - 1] = '\0'; + } + return buf; +} + +static struct dev * +get_controller(void) +{ + struct spdk_pci_addr pci_addr; + char address[64]; + char *p; + int ch; + struct dev *iter; + + memset(address, 0, sizeof(address)); + + foreach_dev(iter) { + display_controller(iter, CONTROLLER_DISPLAY_SIMPLISTIC); + } + + printf("Please Input PCI Address(domain:bus:dev.func):\n"); + + while ((ch = getchar()) != '\n' && ch != EOF); + p = get_line(address, 64, stdin); + if (p == NULL) { + return NULL; + } + + while (isspace(*p)) { + p++; + } + + if (spdk_pci_addr_parse(&pci_addr, p) < 0) { + return NULL; + } + + foreach_dev(iter) { + if (spdk_pci_addr_compare(&pci_addr, &iter->pci_addr) == 0) { + return iter; + } + } + return NULL; +} + +static int +get_lba_format(const struct spdk_nvme_ns_data *ns_data) +{ + int lbaf, i; + + printf("\nSupported LBA formats:\n"); + for (i = 0; i <= ns_data->nlbaf; i++) { + printf("%2d: %d data bytes", i, 1 << ns_data->lbaf[i].lbads); + if (ns_data->lbaf[i].ms) { + printf(" + %d 
metadata bytes", ns_data->lbaf[i].ms); + } + printf("\n"); + } + + printf("Please input LBA format index (0 - %d):\n", ns_data->nlbaf); + if (scanf("%d", &lbaf) != 1 || lbaf > ns_data->nlbaf) { + return -1; + } + + return lbaf; +} + +static void +identify_allocated_ns_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + dev->outstanding_admin_cmds--; +} + +static uint32_t +get_allocated_nsid(struct dev *dev) +{ + uint32_t nsid; + size_t i; + struct spdk_nvme_ns_list *ns_list; + struct spdk_nvme_cmd cmd = {0}; + + ns_list = spdk_dma_zmalloc(sizeof(*ns_list), 4096, NULL); + if (ns_list == NULL) { + printf("Allocation error\n"); + return 0; + } + + cmd.opc = SPDK_NVME_OPC_IDENTIFY; + cmd.cdw10 = SPDK_NVME_IDENTIFY_ALLOCATED_NS_LIST; + cmd.nsid = 0; + + dev->outstanding_admin_cmds++; + if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, ns_list, sizeof(*ns_list), + identify_allocated_ns_cb, dev)) { + printf("Identify command failed\n"); + spdk_dma_free(ns_list); + return 0; + } + + while (dev->outstanding_admin_cmds) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + + printf("Allocated Namespace IDs:\n"); + for (i = 0; i < SPDK_COUNTOF(ns_list->ns_list); i++) { + if (ns_list->ns_list[i] == 0) { + break; + } + printf("%u\n", ns_list->ns_list[i]); + } + + spdk_dma_free(ns_list); + + printf("Please Input Namespace ID:\n"); + if (!scanf("%u", &nsid)) { + printf("Invalid Namespace ID\n"); + nsid = 0; + } + + return nsid; +} + +static void +ns_attach(struct dev *device, int attachment_op, int ctrlr_id, int ns_id) +{ + int ret = 0; + struct spdk_nvme_ctrlr_list *ctrlr_list; + + ctrlr_list = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_list), + 4096, NULL); + if (ctrlr_list == NULL) { + printf("Allocation error (controller list)\n"); + exit(1); + } + + ctrlr_list->ctrlr_count = 1; + ctrlr_list->ctrlr_list[0] = ctrlr_id; + + if (attachment_op == SPDK_NVME_NS_CTRLR_ATTACH) { + ret = spdk_nvme_ctrlr_attach_ns(device->ctrlr, ns_id, 
ctrlr_list); + } else if (attachment_op == SPDK_NVME_NS_CTRLR_DETACH) { + ret = spdk_nvme_ctrlr_detach_ns(device->ctrlr, ns_id, ctrlr_list); + } + + if (ret) { + fprintf(stdout, "ns attach: Failed\n"); + } + + spdk_dma_free(ctrlr_list); +} + +static void +ns_manage_add(struct dev *device, uint64_t ns_size, uint64_t ns_capacity, int ns_lbasize, + uint8_t ns_dps_type, uint8_t ns_dps_location, uint8_t ns_nmic) +{ + uint32_t nsid; + struct spdk_nvme_ns_data *ndata; + + ndata = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ns_data), 4096, NULL); + if (ndata == NULL) { + printf("Allocation error (namespace data)\n"); + exit(1); + } + + ndata->nsze = ns_size; + ndata->ncap = ns_capacity; + ndata->flbas.format = ns_lbasize; + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + ndata->dps.pit = ns_dps_type; + ndata->dps.md_start = ns_dps_location; + } + ndata->nmic.can_share = ns_nmic; + nsid = spdk_nvme_ctrlr_create_ns(device->ctrlr, ndata); + if (nsid == 0) { + fprintf(stdout, "ns manage: Failed\n"); + } else { + printf("Created namespace ID %u\n", nsid); + } + + spdk_dma_free(ndata); +} + +static void +ns_manage_delete(struct dev *device, int ns_id) +{ + int ret = 0; + + ret = spdk_nvme_ctrlr_delete_ns(device->ctrlr, ns_id); + if (ret) { + fprintf(stdout, "ns manage: Failed\n"); + return; + } +} + +static void +nvme_manage_format(struct dev *device, int ns_id, int ses, int pi, int pil, int ms, int lbaf) +{ + int ret = 0; + struct spdk_nvme_format format = {}; + + format.lbaf = lbaf; + format.ms = ms; + format.pi = pi; + format.pil = pil; + format.ses = ses; + ret = spdk_nvme_ctrlr_format(device->ctrlr, ns_id, &format); + if (ret) { + fprintf(stdout, "nvme format: Failed\n"); + return; + } +} + +static void +attach_and_detach_ns(int attachment_op) +{ + uint32_t nsid; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does 
not support ns management\n"); + return; + } + + nsid = get_allocated_nsid(ctrlr); + if (nsid == 0) { + printf("Invalid Namespace ID\n"); + return; + } + + ns_attach(ctrlr, attachment_op, ctrlr->cdata->cntlid, nsid); +} + +static void +add_ns(void) +{ + uint64_t ns_size = 0; + uint64_t ns_capacity = 0; + int ns_lbasize; + int ns_dps_type = 0; + int ns_dps_location = 0; + int ns_nmic = 0; + struct dev *ctrlr = NULL; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + if (!ctrlr->common_ns_data) { + printf("Controller did not return common namespace capabilities\n"); + return; + } + + ns_lbasize = get_lba_format(ctrlr->common_ns_data); + if (ns_lbasize < 0) { + printf("Invalid LBA format number\n"); + return; + } + + printf("Please Input Namespace Size (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_size)) { + printf("Invalid Namespace Size\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Namespace Capacity (in LBAs):\n"); + if (!scanf("%" SCNu64, &ns_capacity)) { + printf("Invalid Namespace Capacity\n"); + while (getchar() != '\n'); + return; + } + + printf("Please Input Data Protection Type (0 - 3):\n"); + if (!scanf("%d", &ns_dps_type)) { + printf("Invalid Data Protection Type\n"); + while (getchar() != '\n'); + return; + } + + if (SPDK_NVME_FMT_NVM_PROTECTION_DISABLE != ns_dps_type) { + printf("Please Input Data Protection Location (1: Head; 0: Tail):\n"); + if (!scanf("%d", &ns_dps_location)) { + printf("Invalid Data Protection Location\n"); + while (getchar() != '\n'); + return; + } + } + + printf("Please Input Multi-path IO and Sharing Capabilities (1: Share; 0: Private):\n"); + if (!scanf("%d", &ns_nmic)) { + printf("Invalid Multi-path IO and Sharing Capabilities\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_add(ctrlr, ns_size, ns_capacity, 
ns_lbasize, + ns_dps_type, ns_dps_location, ns_nmic); +} + +static void +delete_ns(void) +{ + int ns_id; + struct dev *ctrlr; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI Address.\n"); + return; + } + + if (!ctrlr->cdata->oacs.ns_manage) { + printf("Controller does not support ns management\n"); + return; + } + + printf("Please Input Namespace ID:\n"); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + + ns_manage_delete(ctrlr, ns_id); +} + +static void +format_nvm(void) +{ + int ns_id; + int ses; + int pil; + int pi; + int ms; + int lbaf; + char option; + struct dev *ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ns_data *nsdata; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.format) { + printf("Controller does not support Format NVM command\n"); + return; + } + + if (cdata->fna.format_all_ns) { + ns_id = SPDK_NVME_GLOBAL_NS_TAG; + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, 1); + } else { + printf("Please Input Namespace ID (1 - %d):\n", cdata->nn); + if (!scanf("%d", &ns_id)) { + printf("Invalid Namespace ID\n"); + while (getchar() != '\n'); + return; + } + ns = spdk_nvme_ctrlr_get_ns(ctrlr->ctrlr, ns_id); + } + + if (ns == NULL) { + printf("Namespace ID %d not found\n", ns_id); + while (getchar() != '\n'); + return; + } + + nsdata = spdk_nvme_ns_get_data(ns); + + printf("Please Input Secure Erase Setting:\n"); + printf(" 0: No secure erase operation requested\n"); + printf(" 1: User data erase\n"); + if (cdata->fna.crypto_erase_supported) { + printf(" 2: Cryptographic erase\n"); + } + if (!scanf("%d", &ses)) { + printf("Invalid Secure Erase Setting\n"); + while (getchar() != '\n'); + return; + } + + lbaf = get_lba_format(nsdata); + if (lbaf < 0) { + printf("Invalid LBA format number\n"); + return; + } 
+ + if (nsdata->lbaf[lbaf].ms) { + printf("Please Input Protection Information:\n"); + printf(" 0: Protection information is not enabled\n"); + printf(" 1: Protection information is enabled, Type 1\n"); + printf(" 2: Protection information is enabled, Type 2\n"); + printf(" 3: Protection information is enabled, Type 3\n"); + if (!scanf("%d", &pi)) { + printf("Invalid protection information\n"); + while (getchar() != '\n'); + return; + } + + if (pi) { + printf("Please Input Protection Information Location:\n"); + printf(" 0: Protection information transferred as the last eight bytes of metadata\n"); + printf(" 1: Protection information transferred as the first eight bytes of metadata\n"); + if (!scanf("%d", &pil)) { + printf("Invalid protection information location\n"); + while (getchar() != '\n'); + return; + } + } else { + pil = 0; + } + + printf("Please Input Metadata Setting:\n"); + printf(" 0: Metadata is transferred as part of a separate buffer\n"); + printf(" 1: Metadata is transferred as part of an extended data LBA\n"); + if (!scanf("%d", &ms)) { + printf("Invalid metadata setting\n"); + while (getchar() != '\n'); + return; + } + } else { + ms = 0; + pi = 0; + pil = 0; + } + + printf("Warning: use this utility at your own risk.\n" + "This command will format your namespace and all data will be lost.\n" + "This command may take several minutes to complete,\n" + "so do not interrupt the utility until it completes.\n" + "Press 'Y' to continue with the format operation.\n"); + + while (getchar() != '\n'); + if (!scanf("%c", &option)) { + printf("Invalid option\n"); + while (getchar() != '\n'); + return; + } + + if (option == 'y' || option == 'Y') { + nvme_manage_format(ctrlr, ns_id, ses, pi, pil, ms, lbaf); + } else { + printf("NVMe format abort\n"); + } +} + +static void +update_firmware_image(void) +{ + int rc; + int fd = -1; + int slot; + unsigned int size; + struct stat fw_stat; + char path[256]; + void *fw_image; + struct dev *ctrlr; + const struct 
spdk_nvme_ctrlr_data *cdata; + enum spdk_nvme_fw_commit_action commit_action; + struct spdk_nvme_status status; + + ctrlr = get_controller(); + if (ctrlr == NULL) { + printf("Invalid controller PCI BDF.\n"); + return; + } + + cdata = ctrlr->cdata; + + if (!cdata->oacs.firmware) { + printf("Controller does not support firmware download and commit command\n"); + return; + } + + printf("Please Input The Path Of Firmware Image\n"); + + if (get_line(path, sizeof(path), stdin) == NULL) { + printf("Invalid path setting\n"); + while (getchar() != '\n'); + return; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("Open file failed"); + return; + } + rc = fstat(fd, &fw_stat); + if (rc < 0) { + printf("Fstat failed\n"); + close(fd); + return; + } + + if (fw_stat.st_size % 4) { + printf("Firmware image size is not multiple of 4\n"); + close(fd); + return; + } + + size = fw_stat.st_size; + + fw_image = spdk_dma_zmalloc(size, 4096, NULL); + if (fw_image == NULL) { + printf("Allocation error\n"); + close(fd); + return; + } + + if (read(fd, fw_image, size) != ((ssize_t)(size))) { + printf("Read firmware image failed\n"); + close(fd); + spdk_dma_free(fw_image); + return; + } + close(fd); + + printf("Please Input Slot(0 - 7):\n"); + if (!scanf("%d", &slot)) { + printf("Invalid Slot\n"); + spdk_dma_free(fw_image); + while (getchar() != '\n'); + return; + } + + commit_action = SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG; + rc = spdk_nvme_ctrlr_update_firmware(ctrlr->ctrlr, fw_image, size, slot, commit_action, &status); + if (rc == -ENXIO && status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC && + status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) { + printf("conventional reset is needed to enable firmware !\n"); + } else if (rc) { + printf("spdk_nvme_ctrlr_update_firmware failed\n"); + } else { + printf("spdk_nvme_ctrlr_update_firmware success\n"); + } + spdk_dma_free(fw_image); +} + +static void +args_usage(const char *program_name) +{ + printf("%s [options]", 
program_name); + printf("\n"); + printf("options:\n"); + printf(" -i shared memory group ID\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "i:")) != -1) { + switch (op) { + case 'i': + g_shm_id = atoi(optarg); + break; + default: + args_usage(argv[0]); + return 1; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int i, rc; + struct spdk_env_opts opts; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "nvme_manage"; + opts.core_mask = "0x1"; + opts.shm_id = g_shm_id; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + qsort(devs, num_devs, sizeof(devs[0]), cmp_devs); + + usage(); + + while (1) { + int cmd; + bool exit_flag = false; + + if (!scanf("%d", &cmd)) { + printf("Invalid Command: command must be number 1-8\n"); + while (getchar() != '\n'); + usage(); + continue; + } + switch (cmd) { + case 1: + display_controller_list(); + break; + case 2: + add_ns(); + break; + case 3: + delete_ns(); + break; + case 4: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_ATTACH); + break; + case 5: + attach_and_detach_ns(SPDK_NVME_NS_CTRLR_DETACH); + break; + case 6: + format_nvm(); + break; + case 7: + update_firmware_image(); + break; + case 8: + exit_flag = true; + break; + default: + printf("Invalid Command\n"); + break; + } + + if (exit_flag) { + break; + } + + while (getchar() != '\n'); + printf("press Enter to display cmd menu ...\n"); + while (getchar() != '\n'); + usage(); + } + + printf("Cleaning up...\n"); + + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return 0; +} diff --git a/src/spdk/examples/nvme/perf/.gitignore b/src/spdk/examples/nvme/perf/.gitignore new file mode 100644 index 
00000000..bd14107d --- /dev/null +++ b/src/spdk/examples/nvme/perf/.gitignore @@ -0,0 +1 @@ +perf diff --git a/src/spdk/examples/nvme/perf/Makefile b/src/spdk/examples/nvme/perf/Makefile new file mode 100644 index 00000000..573f56a0 --- /dev/null +++ b/src/spdk/examples/nvme/perf/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = perf + +ifeq ($(OS),Linux) +SYS_LIBS += -laio +CFLAGS += -DHAVE_LIBAIO +endif + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/perf/README.md b/src/spdk/examples/nvme/perf/README.md new file mode 100644 index 00000000..e5ec38d1 --- /dev/null +++ b/src/spdk/examples/nvme/perf/README.md @@ -0,0 +1,5 @@ +# Compiling perf on FreeBSD + +To use perf test on FreeBSD over NVMe-oF, explicitly link userspace library of HBA. For example, on a setup with Mellanox HBA, + + LIBS += -lmlx5 diff --git a/src/spdk/examples/nvme/perf/perf.c b/src/spdk/examples/nvme/perf/perf.c new file mode 100644 index 00000000..f8f4d75e --- /dev/null +++ b/src/spdk/examples/nvme/perf/perf.c @@ -0,0 +1,1726 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * One I/O target registered for the test run: an NVMe namespace or, when
 * built with HAVE_LIBAIO, a file/device opened for Linux AIO. Entries are
 * pushed onto the singly linked g_namespaces list through `next`.
 */
struct ns_entry {
	/* Selects which member of `u` is in use. */
	enum entry_type type;

	union {
		struct {
			struct spdk_nvme_ctrlr *ctrlr;
			struct spdk_nvme_ns *ns;
		} nvme;
#if HAVE_LIBAIO
		struct {
			/* Descriptor returned by open() in register_aio_file(). */
			int fd;
		} aio;
#endif
	} u;

	struct ns_entry *next;
	/* g_io_size_bytes divided by the target's sector/block size. */
	uint32_t io_size_blocks;
	/*
	 * Request entries assumed per I/O (computed in register_ns() from the
	 * maximum transfer size); used as a lower bound for io_queue_requests
	 * when the queue pair is allocated.
	 */
	uint32_t num_io_requests;
	/* Target capacity divided by g_io_size_bytes. */
	uint64_t size_in_ios;
	/* PRACT/PRCHK flags; set only for namespaces reporting PI support. */
	uint32_t io_flags;
	/* NOTE(review): no reader or writer of these two fields is visible here. */
	uint16_t apptag_mask;
	uint16_t apptag;
	/* Human-readable label used in the result tables. */
	char name[1024];
	/* Result of spdk_nvme_ns_get_data(); left unset for AIO entries. */
	const struct spdk_nvme_ns_data *nsdata;
};
spdk_nvme_qpair *qpair; + } nvme; + +#if HAVE_LIBAIO + struct { + struct io_event *events; + io_context_t ctx; + } aio; +#endif + } u; + + struct ns_worker_ctx *next; + + struct spdk_histogram_data *histogram; +}; + +struct perf_task { + struct ns_worker_ctx *ns_ctx; + void *buf; + uint64_t submit_tsc; + uint16_t appmask; + uint16_t apptag; + uint64_t lba; + bool is_read; +#if HAVE_LIBAIO + struct iocb iocb; +#endif +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + struct worker_thread *next; + unsigned lcore; +}; + +static int g_outstanding_commands; + +static bool g_latency_ssd_tracking_enable = false; +static int g_latency_sw_tracking_level = 0; + +static struct ctrlr_entry *g_controllers = NULL; +static int g_controllers_found = 0; +static struct ns_entry *g_namespaces = NULL; +static int g_num_namespaces = 0; +static struct worker_thread *g_workers = NULL; +static int g_num_workers = 0; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_align = 0x200; +static uint32_t g_io_size_bytes; +static uint32_t g_max_io_md_size; +static uint32_t g_max_io_size_blocks; +static uint32_t g_metacfg_pract_flag; +static uint32_t g_metacfg_prchk_flags; +static int g_rw_percentage; +static int g_is_random; +static int g_queue_depth; +static int g_time_in_sec; +static uint32_t g_max_completions; +static int g_dpdk_mem; +static int g_shm_id = -1; +static uint32_t g_disable_sq_cmb; +static bool g_no_pci; +static bool g_warn; + +static const char *g_core_mask; + +struct trid_entry { + struct spdk_nvme_transport_id trid; + uint16_t nsid; + TAILQ_ENTRY(trid_entry) tailq; +}; + +static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list); + +static int g_aio_optind; /* Index of first AIO filename in argv */ + +static void +task_complete(struct perf_task *task); + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + uint32_t max_xfer_size, 
entries; + struct spdk_nvme_io_qpair_opts opts; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + g_warn = true; + return; + } + + if (spdk_nvme_ns_get_size(ns) < g_io_size_bytes || + spdk_nvme_ns_get_sector_size(ns) > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + g_warn = true; + return; + } + + max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns); + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + /* NVMe driver may add additional entries based on + * stripe size and maximum transfer size, we assume + * 1 more entry be used for stripe. + */ + entries = (g_io_size_bytes - 1) / max_xfer_size + 2; + if ((g_queue_depth * entries) > opts.io_queue_size) { + printf("controller IO queue size %u less than required\n", + opts.io_queue_size); + printf("Consider using lower queue depth or small IO size because " + "IO requests may be queued at the NVMe driver.\n"); + g_warn = true; + } + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->type = ENTRY_TYPE_NVME_NS; + entry->u.nvme.ctrlr = ctrlr; + entry->u.nvme.ns = ns; + entry->num_io_requests = entries; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / + g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + entry->io_flags = g_metacfg_pract_flag | g_metacfg_prchk_flags; + } + + if (g_max_io_md_size < spdk_nvme_ns_get_md_size(ns)) { + g_max_io_md_size = spdk_nvme_ns_get_md_size(ns); + } + + if (g_max_io_size_blocks < 
entry->io_size_blocks) { + g_max_io_size_blocks = entry->io_size_blocks; + } + + entry->nsdata = spdk_nvme_ns_get_data(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +unregister_namespaces(void) +{ + struct ns_entry *entry = g_namespaces; + + while (entry) { + struct ns_entry *next = entry->next; + free(entry); + entry = next; + } +} + +static void +enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + printf("enable_latency_tracking_complete failed\n"); + } + g_outstanding_commands--; +} + +static void +set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable) +{ + int res; + union spdk_nvme_intel_feat_latency_tracking latency_tracking; + + if (enable) { + latency_tracking.bits.enable = 0x01; + } else { + latency_tracking.bits.enable = 0x00; + } + + res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING, + latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL); + if (res) { + printf("fail to allocate nvme request.\n"); + return; + } + g_outstanding_commands++; + + while (g_outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry) +{ + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + uint32_t nsid; + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + entry->latency_page = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_rw_latency_page), + 4096, NULL); + if (entry->latency_page == NULL) { + printf("Allocation error (latency page)\n"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + 
entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + if (g_latency_ssd_tracking_enable && + spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) { + set_latency_tracking_feature(ctrlr, true); + } + + if (trid_entry->nsid == 0) { + for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } + } else { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid); + if (!ns) { + perror("Namespace does not exist."); + exit(1); + } + + register_ns(ctrlr, ns); + } + +} + +#if HAVE_LIBAIO +static int +register_aio_file(const char *path) +{ + struct ns_entry *entry; + + int flags, fd; + uint64_t size; + uint32_t blklen; + + if (g_rw_percentage == 100) { + flags = O_RDONLY; + } else if (g_rw_percentage == 0) { + flags = O_WRONLY; + } else { + flags = O_RDWR; + } + + flags |= O_DIRECT; + + fd = open(path, flags); + if (fd < 0) { + fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno)); + return -1; + } + + size = spdk_fd_get_size(fd); + if (size == 0) { + fprintf(stderr, "Could not determine size of AIO device %s\n", path); + close(fd); + return -1; + } + + blklen = spdk_fd_get_blocklen(fd); + if (blklen == 0) { + fprintf(stderr, "Could not determine block size of AIO device %s\n", path); + close(fd); + return -1; + } + + /* + * TODO: This should really calculate the LCM of the current g_io_align and blklen. + * For now, it's fairly safe to just assume all block sizes are powers of 2. 
+ */ + if (g_io_align < blklen) { + g_io_align = blklen; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + close(fd); + perror("aio ns_entry malloc"); + return -1; + } + + entry->type = ENTRY_TYPE_AIO_FILE; + entry->u.aio.fd = fd; + entry->size_in_ios = size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / blklen; + + snprintf(entry->name, sizeof(entry->name), "%s", path); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; + + return 0; +} + +static int +aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf, + unsigned long nbytes, uint64_t offset, void *cb_ctx) +{ + iocb->aio_fildes = fd; + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = cmd; + iocb->u.c.buf = buf; + iocb->u.c.nbytes = nbytes; + iocb->u.c.offset = offset; + iocb->data = cb_ctx; + + if (io_submit(aio_ctx, 1, &iocb) < 0) { + printf("io_submit"); + return -1; + } + + return 0; +} + +static void +aio_check_io(struct ns_worker_ctx *ns_ctx) +{ + int count, i; + struct timespec timeout; + + timeout.tv_sec = 0; + timeout.tv_nsec = 0; + + count = io_getevents(ns_ctx->u.aio.ctx, 1, g_queue_depth, ns_ctx->u.aio.events, &timeout); + if (count < 0) { + fprintf(stderr, "io_getevents error\n"); + exit(1); + } + + for (i = 0; i < count; i++) { + task_complete(ns_ctx->u.aio.events[i].data); + } +} +#endif /* HAVE_LIBAIO */ + +static void +task_extended_lba_setup_pi(struct ns_entry *entry, struct perf_task *task, uint64_t lba, + uint32_t lba_count, bool is_write) +{ + struct spdk_nvme_protection_info *pi; + uint32_t i, md_size, sector_size, pi_offset; + uint16_t crc16; + + task->appmask = 0; + task->apptag = 0; + + if (!spdk_nvme_ns_supports_extended_lba(entry->u.nvme.ns)) { + return; + } + + if (spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return; + } + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return; + } + + /* Type3 don't support REFTAG */ + if 
(spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { + return; + } + + sector_size = spdk_nvme_ns_get_sector_size(entry->u.nvme.ns); + md_size = spdk_nvme_ns_get_md_size(entry->u.nvme.ns); + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (entry->nsdata->dps.md_start) { + return; + } + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + task->appmask = 0xffff; + task->apptag = lba_count; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = ((sector_size + md_size) * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(task->buf + pi_offset); + memset(pi, 0, sizeof(*pi)); + + if (is_write) { + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include PI */ + crc16 = spdk_crc16_t10dif(task->buf + (sector_size + md_size) * i, + sector_size + md_size - 8); + to_be16(&pi->guard, crc16); + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + to_be16(&pi->app_tag, lba_count); + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&pi->ref_tag, (uint32_t)lba + i); + } + } + } +} + +static void +task_extended_lba_pi_verify(struct ns_entry *entry, struct perf_task *task, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_nvme_protection_info *pi; + uint32_t i, md_size, sector_size, pi_offset, ref_tag; + uint16_t crc16, guard, app_tag; + + if (spdk_nvme_ns_get_pi_type(entry->u.nvme.ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return; + } + + sector_size = spdk_nvme_ns_get_sector_size(entry->u.nvme.ns); + md_size = spdk_nvme_ns_get_md_size(entry->u.nvme.ns); + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (entry->nsdata->dps.md_start) { + return; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = ((sector_size + md_size) * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info 
*)(task->buf + pi_offset); + + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include last 8 bytes of PI */ + crc16 = spdk_crc16_t10dif(task->buf + (sector_size + md_size) * i, + sector_size + md_size - 8); + to_be16(&guard, crc16); + if (pi->guard != guard) { + fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, guard, pi->guard); + } + + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Previously we used the number of lbas as + * application tag for writes + */ + to_be16(&app_tag, lba_count); + if (pi->app_tag != app_tag) { + fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, app_tag, pi->app_tag); + } + } + if (entry->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&ref_tag, (uint32_t)lba + i); + if (pi->ref_tag != ref_tag) { + fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," + " Preferred 0x%08x but returned with 0x%08x," + " may read the LBA without write it first\n", + lba + i, ref_tag, pi->ref_tag); + } + } + } +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct perf_task *task) +{ + uint64_t offset_in_ios; + int rc; + struct ns_worker_ctx *ns_ctx = task->ns_ctx; + struct ns_entry *entry = ns_ctx->entry; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + + task->is_read = false; + task->submit_tsc = spdk_get_ticks(); + task->lba = offset_in_ios * entry->io_size_blocks; + + if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { 
+#if HAVE_LIBAIO + if (entry->type == ENTRY_TYPE_AIO_FILE) { + rc = aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD, task->buf, + g_io_size_bytes, offset_in_ios * g_io_size_bytes, task); + } else +#endif + { + task_extended_lba_setup_pi(entry, task, task->lba, + entry->io_size_blocks, false); + task->is_read = true; + + rc = spdk_nvme_ns_cmd_read_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair, + task->buf, NULL, + task->lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags, + task->appmask, task->apptag); + } + } else { +#if HAVE_LIBAIO + if (entry->type == ENTRY_TYPE_AIO_FILE) { + rc = aio_submit(ns_ctx->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE, task->buf, + g_io_size_bytes, offset_in_ios * g_io_size_bytes, task); + } else +#endif + { + task_extended_lba_setup_pi(entry, task, task->lba, + entry->io_size_blocks, true); + + rc = spdk_nvme_ns_cmd_write_with_md(entry->u.nvme.ns, ns_ctx->u.nvme.qpair, + task->buf, NULL, + task->lba, + entry->io_size_blocks, io_complete, + task, entry->io_flags, + task->appmask, task->apptag); + } + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static void +task_complete(struct perf_task *task) +{ + struct ns_worker_ctx *ns_ctx; + uint64_t tsc_diff; + struct ns_entry *entry; + + ns_ctx = task->ns_ctx; + entry = ns_ctx->entry; + ns_ctx->current_queue_depth--; + ns_ctx->io_completed++; + tsc_diff = spdk_get_ticks() - task->submit_tsc; + ns_ctx->total_tsc += tsc_diff; + if (ns_ctx->min_tsc > tsc_diff) { + ns_ctx->min_tsc = tsc_diff; + } + if (ns_ctx->max_tsc < tsc_diff) { + ns_ctx->max_tsc = tsc_diff; + } + if (g_latency_sw_tracking_level > 0) { + spdk_histogram_data_tally(ns_ctx->histogram, tsc_diff); + } + + /* add application level verification for end-to-end data protection */ + if (entry->type == ENTRY_TYPE_NVME_NS) { + if (spdk_nvme_ns_supports_extended_lba(entry->u.nvme.ns) && + task->is_read && 
!g_metacfg_pract_flag) { + task_extended_lba_pi_verify(entry, task, task->lba, + entry->io_size_blocks); + } + } + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (ns_ctx->is_draining) { + spdk_dma_free(task->buf); + free(task); + } else { + submit_single_io(task); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct perf_task *)ctx); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ +#if HAVE_LIBAIO + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { + aio_check_io(ns_ctx); + } else +#endif + { + spdk_nvme_qpair_process_completions(ns_ctx->u.nvme.qpair, g_max_completions); + } +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + struct perf_task *task; + uint32_t max_io_size_bytes; + + while (queue_depth-- > 0) { + task = calloc(1, sizeof(*task)); + if (task == NULL) { + fprintf(stderr, "Out of memory allocating tasks\n"); + exit(1); + } + + /* maximum extended lba format size from all active + * namespace, it's same with g_io_size_bytes for + * namespace without metadata + */ + max_io_size_bytes = g_io_size_bytes + g_max_io_md_size * g_max_io_size_blocks; + task->buf = spdk_dma_zmalloc(max_io_size_bytes, g_io_align, NULL); + if (task->buf == NULL) { + fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n"); + exit(1); + } + memset(task->buf, queue_depth % 8 + 1, max_io_size_bytes); + + task->ns_ctx = ns_ctx; + + submit_single_io(task); + } +} + +static void +drain_io(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->is_draining = true; + while (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + } +} + +static int +init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + ns_ctx->u.aio.events = calloc(g_queue_depth, 
sizeof(struct io_event)); + if (!ns_ctx->u.aio.events) { + return -1; + } + ns_ctx->u.aio.ctx = 0; + if (io_setup(g_queue_depth, &ns_ctx->u.aio.ctx) < 0) { + free(ns_ctx->u.aio.events); + perror("io_setup"); + return -1; + } +#endif + } else { + /* + * TODO: If a controller has multiple namespaces, they could all use the same queue. + * For now, give each namespace/thread combination its own queue. + */ + struct spdk_nvme_io_qpair_opts opts; + + spdk_nvme_ctrlr_get_default_io_qpair_opts(ns_ctx->entry->u.nvme.ctrlr, &opts, sizeof(opts)); + if (opts.io_queue_requests < ns_ctx->entry->num_io_requests) { + opts.io_queue_requests = ns_ctx->entry->num_io_requests; + } + + ns_ctx->u.nvme.qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->u.nvme.ctrlr, &opts, + sizeof(opts)); + if (!ns_ctx->u.nvme.qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return -1; + } + } + + return 0; +} + +static void +cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx) +{ + if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + io_destroy(ns_ctx->u.aio.ctx); + free(ns_ctx->u.aio.events); +#endif + } else { + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->u.nvme.qpair); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + + printf("Starting thread on core %u\n", worker->lcore); + + /* Allocate a queue pair for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (init_ns_worker_ctx(ns_ctx) != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + ns_ctx = ns_ctx->next; + } + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + /* + * Check for completed I/O for each controller. 
A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. + */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + drain_io(ns_ctx); + cleanup_ns_worker_ctx(ns_ctx); + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); +#if HAVE_LIBAIO + printf(" [AIO device(s)]..."); +#endif + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-o io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-L enable latency tracking via sw, default: disabled]\n"); + printf("\t\t-L for latency summary, -LL for detailed histogram\n"); + printf("\t[-l enable latency tracking via ssd (if supported), default: disabled]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t[-c core mask for I/O submission/completion.]\n"); + printf("\t\t(default: 1)]\n"); + printf("\t[-D disable submission queue in controller memory buffer, default: enabled]\n"); + printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n"); + printf("\t Format: 'key:value [key:value] ...'\n"); + printf("\t Keys:\n"); + printf("\t trtype Transport type (e.g. PCIe, RDMA)\n"); + printf("\t adrfam Address family (e.g. IPv4, IPv6)\n"); + printf("\t traddr Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n"); + printf("\t trsvcid Transport service identifier (e.g. 
4420)\n"); + printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n"); + printf("\t -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n"); + printf("\t[-e metadata configuration]\n"); + printf("\t Keys:\n"); + printf("\t PRACT Protection Information Action bit (PRACT=1 or PRACT=0)\n"); + printf("\t PRCHK Control of Protection Information Checking (PRCHK=GUARD|REFTAG|APPTAG)\n"); + printf("\t Example: -e 'PRACT=0,PRCHK=GUARD|REFTAG|APPTAG'\n"); + printf("\t -e 'PRACT=1,PRCHK=GUARD'\n"); + printf("\t[-s DPDK huge memory size in MB.]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default: 0 - unlimited)\n"); + printf("\t[-i shared memory group ID]\n"); +} + +static void +check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + double **cutoff = ctx; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far / total; + while (so_far_pct >= **cutoff && **cutoff > 0) { + printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate); + (*cutoff)++; + } +} + +static void +print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far * 100 / total; + printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", + (double)start * 1000 * 1000 / g_tsc_rate, + (double)end * 1000 * 1000 / g_tsc_rate, + so_far_pct, count); +} + +static void +print_performance(void) +{ + uint64_t total_io_completed, total_io_tsc; + double io_per_second, mb_per_second, average_latency, min_latency, max_latency; + double sum_ave_latency, min_latency_so_far, max_latency_so_far; + double total_io_per_second, total_mb_per_second; + int ns_count; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + total_io_per_second = 0; + 
total_mb_per_second = 0; + total_io_completed = 0; + total_io_tsc = 0; + min_latency_so_far = (double)UINT64_MAX; + max_latency_so_far = 0; + ns_count = 0; + + printf("========================================================\n"); + printf("%103s\n", "Latency(us)"); + printf("%-55s: %10s %10s %10s %10s %10s\n", + "Device Information", "IOPS", "MB/s", "Average", "min", "max"); + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + if (ns_ctx->io_completed != 0) { + io_per_second = (double)ns_ctx->io_completed / g_time_in_sec; + mb_per_second = io_per_second * g_io_size_bytes / (1024 * 1024); + average_latency = ((double)ns_ctx->total_tsc / ns_ctx->io_completed) * 1000 * 1000 / g_tsc_rate; + min_latency = (double)ns_ctx->min_tsc * 1000 * 1000 / g_tsc_rate; + if (min_latency < min_latency_so_far) { + min_latency_so_far = min_latency; + } + + max_latency = (double)ns_ctx->max_tsc * 1000 * 1000 / g_tsc_rate; + if (max_latency > max_latency_so_far) { + max_latency_so_far = max_latency; + } + + printf("%-43.43s from core %u: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + ns_ctx->entry->name, worker->lcore, + io_per_second, mb_per_second, + average_latency, min_latency, max_latency); + total_io_per_second += io_per_second; + total_mb_per_second += mb_per_second; + total_io_completed += ns_ctx->io_completed; + total_io_tsc += ns_ctx->total_tsc; + ns_count++; + } + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (ns_count != 0 && total_io_completed) { + sum_ave_latency = ((double)total_io_tsc / total_io_completed) * 1000 * 1000 / g_tsc_rate; + printf("========================================================\n"); + printf("%-55s: %10.2f %10.2f %10.2f %10.2f %10.2f\n", + "Total", total_io_per_second, total_mb_per_second, + sum_ave_latency, min_latency_so_far, max_latency_so_far); + printf("\n"); + } + + if (g_latency_sw_tracking_level == 0 || total_io_completed == 0) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx 
= worker->ns_ctx; + while (ns_ctx) { + const double *cutoff = g_latency_cutoffs; + + printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("=================================================================================\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, check_cutoff, &cutoff); + + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + + if (g_latency_sw_tracking_level == 1) { + return; + } + + worker = g_workers; + while (worker) { + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore); + printf("==============================================================================\n"); + printf(" Range in us Cumulative IO count\n"); + + spdk_histogram_data_iterate(ns_ctx->histogram, print_bucket, NULL); + printf("\n"); + ns_ctx = ns_ctx->next; + } + worker = worker->next; + } + +} +

/*
 * Print every non-empty bucket of one controller's latency log page.
 *
 * The page is read from entry->latency_page; three bucket arrays are
 * walked (32 us, 1 ms and 32 ms granularity) and only buckets with a
 * non-zero count are printed.
 */
static void
print_latency_page(struct ctrlr_entry *entry)
{
	int i;

	printf("\n");
	printf("%s\n", entry->name);
	printf("--------------------------------------------------------\n");

	/*
	 * NOTE(review): bucket counters are printed as unsigned; this assumes
	 * they are declared uint32_t in spdk/nvme_intel.h - confirm.
	 */
	for (i = 0; i < 32; i++) {
		if (entry->latency_page->buckets_32us[i]) {
			printf("Bucket %dus - %dus: %u\n", i * 32, (i + 1) * 32,
			       entry->latency_page->buckets_32us[i]);
		}
	}
	for (i = 0; i < 31; i++) {
		if (entry->latency_page->buckets_1ms[i]) {
			printf("Bucket %dms - %dms: %u\n", i + 1, i + 2,
			       entry->latency_page->buckets_1ms[i]);
		}
	}
	for (i = 0; i < 31; i++) {
		if (entry->latency_page->buckets_32ms[i]) {
			printf("Bucket %dms - %dms: %u\n", (i + 1) * 32, (i + 2) * 32,
			       entry->latency_page->buckets_32ms[i]);
		}
	}
}

/*
 * Fetch the given Intel latency log page from every controller that
 * supports it, wait for all requests to complete, then print the pages.
 *
 * op_name is only used for the headings ("Read"/"Write").
 * The process exits with status 1 if a Get Log Page command cannot be
 * submitted.
 */
static void
print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page)
{
	struct ctrlr_entry *ctrlr;

	printf("%s Latency Statistics:\n", op_name);
	printf("========================================================\n");

	/* Submit one Get Log Page per supporting controller. */
	for (ctrlr = g_controllers; ctrlr != NULL; ctrlr = ctrlr->next) {
		if (!spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			printf("Controller %s: %s latency statistics not supported\n", ctrlr->name, op_name);
			continue;
		}

		if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr->ctrlr, log_page, SPDK_NVME_GLOBAL_NS_TAG,
						     ctrlr->latency_page,
						     sizeof(struct spdk_nvme_intel_rw_latency_page), 0,
						     enable_latency_tracking_complete,
						     NULL)) {
			printf("nvme_ctrlr_cmd_get_log_page() failed\n");
			exit(1);
		}

		g_outstanding_commands++;
	}

	/* Poll the admin queues until the outstanding-command counter drains. */
	while (g_outstanding_commands) {
		for (ctrlr = g_controllers; ctrlr != NULL; ctrlr = ctrlr->next) {
			spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr);
		}
	}

	for (ctrlr = g_controllers; ctrlr != NULL; ctrlr = ctrlr->next) {
		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			print_latency_page(ctrlr);
		}
	}
	printf("\n");
}

/*
 * Print the performance summary and, when SSD latency tracking was
 * requested, the read and/or write latency pages that match the
 * configured read percentage.
 */
static void
print_stats(void)
{
	print_performance();

	if (!g_latency_ssd_tracking_enable) {
		return;
	}

	/* Reads were issued unless the workload is 0% reads. */
	if (g_rw_percentage != 0) {
		print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY);
	}
	/* Writes were issued unless the workload is 100% reads. */
	if (g_rw_percentage != 100) {
		print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY);
	}
}

/*
 * Release every transport ID entry queued on g_trid_list.
 * Each entry is unlinked before it is freed so the list head never
 * points at freed memory.
 */
static void
unregister_trids(void)
{
	struct trid_entry *trid_entry, *tmp;

	TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) {
		TAILQ_REMOVE(&g_trid_list, trid_entry, tailq);
		free(trid_entry);
	}
}

/*
 * Parse a transport ID string and append it to g_trid_list.
 *
 * The entry defaults to the PCIe transport and the discovery NQN before
 * the string is parsed.  An optional "ns:<id>" token (1..65535) selects
 * a single namespace and is stored in the entry's nsid field.
 *
 * Returns 0 on success, -1 if the entry cannot be allocated and 1 if the
 * string is malformed.
 */
static int
add_trid(const char *trid_str)
{
	struct trid_entry *trid_entry;
	struct spdk_nvme_transport_id *trid;
	char *ns;

	/* calloc() already zero-fills the entry, including the embedded trid. */
	trid_entry = calloc(1, sizeof(*trid_entry));
	if (trid_entry == NULL) {
		return -1;
	}

	trid = &trid_entry->trid;
	trid->trtype = SPDK_NVME_TRANSPORT_PCIE;
	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);

	if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) {
		fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str);
		free(trid_entry);
		return 1;
	}

	ns = strcasestr(trid_str, "ns:");
	if (ns) {
		char nsid_str[6]; /* 5 digits maximum in an nsid */
		size_t len;
		char *end;
		long nsid;

		ns += 3;

		/* The nsid token runs until the next whitespace character. */
		len = strcspn(ns, " \t\n");
		if (len > 5) {
			fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n");
			free(trid_entry);
			return 1;
		}

		memcpy(nsid_str, ns, len);
		nsid_str[len] = '\0';

		/* Reject empty tokens and trailing garbage such as "ns:12abc". */
		nsid = strtol(nsid_str, &end, 10);
		if (end == nsid_str || *end != '\0') {
			fprintf(stderr, "Invalid NVMe namespace ID '%s'\n", nsid_str);
			free(trid_entry);
			return 1;
		}

		if (nsid <= 0 || nsid > 65535) {
			fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n");
			free(trid_entry);
			return 1;
		}

		trid_entry->nsid = (uint16_t)nsid;
	}

	TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq);
	return 0;
}

/*
 * Translate the -e option string into the global metadata IO flags.
 *
 * "PRACT=1" anywhere in the string sets the PRACT flag.  PRCHK keywords
 * (GUARD, REFTAG, APPTAG) are only looked for after the first comma.
 * Always returns 0.
 */
static int
parse_metadata(const char *metacfg_str)
{
	const char *sep;

	if (strstr(metacfg_str, "PRACT=1") != NULL) {
		g_metacfg_pract_flag = SPDK_NVME_IO_FLAGS_PRACT;
	}

	sep = strchr(metacfg_str, ',');
	if (sep == NULL || strstr(sep, "PRCHK=") == NULL) {
		return 0;
	}

	if (strstr(sep, "GUARD") != NULL) {
		g_metacfg_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
	}
	if (strstr(sep, "REFTAG") != NULL) {
		g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
	}
	if (strstr(sep, "APPTAG") != NULL) {
		g_metacfg_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG;
	}

	return 0;
}

/*
 * Parse the command line into the g_* configuration globals.
 *
 * Queue depth (-q), IO size (-o), workload type (-w) and run time (-t)
 * are mandatory.  For "rw"/"randrw" a read percentage of 0..100 must be
 * given with -M; for the pure read/write workloads -M is ignored with a
 * warning.  When no -r transport ID was given, local PCIe enumeration is
 * queued as the default.
 *
 * Returns 0 on success and 1 on any usage error.
 */
static int
parse_args(int argc, char **argv)
{
	const char *workload_type;
	int op;
	bool mix_specified = false;
	bool pure_read, pure_write, mixed;

	/* default value */
	g_queue_depth = 0;
	g_io_size_bytes = 0;
	workload_type = NULL;
	g_time_in_sec = 0;
	g_rw_percentage = -1;
	g_core_mask = NULL;
	g_max_completions = 0;

	while ((op = getopt(argc, argv, "c:e:i:lm:o:q:r:s:t:w:DLM:")) != -1) {
		switch (op) {
		case 'c':
			g_core_mask = optarg;
			break;
		case 'e':
			if (parse_metadata(optarg)) {
				usage(argv[0]);
				return 1;
			}
			break;
		case 'i':
			g_shm_id = atoi(optarg);
			break;
		case 'l':
			g_latency_ssd_tracking_enable = true;
			break;
		case 'm':
			g_max_completions = atoi(optarg);
			break;
		case 'o':
			g_io_size_bytes = atoi(optarg);
			break;
		case 'q':
			g_queue_depth = atoi(optarg);
			break;
		case 'r':
			if (add_trid(optarg)) {
				usage(argv[0]);
				return 1;
			}
			break;
		case 's':
			g_dpdk_mem = atoi(optarg);
			break;
		case 't':
			g_time_in_sec = atoi(optarg);
			break;
		case 'w':
			workload_type = optarg;
			break;
		case 'D':
			g_disable_sq_cmb = 1;
			break;
		case 'L':
			/* Each -L raises the software latency tracking level by one. */
			g_latency_sw_tracking_level++;
			break;
		case 'M':
			g_rw_percentage = atoi(optarg);
			mix_specified = true;
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

	/* All four of these options are mandatory. */
	if (!g_queue_depth || !g_io_size_bytes || !workload_type || !g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	pure_read = !strcmp(workload_type, "read") || !strcmp(workload_type, "randread");
	pure_write = !strcmp(workload_type, "write") || !strcmp(workload_type, "randwrite");
	mixed = !strcmp(workload_type, "rw") || !strcmp(workload_type, "randrw");

	if (!pure_read && !pure_write && !mixed) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (pure_read) {
		g_rw_percentage = 100;
	}

	if (pure_write) {
		g_rw_percentage = 0;
	}

	if ((pure_read || pure_write) && mix_specified) {
		fprintf(stderr, "Ignoring -M option... Please use -M option"
			" only when using rw or randrw.\n");
	}

	if (mixed && (g_rw_percentage < 0 || g_rw_percentage > 100)) {
		fprintf(stderr,
			"-M must be specified to value from 0 to 100 "
			"for rw or randrw.\n");
		return 1;
	}

	/* Only the three un-prefixed workload names are sequential. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	if (TAILQ_EMPTY(&g_trid_list)) {
		/* If no transport IDs specified, default to enumerating all local PCIe devices */
		if (add_trid("trtype:PCIe") != 0) {
			fprintf(stderr, "Unable to add default PCIe transport ID\n");
			return 1;
		}
	} else {
		struct trid_entry *trid_entry;

		g_no_pci = true;
		/* check whether there is local PCIe type */
		TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) {
			if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
				g_no_pci = false;
				break;
			}
		}
	}

	/* Remaining non-option arguments start here (consumed by register_aio_files). */
	g_aio_optind = optind;

	return 0;
}

/*
 * Allocate one worker_thread per core reported by SPDK_ENV_FOREACH_CORE
 * and push it onto the g_workers list.
 *
 * Returns 0 on success, -1 on allocation failure.  Workers created before
 * a failure stay on g_workers and are released by unregister_workers().
 */
static int
register_workers(void)
{
	uint32_t i;
	struct worker_thread *worker;

	g_workers = NULL;
	g_num_workers = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		worker = calloc(1, sizeof(*worker));
		if (worker == NULL) {
			fprintf(stderr, "Unable to allocate worker\n");
			return -1;
		}

		worker->lcore = i;
		worker->next = g_workers;
		g_workers = worker;
		g_num_workers++;
	}

	return 0;
}

/*
 * Free every worker on g_workers together with its namespace contexts
 * and their histograms, then reset the list head.
 */
static void
unregister_workers(void)
{
	struct worker_thread *worker = g_workers;

	/* Free namespace context and worker thread */
	while (worker) {
		struct worker_thread *next_worker = worker->next;
		struct ns_worker_ctx *ns_ctx = worker->ns_ctx;

		while (ns_ctx) {
			struct ns_worker_ctx *next_ns_ctx = ns_ctx->next;

			spdk_histogram_data_free(ns_ctx->histogram);
			free(ns_ctx);
			ns_ctx = next_ns_ctx;
		}

		free(worker);
		worker = next_worker;
	}

	/* Do not leave the global head pointing at freed memory. */
	g_workers = NULL;
}

/*
 * Probe callback: announce the controller about to be attached and tune
 * its options.  Always returns true, so every probed controller is
 * attached.
 */
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) {
		printf("Attaching to NVMe over Fabrics controller at %s:%s: %s\n",
		       trid->traddr, trid->trsvcid,
		       trid->subnqn);
	} else {
		if (g_disable_sq_cmb) {
			opts->use_cmb_sqs = false;
		}

		printf("Attaching to NVMe Controller at %s\n",
		       trid->traddr);
	}

	/* Set io_queue_size to UINT16_MAX, NVMe driver
	 * will then reduce this to MQES to maximize
	 * the io_queue_size as much as possible.
	 */
	opts->io_queue_size = UINT16_MAX;

	return true;
}

/*
 * Attach callback: report the attached controller and register it.
 *
 * cb_ctx is the trid_entry that register_controllers() passed to
 * spdk_nvme_probe(); it is forwarded to register_ctrlr().
 */
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct trid_entry *trid_entry = cb_ctx;
	struct spdk_pci_addr pci_addr;
	struct spdk_pci_device *pci_dev;
	struct spdk_pci_id pci_id;

	g_controllers_found++;
	if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) {
		printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n",
		       trid->traddr, trid->trsvcid,
		       trid->subnqn);
	} else {
		/*
		 * NOTE(review): the two early returns below skip register_ctrlr(),
		 * so the controller is counted but never tracked or detached by
		 * this file - confirm that is intended.
		 */
		if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
			return;
		}

		pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);
		if (!pci_dev) {
			return;
		}

		pci_id = spdk_pci_device_get_id(pci_dev);

		printf("Attached to NVMe Controller at %s [%04x:%04x]\n",
		       trid->traddr,
		       pci_id.vendor_id, pci_id.device_id);
	}

	register_ctrlr(ctrlr, trid_entry);
}

/*
 * Probe every transport ID on g_trid_list.
 * Returns 0 on success, -1 as soon as one probe fails.
 */
static int
register_controllers(void)
{
	struct trid_entry *trid_entry;

	printf("Initializing NVMe Controllers\n");

	TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) {
		if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) {
			fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n",
				trid_entry->trid.traddr);
			return -1;
		}
	}

	return 0;
}

/*
 * Tear down every controller on g_controllers: free its latency page,
 * switch latency tracking back off where it is supported and was
 * requested, detach the controller and free the list entry.
 */
static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry = g_controllers;

	while (entry) {
		struct ctrlr_entry *next = entry->next;

		spdk_dma_free(entry->latency_page);
		if (g_latency_ssd_tracking_enable &&
		    spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
			set_latency_tracking_feature(entry->ctrlr, false);
		}
		spdk_nvme_detach(entry->ctrlr);
		free(entry);
		entry = next;
	}

	/* Do not leave the global head pointing at freed memory. */
	g_controllers = NULL;
}

/*
 * Register every non-option command line argument as an AIO file.
 * Compiles to a no-op returning 0 when HAVE_LIBAIO is not set.
 * Returns 1 if any file fails to register.
 */
static int
register_aio_files(int argc, char **argv)
{
#if HAVE_LIBAIO
	int i;

	/* Treat everything after the options as files for AIO */
	for (i = g_aio_optind; i < argc; i++) {
		if (register_aio_file(argv[i]) != 0) {
			return 1;
		}
	}
#endif /* HAVE_LIBAIO */

	return 0;
}

/*
 * Pair namespaces with workers round-robin.
 *
 * max(g_num_namespaces, g_num_workers) pairings are created, wrapping
 * around whichever list is shorter, so every namespace and every worker
 * takes part in at least one pairing.
 *
 * Returns 0 on success, -1 if a context or its histogram cannot be
 * allocated.
 */
static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = g_namespaces;
	struct worker_thread *worker = g_workers;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces > g_num_workers ? g_num_namespaces : g_num_workers;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}

		ns_ctx = calloc(1, sizeof(*ns_ctx));
		if (!ns_ctx) {
			return -1;
		}

		ns_ctx->histogram = spdk_histogram_data_alloc();
		if (!ns_ctx->histogram) {
			/* Not linked into any worker yet, so release it here. */
			free(ns_ctx);
			return -1;
		}

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->min_tsc = UINT64_MAX;
		ns_ctx->entry = entry;
		ns_ctx->next = worker->ns_ctx;
		worker->ns_ctx = ns_ctx;

		/* Advance both cursors, wrapping at the end of each list. */
		worker = worker->next;
		if (worker == NULL) {
			worker = g_workers;
		}

		entry = entry->next;
		if (entry == NULL) {
			entry = g_namespaces;
		}
	}

	return 0;
}

/*
 * Entry point: parse options, bring up the SPDK environment, register
 * workers, AIO files and NVMe controllers, run work_fn on every core and
 * print the statistics.  All exits after argument parsing go through the
 * common cleanup path.
 */
int main(int argc, char **argv)
{
	int rc;
	struct worker_thread *worker, *master_worker;
	unsigned master_core;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "perf";
	opts.shm_id = g_shm_id;
	if (g_core_mask) {
		opts.core_mask = g_core_mask;
	}

	if (g_dpdk_mem) {
		opts.mem_size = g_dpdk_mem;
	}
	if (g_no_pci) {
		opts.no_pci = g_no_pci;
	}
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		rc = -1;
		goto cleanup;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	if (register_workers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (register_aio_files(argc, argv) != 0) {
		rc = -1;
		goto cleanup;
	}

	if (register_controllers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (g_warn) {
		printf("WARNING: Some requested NVMe devices were skipped\n");
	}

	if (g_num_namespaces == 0) {
		fprintf(stderr, "No valid NVMe controllers or AIO devices found\n");
		/* rc is still 0 here; exit through cleanup so nothing is leaked. */
		goto cleanup;
	}

	if (associate_workers_with_ns() != 0) {
		rc = -1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");

	/* Launch all of the slave workers */
	master_core = spdk_env_get_current_core();
	master_worker = NULL;
	for (worker = g_workers; worker != NULL; worker = worker->next) {
		if (worker->lcore != master_core) {
			spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
		} else {
			assert(master_worker == NULL);
			master_worker = worker;
		}
	}

	/* The worker for the current core runs on this thread. */
	assert(master_worker != NULL);
	rc = work_fn(master_worker);

	spdk_env_thread_wait_all();

	print_stats();

cleanup:
	unregister_trids();
	unregister_namespaces();
	unregister_controllers();
	unregister_workers();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}
 diff --git a/src/spdk/examples/nvme/reserve/.gitignore b/src/spdk/examples/nvme/reserve/.gitignore new file mode 100644 index 00000000..c58b368c --- /dev/null +++ b/src/spdk/examples/nvme/reserve/.gitignore @@ -0,0 +1 @@ +reserve diff --git a/src/spdk/examples/nvme/reserve/Makefile b/src/spdk/examples/nvme/reserve/Makefile new file mode 100644 index 00000000..315faef0 --- /dev/null +++ b/src/spdk/examples/nvme/reserve/Makefile @@ -0,0 +1,39 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = reserve + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/examples/nvme/reserve/reserve.c b/src/spdk/examples/nvme/reserve/reserve.c new file mode 100644 index 00000000..4e0d54e2 --- /dev/null +++ b/src/spdk/examples/nvme/reserve/reserve.c @@ -0,0 +1,394 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/log.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + char name[100]; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +static int outstanding_commands; +static int reserve_command_result; +static bool get_host_id_successful; + +#define HOST_ID 0xABABABABCDCDCDCD +#define EXT_HOST_ID ((uint8_t[]){0x0f, 0x97, 0xcd, 0x74, 0x8c, 0x80, 0x41, 0x42, \ + 0x99, 0x0f, 0x65, 0xc4, 0xf0, 0x39, 0x24, 0x20}) + +#define CR_KEY 0xDEADBEAF5A5A5A5B + +static void +get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + fprintf(stdout, "Get Features - Host Identifier failed\n"); + get_host_id_successful = false; + } else { + get_host_id_successful = true; + } + outstanding_commands--; +} + +static int +get_host_identifier(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + uint8_t host_id[16]; + uint32_t host_id_size; + uint32_t cdw11; + + if (spdk_nvme_ctrlr_get_data(ctrlr)->ctratt.host_id_exhid_supported) { + host_id_size = 16; + cdw11 = 1; + printf("Using 128-bit extended host identifier\n"); + } else { + host_id_size = 8; + cdw11 = 0; + printf("Using 64-bit host identifier\n"); + } + + outstanding_commands = 0; + ret = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_HOST_IDENTIFIER, cdw11, host_id, + host_id_size, + get_feature_completion, NULL); + if (ret) { + fprintf(stdout, "Get Feature: Failed\n"); + return -1; + } + + outstanding_commands++; + get_host_id_successful = false; + + while (outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (get_host_id_successful) { + spdk_trace_dump(stdout, "Get Feature: Host Identifier:", host_id, host_id_size); + } + + return 0; +} + +static void 
+reservation_ns_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + reserve_command_result = -1; + } else { + reserve_command_result = 0; + } + + outstanding_commands--; +} + +static int +reservation_ns_register(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_register_data rr_data; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + rr_data.crkey = CR_KEY; + rr_data.nrkey = CR_KEY; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_register(ns, qpair, &rr_data, true, + SPDK_NVME_RESERVE_REGISTER_KEY, + SPDK_NVME_RESERVE_PTPL_NO_CHANGES, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Register Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Register Failed\n"); + } + + return 0; +} + +static int +reservation_ns_report(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret, i; + uint8_t *payload; + struct spdk_nvme_reservation_status_data *status; + struct spdk_nvme_reservation_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + outstanding_commands = 0; + reserve_command_result = -1; + + payload = spdk_dma_zmalloc(0x1000, 0x1000, NULL); + if (!payload) { + fprintf(stderr, "DMA Buffer Allocation Failed\n"); + return -1; + } + + ret = spdk_nvme_ns_cmd_reservation_report(ns, qpair, payload, 0x1000, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Report Failed\n"); + spdk_dma_free(payload); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, 
"Reservation Report Failed\n"); + spdk_dma_free(payload); + return 0; + } + + status = (struct spdk_nvme_reservation_status_data *)payload; + fprintf(stdout, "Reservation Generation Counter %u\n", status->generation); + fprintf(stdout, "Reservation type %u\n", status->type); + fprintf(stdout, "Reservation Number of Registered Controllers %u\n", status->nr_regctl); + fprintf(stdout, "Reservation Persist Through Power Loss State %u\n", status->ptpl_state); + for (i = 0; i < status->nr_regctl; i++) { + cdata = (struct spdk_nvme_reservation_ctrlr_data *)(payload + sizeof(struct + spdk_nvme_reservation_status_data) * (i + 1)); + fprintf(stdout, "Controller ID %u\n", cdata->ctrlr_id); + fprintf(stdout, "Controller Reservation Status %u\n", cdata->rcsts.status); + fprintf(stdout, "Controller Host ID 0x%"PRIx64"\n", cdata->host_id); + fprintf(stdout, "Controller Reservation Key 0x%"PRIx64"\n", cdata->key); + } + + spdk_dma_free(payload); + return 0; +} + +static int +reservation_ns_acquire(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_acquire_data cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + cdata.prkey = 0; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_acquire(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_ACQUIRE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Acquire Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Acquire Failed\n"); + } + + return 0; +} + +static int +reservation_ns_release(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_key_data cdata; + struct spdk_nvme_ns 
*ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + + outstanding_commands = 0; + reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_release(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_RELEASE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Release Failed\n"); + return -1; + } + + outstanding_commands++; + while (outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (reserve_command_result) { + fprintf(stderr, "Reservation Release Failed\n"); + } + + return 0; +} + +static void +reserve_controller(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + const struct spdk_pci_addr *pci_addr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + printf("=====================================================\n"); + printf("NVMe Controller at PCI bus %d, device %d, function %d\n", + pci_addr->bus, pci_addr->dev, pci_addr->func); + printf("=====================================================\n"); + + printf("Reservations: %s\n", + cdata->oncs.reservations ? "Supported" : "Not Supported"); + + if (!cdata->oncs.reservations) { + return; + } + + get_host_identifier(ctrlr); + + /* tested 1 namespace */ + reservation_ns_register(ctrlr, qpair, 1); + reservation_ns_acquire(ctrlr, qpair, 1); + reservation_ns_report(ctrlr, qpair, 1); + reservation_ns_release(ctrlr, qpair, 1); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + /* + * Provide both 64-bit and 128-bit host identifiers. + * + * The NVMe library will choose which one to use based on whether the controller + * supports extended host identifiers. 
+ */ + to_le64(opts->host_id, HOST_ID); + memcpy(opts->extended_host_id, EXT_HOST_ID, sizeof(opts->extended_host_id)); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + + /* add to dev list */ + dev = &devs[num_devs++]; + spdk_pci_addr_parse(&dev->pci_addr, trid->traddr); + dev->ctrlr = ctrlr; +} + +int main(int argc, char **argv) +{ + struct dev *iter; + int rc, i; + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "reserve"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + rc = 0; + + foreach_dev(iter) { + struct spdk_nvme_qpair *qpair; + + qpair = spdk_nvme_ctrlr_alloc_io_qpair(iter->ctrlr, NULL, 0); + if (!qpair) { + fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + rc = 1; + } else { + reserve_controller(iter->ctrlr, qpair, &iter->pci_addr); + } + } + + printf("Cleaning up...\n"); + + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return rc; +} |