author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000
commit    | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree      | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/test/nvme
parent    | Initial commit. (diff)
Adding upstream version 16.2.11+ds. (upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/test/nvme')
47 files changed, 7275 insertions, 0 deletions
diff --git a/src/spdk/test/nvme/Makefile b/src/spdk/test/nvme/Makefile new file mode 100644 index 000000000..b2ed73a09 --- /dev/null +++ b/src/spdk/test/nvme/Makefile @@ -0,0 +1,46 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y = aer reset sgl e2edp overhead deallocated_value err_injection \ + startup reserve +DIRS-$(CONFIG_NVME_CUSE) += cuse + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/test/nvme/aer/.gitignore b/src/spdk/test/nvme/aer/.gitignore new file mode 100644 index 000000000..313796176 --- /dev/null +++ b/src/spdk/test/nvme/aer/.gitignore @@ -0,0 +1 @@ +aer diff --git a/src/spdk/test/nvme/aer/Makefile b/src/spdk/test/nvme/aer/Makefile new file mode 100644 index 000000000..616800777 --- /dev/null +++ b/src/spdk/test/nvme/aer/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = aer + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/aer/aer.c b/src/spdk/test/nvme/aer/aer.c new file mode 100644 index 000000000..701109ced --- /dev/null +++ b/src/spdk/test/nvme/aer/aer.c @@ -0,0 +1,610 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_health_information_page *health_page; + struct spdk_nvme_ns_list *changed_ns_list; + uint32_t orig_temp_threshold; + char name[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +}; + +static void get_feature_test(struct dev *dev); + +static struct dev g_devs[MAX_DEVS]; +static int g_num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = g_devs; iter - g_devs < g_num_devs; iter++) + +static int g_outstanding_commands = 0; +static int g_aer_done = 0; +static int g_temperature_done = 0; +static int g_failed = 0; +static struct spdk_nvme_transport_id g_trid; +static char *g_touch_file; + +/* Enable AER temperature test */ +static int g_enable_temp_test = 0; +/* Enable AER namespace attribute notice test, this variable holds + * the NSID that is expected to be in the Changed NS List. 
+ */ +static uint32_t g_expected_ns_test = 0; + +static void +set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + g_outstanding_commands--; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: set feature (temp threshold) failed\n", dev->name); + g_failed = 1; + return; + } + + /* Admin command completions are synchronized by the NVMe driver, + * so we don't need to do any special locking here. */ + g_temperature_done++; +} + +static int +set_temp_threshold(struct dev *dev, uint32_t temp) +{ + struct spdk_nvme_cmd cmd = {}; + int rc; + + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD; + cmd.cdw11_bits.feat_temp_threshold.bits.tmpth = temp; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev); + if (rc == 0) { + g_outstanding_commands++; + } + + return rc; +} + +static void +get_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + g_outstanding_commands--; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: get feature (temp threshold) failed\n", dev->name); + g_failed = 1; + return; + } + + dev->orig_temp_threshold = cpl->cdw0; + printf("%s: original temperature threshold: %u Kelvin (%d Celsius)\n", + dev->name, dev->orig_temp_threshold, dev->orig_temp_threshold - 273); + + g_temperature_done++; +} + +static int +get_temp_threshold(struct dev *dev) +{ + struct spdk_nvme_cmd cmd = {}; + int rc; + + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, get_temp_completion, dev); + if (rc == 0) { + g_outstanding_commands++; + } + + return rc; +} + +static void +print_health_page(struct dev *dev, struct spdk_nvme_health_information_page *hip) +{ + printf("%s: Current Temperature: %u Kelvin (%d Celsius)\n", + dev->name, hip->temperature, hip->temperature - 273); +} + +static void +get_health_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + g_outstanding_commands --; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: get log page failed\n", dev->name); + g_failed = 1; + return; + } + + print_health_page(dev, dev->health_page); + g_aer_done++; +} + +static void +get_changed_ns_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + bool found = false; + uint32_t i; + + g_outstanding_commands --; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: get log page failed\n", dev->name); + g_failed = 1; + return; + } + + /* Let's compare the expected namespce ID is + * in changed namespace list + */ + if (dev->changed_ns_list->ns_list[0] != 0xffffffffu) { + for (i = 0; i < sizeof(*dev->changed_ns_list) / sizeof(uint32_t); i++) { + if (g_expected_ns_test == dev->changed_ns_list->ns_list[i]) { + printf("%s: changed NS list contains expected NSID: %u\n", + dev->name, g_expected_ns_test); + found = true; + break; + } + } + } + + if (!found) { + printf("%s: Error: Can't find expected NSID %u\n", dev->name, g_expected_ns_test); + g_failed = 1; + } + + g_aer_done++; +} + +static int +get_health_log_page(struct dev *dev) +{ + int rc; + + rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, + SPDK_NVME_GLOBAL_NS_TAG, dev->health_page, sizeof(*dev->health_page), 0, + get_health_log_page_completion, dev); + + if (rc == 0) { + g_outstanding_commands++; + } + + return 
rc; +} + +static int +get_changed_ns_log_page(struct dev *dev) +{ + int rc; + + rc = spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_CHANGED_NS_LIST, + SPDK_NVME_GLOBAL_NS_TAG, dev->changed_ns_list, + sizeof(*dev->changed_ns_list), 0, + get_changed_ns_log_page_completion, dev); + + if (rc == 0) { + g_outstanding_commands++; + } + + return rc; +} + +static void +cleanup(void) +{ + struct dev *dev; + + foreach_dev(dev) { + if (dev->health_page) { + spdk_free(dev->health_page); + } + if (dev->changed_ns_list) { + spdk_free(dev->changed_ns_list); + } + } +} + +static void +aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16; + struct dev *dev = arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: AER failed\n", dev->name); + g_failed = 1; + return; + } + + printf("%s: aer_cb for log page %d\n", dev->name, log_page_id); + + if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) { + /* Set the temperature threshold back to the original value + * so the AER doesn't trigger again. + */ + set_temp_threshold(dev, dev->orig_temp_threshold); + get_health_log_page(dev); + } else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) { + get_changed_ns_log_page(dev); + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -T enable temperature tests\n"); + printf(" -n expected Namespace attribute notice ID\n"); + printf(" -t <file> touch specified file when ready to receive AER\n"); + printf(" -r trid remote NVMe over Fabrics target address\n"); + printf(" Format: 'key:value [key:value] ...'\n"); + printf(" Keys:\n"); + printf(" trtype Transport type (e.g. RDMA)\n"); + printf(" adrfam Address family (e.g. IPv4, IPv6)\n"); + printf(" traddr Transport address (e.g. 192.168.100.8)\n"); + printf(" trsvcid Transport service identifier (e.g. 
4420)\n"); + printf(" subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN); + printf(" Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n"); + + spdk_log_usage(stdout, "-L"); + + printf(" -v verbose (enable warnings)\n"); + printf(" -H show this usage\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op, rc; + long int val; + + spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE); + snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); + + while ((op = getopt(argc, argv, "n:r:t:HL:T")) != -1) { + switch (op) { + case 'n': + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Invalid NS attribute notice ID\n"); + return val; + } + g_expected_ns_test = (uint32_t)val; + break; + case 'r': + if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) { + fprintf(stderr, "Error parsing transport address\n"); + return 1; + } + break; + case 't': + g_touch_file = optarg; + break; + case 'L': + rc = spdk_log_set_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + spdk_log_set_print_level(SPDK_LOG_DEBUG); +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -L flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + case 'T': + g_enable_temp_test = 1; + break; + case 'H': + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + + /* add to dev list */ + dev = &g_devs[g_num_devs++]; + + dev->ctrlr = ctrlr; + + snprintf(dev->name, sizeof(dev->name), "%s", + trid->traddr); + + printf("Attached to %s\n", dev->name); + + dev->health_page = spdk_zmalloc(sizeof(*dev->health_page), 4096, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (dev->health_page == NULL) { + printf("Allocation error (health page)\n"); + g_failed = 1; + } + dev->changed_ns_list = spdk_zmalloc(sizeof(*dev->changed_ns_list), 4096, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (dev->changed_ns_list == NULL) { + printf("Allocation error (changed namespace list page)\n"); + g_failed = 1; + } +} + +static void +get_feature_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + g_outstanding_commands--; + + if (spdk_nvme_cpl_is_error(cpl)) { + printf("%s: get number of queues failed\n", dev->name); + g_failed = 1; + return; + } + + if (g_aer_done < g_num_devs) { + /* + * Resubmit Get Features command to continue filling admin queue + * while the test is running. 
+ */ + get_feature_test(dev); + } +} + +static void +get_feature_test(struct dev *dev) +{ + struct spdk_nvme_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_NUMBER_OF_QUEUES; + if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, + get_feature_test_cb, dev) != 0) { + printf("Failed to send Get Features command for dev=%p\n", dev); + g_failed = 1; + return; + } + + g_outstanding_commands++; +} + +static int +spdk_aer_temperature_test(void) +{ + struct dev *dev; + + printf("Getting temperature thresholds of all controllers...\n"); + foreach_dev(dev) { + /* Get the original temperature threshold */ + get_temp_threshold(dev); + } + + while (!g_failed && g_temperature_done < g_num_devs) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } + + if (g_failed) { + return g_failed; + } + g_temperature_done = 0; + g_aer_done = 0; + + /* Send admin commands to test admin queue wraparound while waiting for the AER */ + foreach_dev(dev) { + get_feature_test(dev); + } + + if (g_failed) { + return g_failed; + } + + printf("Waiting for all controllers to trigger AER...\n"); + foreach_dev(dev) { + /* Set the temperature threshold to a low value */ + set_temp_threshold(dev, 200); + } + + if (g_failed) { + return g_failed; + } + + while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } + + if (g_failed) { + return g_failed; + } + + return 0; +} + +static int +spdk_aer_changed_ns_test(void) +{ + struct dev *dev; + + g_aer_done = 0; + + printf("Starting namespce attribute notice tests for all controllers...\n"); + + foreach_dev(dev) { + get_feature_test(dev); + } + + if (g_failed) { + return g_failed; + } + + while (!g_failed && (g_aer_done < g_num_devs)) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } + + if (g_failed) { + return g_failed; + } + + return 0; +} + +int main(int argc, char **argv) +{ + struct dev *dev; + int i; + struct spdk_env_opts opts; + int rc; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "aer"; + opts.core_mask = "0x1"; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Asynchronous Event Request test\n"); + + if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_failed) { + goto done; + } + + printf("Registering asynchronous event callbacks...\n"); + foreach_dev(dev) { + spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev); + } + + if (g_touch_file) { + int fd; + + fd = open(g_touch_file, O_CREAT | O_EXCL | O_RDWR, S_IFREG); + if (fd == -1) { + fprintf(stderr, "Could not touch %s (%s).\n", g_touch_file, strerror(errno)); + g_failed = true; + goto done; + } + close(fd); + } + + /* AER temperature test */ + if (g_enable_temp_test) { + if (spdk_aer_temperature_test()) { + goto done; + } + } + + /* AER changed namespace list test */ + if (g_expected_ns_test) { + if (spdk_aer_changed_ns_test()) { + goto done; + } + } + + printf("Cleaning up...\n"); + + while (g_outstanding_commands) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } + + /* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. 
*/ + foreach_dev(dev) { + spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL); + } + + for (i = 0; i < g_num_devs; i++) { + struct dev *dev = &g_devs[i]; + + spdk_nvme_detach(dev->ctrlr); + } + +done: + cleanup(); + + return g_failed; +} diff --git a/src/spdk/test/nvme/cuse/.gitignore b/src/spdk/test/nvme/cuse/.gitignore new file mode 100644 index 000000000..b13d42337 --- /dev/null +++ b/src/spdk/test/nvme/cuse/.gitignore @@ -0,0 +1 @@ +cuse diff --git a/src/spdk/test/nvme/cuse/Makefile b/src/spdk/test/nvme/cuse/Makefile new file mode 100644 index 000000000..c847fe13f --- /dev/null +++ b/src/spdk/test/nvme/cuse/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +TEST_FILE = cuse.c + +include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk diff --git a/src/spdk/test/nvme/cuse/cuse.c b/src/spdk/test/nvme/cuse/cuse.c new file mode 100644 index 000000000..fe5c26f0c --- /dev/null +++ b/src/spdk/test/nvme/cuse/cuse.c @@ -0,0 +1,189 @@ + +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk_cunit.h" + +#include "common/lib/test_env.c" +#include "nvme/nvme_cuse.c" + +DEFINE_STUB(nvme_io_msg_send, int, (struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + spdk_nvme_io_msg_fn fn, void *arg), 0); + +DEFINE_STUB(spdk_nvme_ctrlr_alloc_cmb_io_buffer, void *, (struct spdk_nvme_ctrlr *ctrlr, + size_t size), NULL); + +DEFINE_STUB(spdk_nvme_ctrlr_cmd_admin_raw, int, (struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_cmd *cmd, void *buf, uint32_t len, + spdk_nvme_cmd_cb cb_fn, void *cb_arg), 0); + +DEFINE_STUB(spdk_nvme_ctrlr_get_num_ns, uint32_t, (struct spdk_nvme_ctrlr *ctrlr), 128); + +static uint32_t g_active_num_ns = 4; +static uint32_t g_active_nsid_min = 1; + +bool +spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid) +{ + return nsid >= g_active_nsid_min && nsid < g_active_num_ns + g_active_nsid_min; +} + +DEFINE_STUB(spdk_nvme_ctrlr_reset, int, (struct spdk_nvme_ctrlr *ctrlr), 0); + +DEFINE_STUB(spdk_nvme_ns_cmd_read, int, (struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *payload, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags), 0); + +DEFINE_STUB(spdk_nvme_ns_cmd_write, int, (struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *payload, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags), 0); + +DEFINE_STUB(spdk_nvme_ns_get_num_sectors, uint64_t, (struct spdk_nvme_ns *ns), 0); + +DEFINE_STUB(spdk_nvme_ns_get_sector_size, uint32_t, (struct spdk_nvme_ns *ns), 0); + +DEFINE_STUB_V(spdk_unaffinitize_thread, (void)); + +DEFINE_STUB(spdk_nvme_ctrlr_get_ns, struct spdk_nvme_ns *, (struct spdk_nvme_ctrlr *ctrlr, + uint32_t nsid), NULL); + +static bool +wait_for_file(char *filename, bool exists) +{ + int i; + + for (i = 0; i < 1000; i++) { + if ((access(filename, F_OK) != -1) ^ (!exists)) { + return true; + } + usleep(100); + } + return false; +} + +static void +verify_devices(struct spdk_nvme_ctrlr *ctrlr) +{ + char ctrlr_name[256]; + size_t ctrlr_name_size; + char ctrlr_dev[256], ns_dev[256 + 10]; + uint32_t nsid, num_ns; + int rv; + + ctrlr_name_size = sizeof(ctrlr_name); + rv = spdk_nvme_cuse_get_ctrlr_name(ctrlr, ctrlr_name, &ctrlr_name_size); + SPDK_CU_ASSERT_FATAL(rv == 0); + + rv = snprintf(ctrlr_dev, sizeof(ctrlr_dev), "/dev/%s", ctrlr_name); + CU_ASSERT(rv > 0); + CU_ASSERT(wait_for_file(ctrlr_dev, true)); + + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + + for (nsid = 1; nsid <= num_ns; nsid++) { + snprintf(ns_dev, sizeof(ns_dev), "%sn%" PRIu32, ctrlr_dev, nsid); + if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) { + CU_ASSERT(wait_for_file(ns_dev, true)); + } else { + CU_ASSERT(wait_for_file(ns_dev, false)); + } + } + + /* Next one 
should never exist */ + snprintf(ns_dev, sizeof(ns_dev), "%sn%" PRIu32, ctrlr_dev, nsid); + CU_ASSERT(wait_for_file(ns_dev, false)); +} + +static void +test_cuse_update(void) +{ + int rc; + struct spdk_nvme_ctrlr ctrlr = {}; + + rc = nvme_cuse_start(&ctrlr); + CU_ASSERT(rc == 0); + + g_active_num_ns = 4; + g_active_nsid_min = 1; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 0; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 4; + g_active_nsid_min = spdk_nvme_ctrlr_get_num_ns(&ctrlr) - g_active_num_ns; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 2; + g_active_nsid_min = 2; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 10; + g_active_nsid_min = 5; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 5; + g_active_nsid_min = 3; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + g_active_num_ns = 6; + g_active_nsid_min = 1; + nvme_cuse_update(&ctrlr); + verify_devices(&ctrlr); + + nvme_cuse_stop(&ctrlr); +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + + CU_set_error_action(CUEA_ABORT); + CU_initialize_registry(); + suite = CU_add_suite("nvme_cuse", NULL, NULL); + CU_ADD_TEST(suite, test_cuse_update); + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/src/spdk/test/nvme/cuse/nvme_cuse.sh b/src/spdk/test/nvme/cuse/nvme_cuse.sh new file mode 100755 index 000000000..699cd5ac8 --- /dev/null +++ b/src/spdk/test/nvme/cuse/nvme_cuse.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +if [[ $(uname) != "Linux" ]]; then + echo "NVMe cuse tests only supported on Linux" + exit 1 +fi + +modprobe cuse +run_test "nvme_cuse_app" $testdir/cuse +run_test "nvme_cuse_rpc" $testdir/nvme_cuse_rpc.sh +run_test "nvme_cli_cuse" $testdir/spdk_nvme_cli_cuse.sh +run_test "nvme_smartctl_cuse" $testdir/spdk_smartctl_cuse.sh + +# Only run Namespace managment test case when such device is present +bdfs=$(get_nvme_bdfs) + +$rootdir/scripts/setup.sh reset +sleep 1 + +# Find bdf that supports Namespace managment +for bdf in $bdfs; do + nvme_name=$(get_nvme_ctrlr_from_bdf ${bdf}) + if [[ -z "$nvme_name" ]]; then + continue + fi + + # Check Optional Admin Command Support for Namespace Management + oacs=$(nvme id-ctrl /dev/${nvme_name} | grep oacs | cut -d: -f2) + oacs_ns_manage=$((oacs & 0x8)) + + if [[ "$oacs_ns_manage" -ne 0 ]]; then + break + fi +done + +if [[ "$oacs_ns_manage" -ne 0 ]]; then + run_test "nvme_ns_manage_cuse" $testdir/nvme_ns_manage_cuse.sh +fi +$rootdir/scripts/setup.sh + +rmmod cuse diff --git a/src/spdk/test/nvme/cuse/nvme_cuse_rpc.sh b/src/spdk/test/nvme/cuse/nvme_cuse_rpc.sh new file mode 100755 index 000000000..eaf0dbd9c --- /dev/null +++ b/src/spdk/test/nvme/cuse/nvme_cuse_rpc.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +rpc_py=$rootdir/scripts/rpc.py + +bdf=$(get_first_nvme_bdf) +ctrlr_base="/dev/spdk/nvme" + +$SPDK_BIN_DIR/spdk_tgt -m 0x3 & +spdk_tgt_pid=$! 
+trap 'kill -9 ${spdk_tgt_pid}; exit 1' SIGINT SIGTERM EXIT + +waitforlisten $spdk_tgt_pid + +$rpc_py bdev_nvme_attach_controller -b Nvme0 -t PCIe -a ${bdf} +$rpc_py bdev_nvme_cuse_register -n Nvme0 + +sleep 5 + +if [ ! -c "${ctrlr_base}0" ]; then + exit 1 +fi + +$rpc_py bdev_get_bdevs +$rpc_py bdev_nvme_get_controllers + +$rpc_py bdev_nvme_cuse_unregister -n Nvme0 +sleep 1 +if [ -c "${ctrlr_base}0" ]; then + exit 1 +fi + +# Verify removing non-existent cuse device +$rpc_py bdev_nvme_cuse_unregister -n Nvme0 && false + +$rpc_py bdev_nvme_cuse_register -n Nvme0 +sleep 1 + +if [ ! -c "${ctrlr_base}0" ]; then + exit 1 +fi + +# Verify adding same nvme controller twice fails +$rpc_py bdev_nvme_cuse_register -n Nvme0 && false +sleep 1 + +if [ -c "${ctrlr_base}1" ]; then + exit 1 +fi + +$rpc_py bdev_nvme_detach_controller Nvme0 + +trap - SIGINT SIGTERM EXIT +killprocess $spdk_tgt_pid diff --git a/src/spdk/test/nvme/cuse/nvme_ns_manage_cuse.sh b/src/spdk/test/nvme/cuse/nvme_ns_manage_cuse.sh new file mode 100755 index 000000000..fb390f34e --- /dev/null +++ b/src/spdk/test/nvme/cuse/nvme_ns_manage_cuse.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +NVME_CMD="/usr/local/src/nvme-cli/nvme" + +rpc_py=$rootdir/scripts/rpc.py + +$rootdir/scripts/setup.sh +sleep 1 + +bdfs=$(get_nvme_bdfs) + +$rootdir/scripts/setup.sh reset +sleep 1 + +# Find bdf that supports Namespace Managment +for bdf in $bdfs; do + nvme_name=$(get_nvme_ctrlr_from_bdf ${bdf}) + if [[ -z "$nvme_name" ]]; then + continue + fi + + # Check Optional Admin Command Support for Namespace Management + oacs=$($NVME_CMD id-ctrl /dev/${nvme_name} | grep oacs | cut -d: -f2) + oacs_ns_manage=$((oacs & 0x8)) + + if [[ "$oacs_ns_manage" -ne 0 ]]; then + break + fi +done + +if [[ "${nvme_name}" == "" ]] || [[ "$oacs_ns_manage" -eq 0 ]]; then + echo "No NVMe device supporting Namespace managment found" + $rootdir/scripts/setup.sh + exit 1 +fi + +nvme_dev=/dev/${nvme_name} + +# Detect supported features and configuration +oaes=$($NVME_CMD id-ctrl ${nvme_dev} | grep oaes | cut -d: -f2) +aer_ns_change=$((oaes & 0x100)) + +function reset_nvme_if_aer_unsupported() { + if [[ "$aer_ns_change" -eq "0" ]]; then + sleep 1 + $NVME_CMD reset "$1" || true + fi +} + +function clean_up() { + $rootdir/scripts/setup.sh reset + + # This assumes every NVMe controller contains single namespace, + # encompassing Total NVM Capacity and formatted as 512 block size. + # 512 block size is needed for test/vhost/vhost_boot.sh to + # succesfully run. + + tnvmcap=$($NVME_CMD id-ctrl ${nvme_dev} | grep tnvmcap | cut -d: -f2) + blksize=512 + + size=$((tnvmcap / blksize)) + + echo "Restoring $nvme_dev..." + $NVME_CMD detach-ns ${nvme_dev} -n 0xffffffff -c 0 || true + $NVME_CMD delete-ns ${nvme_dev} -n 0xffffffff || true + $NVME_CMD create-ns ${nvme_dev} -s ${size} -c ${size} -b ${blksize} + $NVME_CMD attach-ns ${nvme_dev} -n 1 -c 0 + $NVME_CMD reset ${nvme_dev} + + $rootdir/scripts/setup.sh +} + +function info_print() { + echo "---" + echo "$@" + echo "---" +} + +# Prepare controller +info_print "delete all namespaces" +$NVME_CMD detach-ns ${nvme_dev} -n 0xffffffff -c 0 || true +$NVME_CMD delete-ns ${nvme_dev} -n 0xffffffff || true + +reset_nvme_if_aer_unsupported ${nvme_dev} +sleep 1 + +PCI_WHITELIST="${bdf}" $rootdir/scripts/setup.sh + +$SPDK_BIN_DIR/spdk_tgt -m 0x3 & +spdk_tgt_pid=$! 
+trap 'kill -9 ${spdk_tgt_pid}; clean_up; exit 1' SIGINT SIGTERM EXIT + +waitforlisten $spdk_tgt_pid + +$rpc_py bdev_nvme_attach_controller -b Nvme0 -t PCIe -a ${bdf} +$rpc_py bdev_nvme_cuse_register -n Nvme0 + +sleep 1 +[[ -c /dev/spdk/nvme0 ]] + +for dev in /dev/spdk/nvme0n*; do + [[ ! -c ${dev} ]] +done + +info_print "create ns: nsze=10000 ncap=10000 flbias=0" +$NVME_CMD create-ns /dev/spdk/nvme0 -s 10000 -c 10000 -f 0 + +info_print "attach ns: nsid=1 controller=0" +$NVME_CMD attach-ns /dev/spdk/nvme0 -n 1 -c 0 + +reset_nvme_if_aer_unsupported /dev/spdk/nvme0 +sleep 1 + +[[ -c /dev/spdk/nvme0n1 ]] + +info_print "create ns: nsze=10000 ncap=10000 flbias=0" +$NVME_CMD create-ns /dev/spdk/nvme0 -s 10000 -c 10000 -f 0 + +info_print "attach ns: nsid=2 controller=0" +$NVME_CMD attach-ns /dev/spdk/nvme0 -n 2 -c 0 + +reset_nvme_if_aer_unsupported /dev/spdk/nvme0 +sleep 1 + +[[ -c /dev/spdk/nvme0n2 ]] + +info_print "detach ns: nsid=2 controller=0" +$NVME_CMD detach-ns /dev/spdk/nvme0 -n 2 -c 0 || true + +info_print "delete ns: nsid=2" +$NVME_CMD delete-ns /dev/spdk/nvme0 -n 2 || true + +reset_nvme_if_aer_unsupported /dev/spdk/nvme0 +sleep 1 + +[[ ! -c /dev/spdk/nvme0n2 ]] + +info_print "detach ns: nsid=1 controller=0" +$NVME_CMD detach-ns /dev/spdk/nvme0 -n 1 -c 0 || true + +info_print "delete ns: nsid=1" +$NVME_CMD delete-ns /dev/spdk/nvme0 -n 1 || true + +reset_nvme_if_aer_unsupported /dev/spdk/nvme0 +sleep 1 + +# Here we should not have any cuse devices +for dev in /dev/spdk/nvme0n*; do + [[ ! -c ${dev} ]] +done + +$rpc_py bdev_nvme_detach_controller Nvme0 + +sleep 1 +[[ ! -c /dev/spdk/nvme0 ]] + +trap - SIGINT SIGTERM EXIT +killprocess $spdk_tgt_pid +clean_up diff --git a/src/spdk/test/nvme/cuse/spdk_nvme_cli_cuse.sh b/src/spdk/test/nvme/cuse/spdk_nvme_cli_cuse.sh new file mode 100755 index 000000000..cdddd2278 --- /dev/null +++ b/src/spdk/test/nvme/cuse/spdk_nvme_cli_cuse.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +rm -Rf $testdir/match_files +mkdir $testdir/match_files + +KERNEL_OUT=$testdir/match_files/kernel.out +CUSE_OUT=$testdir/match_files/cuse.out + +NVME_CMD=/usr/local/src/nvme-cli/nvme +rpc_py=$rootdir/scripts/rpc.py + +bdf=$(get_first_nvme_bdf) + +PCI_WHITELIST="${bdf}" $rootdir/scripts/setup.sh reset +sleep 1 +nvme_name=$(get_nvme_ctrlr_from_bdf ${bdf}) +if [[ -z "$nvme_name" ]]; then + echo "setup.sh failed bind kernel driver to ${bdf}" + return 1 +fi + +ctrlr="/dev/${nvme_name}" +ns="/dev/${nvme_name}n1" + +waitforblk "${nvme_name}n1" + +oacs=$(${NVME_CMD} id-ctrl $ctrlr | grep oacs | cut -d: -f2) +oacs_firmware=$((oacs & 0x4)) + +set +e + +${NVME_CMD} get-ns-id $ns > ${KERNEL_OUT}.1 +${NVME_CMD} id-ns $ns > ${KERNEL_OUT}.2 +${NVME_CMD} list-ns $ns > ${KERNEL_OUT}.3 + +${NVME_CMD} id-ctrl $ctrlr > ${KERNEL_OUT}.4 +${NVME_CMD} list-ctrl $ctrlr > ${KERNEL_OUT}.5 +if [ "$oacs_firmware" -ne "0" ]; then + ${NVME_CMD} fw-log $ctrlr > ${KERNEL_OUT}.6 +fi +${NVME_CMD} smart-log $ctrlr +${NVME_CMD} error-log $ctrlr > ${KERNEL_OUT}.7 +${NVME_CMD} get-feature $ctrlr -f 1 -s 1 -l 100 > ${KERNEL_OUT}.8 +${NVME_CMD} get-log $ctrlr -i 1 -l 100 > ${KERNEL_OUT}.9 +${NVME_CMD} reset $ctrlr > ${KERNEL_OUT}.10 + +set -e + +$rootdir/scripts/setup.sh + +$SPDK_BIN_DIR/spdk_tgt -m 0x3 & +spdk_tgt_pid=$! 
+trap 'kill -9 ${spdk_tgt_pid}; exit 1' SIGINT SIGTERM EXIT + +waitforlisten $spdk_tgt_pid + +$rpc_py bdev_nvme_attach_controller -b Nvme0 -t PCIe -a ${bdf} +$rpc_py bdev_nvme_cuse_register -n Nvme0 + +sleep 5 + +if [ ! -c /dev/spdk/nvme0 ]; then + return 1 +fi + +$rpc_py bdev_get_bdevs +$rpc_py bdev_nvme_get_controllers + +set +e + +ns="/dev/spdk/nvme0n1" +${NVME_CMD} get-ns-id $ns > ${CUSE_OUT}.1 +${NVME_CMD} id-ns $ns > ${CUSE_OUT}.2 +${NVME_CMD} list-ns $ns > ${CUSE_OUT}.3 + +ctrlr="/dev/spdk/nvme0" +${NVME_CMD} id-ctrl $ctrlr > ${CUSE_OUT}.4 +${NVME_CMD} list-ctrl $ctrlr > ${CUSE_OUT}.5 +if [ "$oacs_firmware" -ne "0" ]; then + ${NVME_CMD} fw-log $ctrlr > ${CUSE_OUT}.6 +fi +${NVME_CMD} smart-log $ctrlr +${NVME_CMD} error-log $ctrlr > ${CUSE_OUT}.7 +${NVME_CMD} get-feature $ctrlr -f 1 -s 1 -l 100 > ${CUSE_OUT}.8 +${NVME_CMD} get-log $ctrlr -i 1 -l 100 > ${CUSE_OUT}.9 +${NVME_CMD} reset $ctrlr > ${CUSE_OUT}.10 + +set -e + +for i in {1..10}; do + if [ -f "${KERNEL_OUT}.${i}" ] && [ -f "${CUSE_OUT}.${i}" ]; then + sed -i "s/${nvme_name}/nvme0/g" ${KERNEL_OUT}.${i} + diff --suppress-common-lines ${KERNEL_OUT}.${i} ${CUSE_OUT}.${i} + fi +done + +rm -Rf $testdir/match_files + +if [ ! -c "$ctrlr" ]; then + return 1 +fi + +trap - SIGINT SIGTERM EXIT +killprocess $spdk_tgt_pid diff --git a/src/spdk/test/nvme/cuse/spdk_smartctl_cuse.sh b/src/spdk/test/nvme/cuse/spdk_smartctl_cuse.sh new file mode 100755 index 000000000..a92ca1199 --- /dev/null +++ b/src/spdk/test/nvme/cuse/spdk_smartctl_cuse.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +SMARTCTL_CMD='smartctl -d nvme' +rpc_py=$rootdir/scripts/rpc.py + +bdf=$(get_first_nvme_bdf) + +PCI_WHITELIST="${bdf}" $rootdir/scripts/setup.sh reset +sleep 1 +nvme_name=$(get_nvme_ctrlr_from_bdf ${bdf}) +if [[ -z "$nvme_name" ]]; then + echo "setup.sh failed bind kernel driver to ${bdf}" + exit 1 +fi + +KERNEL_SMART_JSON=$(${SMARTCTL_CMD} --json=g -a /dev/${nvme_name} | grep -v "/dev/${nvme_name}" | sort || true) + +${SMARTCTL_CMD} -i /dev/${nvme_name}n1 + +# logs are not provided by json output +KERNEL_SMART_ERRLOG=$(${SMARTCTL_CMD} -l error /dev/${nvme_name}) + +$rootdir/scripts/setup.sh + +$SPDK_BIN_DIR/spdk_tgt -m 0x3 & +spdk_tgt_pid=$! +trap 'kill -9 ${spdk_tgt_pid}; exit 1' SIGINT SIGTERM EXIT + +waitforlisten $spdk_tgt_pid + +$rpc_py bdev_nvme_attach_controller -b Nvme0 -t PCIe -a ${bdf} +$rpc_py bdev_nvme_cuse_register -n Nvme0 + +sleep 5 + +if [ ! 
-c /dev/spdk/nvme0 ]; then + exit 1 +fi + +CUSE_SMART_JSON=$(${SMARTCTL_CMD} --json=g -a /dev/spdk/nvme0 | grep -v "/dev/spdk/nvme0" | sort || true) + +DIFF_SMART_JSON=$(diff --changed-group-format='%<' --unchanged-group-format='' <(echo "$KERNEL_SMART_JSON") <(echo "$CUSE_SMART_JSON") || true) + +# Mask values can change +ERR_SMART_JSON=$(grep -v "json\.nvme_smart_health_information_log\.\|json\.local_time\.\|json\.temperature\.\|json\.power_on_time\.hours" <<< $DIFF_SMART_JSON || true) + +if [ -n "$ERR_SMART_JSON" ]; then + echo "Wrong values for: $ERR_SMART_JSON" + exit 1 +fi + +CUSE_SMART_ERRLOG=$(${SMARTCTL_CMD} -l error /dev/spdk/nvme0) +if [ "$CUSE_SMART_ERRLOG" != "$KERNEL_SMART_ERRLOG" ]; then + echo "Wrong values in NVMe Error log" + exit 1 +fi + +# Data integity was checked before, now make sure other commads didn't fail +${SMARTCTL_CMD} -i /dev/spdk/nvme0n1 +${SMARTCTL_CMD} -c /dev/spdk/nvme0 +${SMARTCTL_CMD} -A /dev/spdk/nvme0 + +# Health test can fail +${SMARTCTL_CMD} -x /dev/spdk/nvme0 || true +${SMARTCTL_CMD} -H /dev/spdk/nvme0 || true + +$rpc_py bdev_nvme_detach_controller Nvme0 +sleep 1 +if [ -c /dev/spdk/nvme1 ]; then + exit 1 +fi + +trap - SIGINT SIGTERM EXIT +killprocess $spdk_tgt_pid diff --git a/src/spdk/test/nvme/deallocated_value/.gitignore b/src/spdk/test/nvme/deallocated_value/.gitignore new file mode 100644 index 000000000..8460e82ea --- /dev/null +++ b/src/spdk/test/nvme/deallocated_value/.gitignore @@ -0,0 +1 @@ +deallocated_value diff --git a/src/spdk/test/nvme/deallocated_value/Makefile b/src/spdk/test/nvme/deallocated_value/Makefile new file mode 100644 index 000000000..8277c32e0 --- /dev/null +++ b/src/spdk/test/nvme/deallocated_value/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+ +APP = deallocated_value + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/deallocated_value/deallocated_value.c b/src/spdk/test/nvme/deallocated_value/deallocated_value.c new file mode 100644 index 000000000..91600e83e --- /dev/null +++ b/src/spdk/test/nvme/deallocated_value/deallocated_value.c @@ -0,0 +1,447 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" + +#define NUM_BLOCKS 100 + +/* + * The purpose of this sample app is to determine the read value of deallocated logical blocks + * from a given NVMe Controller. The NVMe 1.3 spec requires the controller to list this value, + * but controllers adhering to the NVMe 1.2 spec may not report this value. According to the spec, + * "The values read from a deallocated logical block and its metadata (excluding protection information) shall + * be all bytes set to 00h, all bytes set to FFh, or the last data written to the associated logical block". 
+ */ + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct ns_entry *next; + struct spdk_nvme_qpair *qpair; +}; + +struct deallocate_context { + struct ns_entry *ns_entry; + char **write_buf; + char **read_buf; + char *zero_buf; + char *FFh_buf; + int writes_completed; + int reads_completed; + int deallocate_completed; + int flush_complete; + int matches_zeroes; + int matches_previous_data; + int matches_FFh; +}; + +static struct ns_entry *g_namespaces = NULL; + +static void cleanup(struct deallocate_context *context); + +static void +fill_random(char *buf, size_t num_bytes) +{ + size_t i; + + srand((unsigned) time(NULL)); + for (i = 0; i < num_bytes; i++) { + buf[i] = rand() % 0x100; + } +} + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->ns = ns; + entry->next = g_namespaces; + g_namespaces = entry; + + printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns) / 1000000000); +} + +static uint32_t +get_max_block_size(void) +{ + struct ns_entry *ns; + uint32_t max_block_size, temp_block_size; + + ns = g_namespaces; + max_block_size = 0; + + while (ns != NULL) { + temp_block_size = spdk_nvme_ns_get_sector_size(ns->ns); + max_block_size = temp_block_size > max_block_size ? temp_block_size : max_block_size; + ns = ns->next; + } + + return max_block_size; +} + +static void +write_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct deallocate_context *context = arg; + + context->writes_completed++; +} + +static void +read_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct deallocate_context *context = arg; + struct ns_entry *ns_entry = context->ns_entry; + int rc; + + rc = memcmp(context->write_buf[context->reads_completed], + context->read_buf[context->reads_completed], spdk_nvme_ns_get_sector_size(ns_entry->ns)); + if (rc == 0) { + context->matches_previous_data++; + } + + rc = memcmp(context->zero_buf, context->read_buf[context->reads_completed], + spdk_nvme_ns_get_sector_size(ns_entry->ns)); + if (rc == 0) { + context->matches_zeroes++; + } + + rc = memcmp(context->FFh_buf, context->read_buf[context->reads_completed], + spdk_nvme_ns_get_sector_size(ns_entry->ns)); + if (rc == 0) { + context->matches_FFh++; + } + context->reads_completed++; +} + +static void +deallocate_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct deallocate_context *context = arg; + + printf("blocks matching previous data: %d\n", context->matches_previous_data); + printf("blocks matching zeroes: %d\n", context->matches_zeroes); + printf("blocks matching 0xFF: %d\n", context->matches_FFh); + printf("Deallocating Blocks 0 to %d with random data.\n", NUM_BLOCKS - 1); + printf("On next read, read value will match deallocated block read value.\n"); + context->deallocate_completed = 1; + context->reads_completed = 0; + context->matches_previous_data = 0; + context->matches_zeroes = 0; + context->matches_FFh = 0; +} + +static void +flush_complete(void *arg, const struct spdk_nvme_cpl *completion) +{ + struct deallocate_context 
*context = arg; + + context->flush_complete = 1; +} + +static void +deallocate_test(void) +{ + struct ns_entry *ns_entry; + struct spdk_nvme_ctrlr *ctrlr; + const struct spdk_nvme_ctrlr_data *data; + struct deallocate_context context; + struct spdk_nvme_dsm_range range; + uint32_t max_block_size; + int rc, i; + + memset(&context, 0, sizeof(struct deallocate_context)); + max_block_size = get_max_block_size(); + ns_entry = g_namespaces; + + if (max_block_size > 0) { + context.zero_buf = malloc(max_block_size); + } else { + printf("Unable to determine max block size.\n"); + return; + } + + if (context.zero_buf == NULL) { + printf("could not allocate buffer for test.\n"); + return; + } + + context.FFh_buf = malloc(max_block_size); + if (context.FFh_buf == NULL) { + cleanup(&context); + printf("could not allocate buffer for test.\n"); + return; + } + + context.write_buf = calloc(NUM_BLOCKS, sizeof(char *)); + if (context.write_buf == NULL) { + cleanup(&context); + return; + } + + context.read_buf = calloc(NUM_BLOCKS, sizeof(char *)); + if (context.read_buf == NULL) { + printf("could not allocate buffer for test.\n"); + cleanup(&context); + return; + } + + memset(context.zero_buf, 0x00, max_block_size); + memset(context.FFh_buf, 0xFF, max_block_size); + + for (i = 0; i < NUM_BLOCKS; i++) { + context.write_buf[i] = spdk_zmalloc(0x1000, max_block_size, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (context.write_buf[i] == NULL) { + printf("could not allocate buffer for test.\n"); + cleanup(&context); + return; + } + + fill_random(context.write_buf[i], 0x1000); + context.read_buf[i] = spdk_zmalloc(0x1000, max_block_size, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + if (context.read_buf[i] == NULL) { + printf("could not allocate buffer for test.\n"); + cleanup(&context); + return; + } + } + + while (ns_entry != NULL) { + + ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, NULL, 0); + if (ns_entry->qpair == NULL) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed.\n"); + return; + } + + ctrlr = spdk_nvme_ns_get_ctrlr(ns_entry->ns); + data = spdk_nvme_ctrlr_get_data(ctrlr); + + printf("\nController %-20.20s (%-20.20s)\n", data->mn, data->sn); + printf("Controller PCI vendor:%u PCI subsystem vendor:%u\n", data->vid, data->ssvid); + printf("Namespace Block Size:%u\n", spdk_nvme_ns_get_sector_size(ns_entry->ns)); + printf("Writing Blocks 0 to %d with random data.\n", NUM_BLOCKS - 1); + printf("On next read, read value will match random data.\n"); + + context.ns_entry = ns_entry; + + for (i = 0; i < NUM_BLOCKS; i++) { + rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, context.write_buf[i], + i, + 1, + write_complete, &context, 0); + if (rc) { + printf("Error in nvme command completion, values may be inaccurate.\n"); + } + } + while (context.writes_completed < NUM_BLOCKS) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + spdk_nvme_ns_cmd_flush(ns_entry->ns, ns_entry->qpair, flush_complete, &context); + while (!context.flush_complete) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + for (i = 0; i < NUM_BLOCKS; i++) { + rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, context.read_buf[i], + i, /* LBA start */ + 1, /* number of LBAs */ + read_complete, &context, 0); + if (rc) { + printf("Error in nvme command completion, values may be inaccurate.\n"); + } + + /* block after each read command so that we can match the block to the write buffer. 
*/ + while (context.reads_completed <= i) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + } + + context.flush_complete = 0; + range.length = NUM_BLOCKS; + range.starting_lba = 0; + rc = spdk_nvme_ns_cmd_dataset_management(ns_entry->ns, ns_entry->qpair, + SPDK_NVME_DSM_ATTR_DEALLOCATE, &range, 1, deallocate_complete, &context); + if (rc) { + printf("Error in nvme command completion, values may be inaccurate.\n"); + } + + while (!context.deallocate_completed) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + + for (i = 0; i < NUM_BLOCKS; i++) { + rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, context.read_buf[i], + i, /* LBA start */ + 1, /* number of LBAs */ + read_complete, &context, 0); + if (rc) { + printf("Error in nvme command completion, values may be inaccurate.\n"); + } + while (context.reads_completed <= i) { + spdk_nvme_qpair_process_completions(ns_entry->qpair, 0); + } + } + + printf("blocks matching previous data: %d\n", context.matches_previous_data); + printf("blocks matching zeroes: %d\n", context.matches_zeroes); + printf("blocks matching FFh: %d\n", context.matches_FFh); + + /* reset counters in between each namespace. */ + context.matches_previous_data = 0; + context.matches_zeroes = 0; + context.matches_FFh = 0; + context.writes_completed = 0; + context.reads_completed = 0; + context.deallocate_completed = 0; + + spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair); + ns_entry = ns_entry->next; + } + cleanup(&context); +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + int num_ns; + struct spdk_nvme_ns *ns; + + printf("Attached to %s\n", trid->traddr); + /* + * Use only the first namespace from each controller since we are testing controller level functionality. 
+ */ + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + if (num_ns < 1) { + printf("No valid namespaces in controller\n"); + } else { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1); + register_ns(ctrlr, ns); + } +} + +static void +cleanup(struct deallocate_context *context) +{ + struct ns_entry *ns_entry = g_namespaces; + int i; + + while (ns_entry) { + struct ns_entry *next = ns_entry->next; + free(ns_entry); + ns_entry = next; + } + for (i = 0; i < NUM_BLOCKS; i++) { + if (context->write_buf && context->write_buf[i]) { + spdk_free(context->write_buf[i]); + } else { + break; + } + if (context->read_buf && context->read_buf[i]) { + spdk_free(context->read_buf[i]); + } else { + break; + } + } + + free(context->write_buf); + free(context->read_buf); + free(context->zero_buf); + free(context->FFh_buf); +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "deallocate_test"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Initializing NVMe Controllers\n"); + + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (g_namespaces == NULL) { + fprintf(stderr, "no NVMe controllers found\n"); + return 1; + } + + printf("Initialization complete.\n"); + deallocate_test(); + return 0; +} diff --git a/src/spdk/test/nvme/e2edp/.gitignore b/src/spdk/test/nvme/e2edp/.gitignore new file mode 100644 index 000000000..df0958204 --- /dev/null +++ b/src/spdk/test/nvme/e2edp/.gitignore @@ -0,0 +1 @@ +nvme_dp diff --git a/src/spdk/test/nvme/e2edp/Makefile b/src/spdk/test/nvme/e2edp/Makefile new file mode 100644 index 000000000..576262269 --- /dev/null +++ b/src/spdk/test/nvme/e2edp/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+ +APP = nvme_dp + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/e2edp/nvme_dp.c b/src/spdk/test/nvme/e2edp/nvme_dp.c new file mode 100644 index 000000000..9559001e8 --- /dev/null +++ b/src/spdk/test/nvme/e2edp/nvme_dp.c @@ -0,0 +1,652 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NVMe end-to-end data protection test + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/crc16.h" +#include "spdk/endian.h" +#include "spdk/memory.h" + +#define MAX_DEVS 64 + +#define DATA_PATTERN 0x5A + +struct dev { + struct spdk_nvme_ctrlr *ctrlr; + char name[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +static int io_complete_flag = 0; + +struct io_request { + void *contig; + void *metadata; + bool use_extended_lba; + bool use_sgl; + uint32_t sgl_offset; + uint32_t buf_size; + uint64_t lba; + uint32_t lba_count; + uint16_t apptag_mask; + uint16_t apptag; +}; + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + io_complete_flag = 2; + } else { + io_complete_flag = 1; + } +} + +static void +ns_data_buffer_reset(struct spdk_nvme_ns *ns, struct io_request *req, uint8_t data_pattern) +{ + uint32_t md_size, sector_size; + uint32_t i, offset = 0; + uint8_t *buf; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + + for (i = 0; i < req->lba_count; i++) { + if (req->use_extended_lba) { + offset = (sector_size + md_size) * i; + } else { + offset = sector_size * i; + } + + buf = (uint8_t *)req->contig + offset; + memset(buf, data_pattern, sector_size); + } +} + +static void nvme_req_reset_sgl(void *cb_arg, uint32_t sgl_offset) +{ + struct io_request *req = (struct io_request *)cb_arg; + + req->sgl_offset = sgl_offset; + return; +} + +static int nvme_req_next_sge(void *cb_arg, void **address, uint32_t *length) +{ + struct io_request *req = (struct io_request *)cb_arg; + void *payload; + + payload = req->contig + req->sgl_offset; + *address = payload; + + *length = req->buf_size - req->sgl_offset; + + return 0; +} + +/* CRC-16 Guard checked for extended lba format */ +static uint32_t dp_guard_check_extended_lba_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + struct spdk_nvme_protection_info *pi; + uint32_t md_size, sector_size; + + req->lba_count = 2; + + /* extended LBA only for the test case */ + if (!(spdk_nvme_ns_supports_extended_lba(ns))) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc((sector_size + md_size) * req->lba_count, 0x1000, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + assert(req->contig); + + req->lba = 0; + req->use_extended_lba = true; + req->use_sgl = true; + req->buf_size = (sector_size + md_size) * req->lba_count; + req->metadata = NULL; + ns_data_buffer_reset(ns, req, DATA_PATTERN); + pi = (struct spdk_nvme_protection_info *)(req->contig + sector_size + md_size - 8); + /* big-endian for guard */ + to_be16(&pi->guard, spdk_crc16_t10dif(0, req->contig, sector_size)); + + pi = (struct spdk_nvme_protection_info *)(req->contig + (sector_size + md_size) * 2 - 8); + to_be16(&pi->guard, spdk_crc16_t10dif(0, req->contig + sector_size + md_size, sector_size)); + + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + + return req->lba_count; +} + +/* + * No protection information with PRACT setting to 1, + * both extended LBA format and separate metadata can + * run the test case. 
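+ * With PRACT set, the controller generates and verifies the protection
+ * information itself. When the metadata region is exactly 8 bytes (PI only),
+ * the PI is inserted/stripped by the controller and no metadata is
+ * transferred, which is why the data buffer below is sized without md_size
+ * in that case.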
+ */ +static uint32_t dp_with_pract_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + uint32_t md_size, sector_size, data_len; + + req->lba_count = 8; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + if (md_size == 8) { + /* No additional metadata buffer provided */ + data_len = sector_size * req->lba_count; + } else { + data_len = (sector_size + md_size) * req->lba_count; + } + req->contig = spdk_zmalloc(data_len, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->contig); + + req->metadata = spdk_zmalloc(md_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->metadata); + + switch (spdk_nvme_ns_get_pi_type(ns)) { + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE3: + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD | SPDK_NVME_IO_FLAGS_PRACT; + break; + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE1: + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE2: + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD | SPDK_NVME_IO_FLAGS_PRCHK_REFTAG | + SPDK_NVME_IO_FLAGS_PRACT; + break; + default: + *io_flags = 0; + break; + } + + req->lba = 0; + req->use_extended_lba = false; + + return req->lba_count; +} + +/* Block Reference Tag checked for TYPE1 and TYPE2 with PRACT setting to 0 */ +static uint32_t dp_without_pract_extended_lba_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + struct spdk_nvme_protection_info *pi; + uint32_t md_size, sector_size; + + req->lba_count = 2; + + switch (spdk_nvme_ns_get_pi_type(ns)) { + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE3: + return 0; + default: + break; + } + + /* extended LBA only for the test case */ + if (!(spdk_nvme_ns_supports_extended_lba(ns))) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc((sector_size + md_size) * req->lba_count, 0x1000, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + assert(req->contig); + + req->lba = 0; + req->use_extended_lba = true; + req->metadata = NULL; + pi = (struct spdk_nvme_protection_info *)(req->contig + sector_size + md_size - 8); + /* big-endian for reference tag */ + to_be32(&pi->ref_tag, (uint32_t)req->lba); + + pi = (struct spdk_nvme_protection_info *)(req->contig + (sector_size + md_size) * 2 - 8); + /* is incremented for each subsequent logical block */ + to_be32(&pi->ref_tag, (uint32_t)(req->lba + 1)); + + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + + return req->lba_count; +} + +/* LBA + Metadata without data protection bits setting */ +static uint32_t dp_without_flags_extended_lba_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + uint32_t md_size, sector_size; + + req->lba_count = 16; + + /* extended LBA only for the test case */ + if (!(spdk_nvme_ns_supports_extended_lba(ns))) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc((sector_size + md_size) * req->lba_count, 0x1000, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + assert(req->contig); + + req->lba = 0; + req->use_extended_lba = true; + req->metadata = NULL; + *io_flags = 0; + + return req->lba_count; +} + +/* Block Reference Tag checked for TYPE1 and TYPE2 with PRACT setting to 0 */ +static uint32_t dp_without_pract_separate_meta_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + struct spdk_nvme_protection_info *pi; + uint32_t md_size, sector_size; + + req->lba_count = 2; + + 
switch (spdk_nvme_ns_get_pi_type(ns)) { + case SPDK_NVME_FMT_NVM_PROTECTION_TYPE3: + return 0; + default: + break; + } + + /* separate metadata payload for the test case */ + if (spdk_nvme_ns_supports_extended_lba(ns)) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc(sector_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->contig); + + req->metadata = spdk_zmalloc(md_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->metadata); + + req->lba = 0; + req->use_extended_lba = false; + + /* last 8 bytes if the metadata size bigger than 8 */ + pi = (struct spdk_nvme_protection_info *)(req->metadata + md_size - 8); + /* big-endian for reference tag */ + to_be32(&pi->ref_tag, (uint32_t)req->lba); + + pi = (struct spdk_nvme_protection_info *)(req->metadata + md_size * 2 - 8); + /* is incremented for each subsequent logical block */ + to_be32(&pi->ref_tag, (uint32_t)(req->lba + 1)); + + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + + return req->lba_count; +} + +/* Application Tag checked with PRACT setting to 0 */ +static uint32_t dp_without_pract_separate_meta_apptag_test(struct spdk_nvme_ns *ns, + struct io_request *req, + uint32_t *io_flags) +{ + struct spdk_nvme_protection_info *pi; + uint32_t md_size, sector_size; + + req->lba_count = 1; + + /* separate metadata payload for the test case */ + if (spdk_nvme_ns_supports_extended_lba(ns)) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc(sector_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->contig); + + req->metadata = spdk_zmalloc(md_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->metadata); + + req->lba = 0; + req->use_extended_lba = false; + req->apptag_mask = 0xFFFF; + req->apptag = req->lba_count; + + /* last 8 bytes if the metadata size bigger than 8 */ + pi = (struct spdk_nvme_protection_info *)(req->metadata + md_size - 8); + to_be16(&pi->app_tag, req->lba_count); + + *io_flags = SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + + return req->lba_count; +} + +/* + * LBA + Metadata without data protection bits setting, + * separate metadata payload for the test case. 
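+ * With PRACT and all PRCHK bits left clear, the metadata (including the PI
+ * bytes) is transferred as-is and the controller performs no end-to-end
+ * protection checks on it.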
+ */ +static uint32_t dp_without_flags_separate_meta_test(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *io_flags) +{ + uint32_t md_size, sector_size; + + req->lba_count = 16; + + /* separate metadata payload for the test case */ + if (spdk_nvme_ns_supports_extended_lba(ns)) { + return 0; + } + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + req->contig = spdk_zmalloc(sector_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->contig); + + req->metadata = spdk_zmalloc(md_size * req->lba_count, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + assert(req->metadata); + + req->lba = 0; + req->use_extended_lba = false; + *io_flags = 0; + + return req->lba_count; +} + +typedef uint32_t (*nvme_build_io_req_fn_t)(struct spdk_nvme_ns *ns, struct io_request *req, + uint32_t *lba_count); + +static void +free_req(struct io_request *req) +{ + if (req == NULL) { + return; + } + + if (req->contig) { + spdk_free(req->contig); + } + + if (req->metadata) { + spdk_free(req->metadata); + } + + spdk_free(req); +} + +static int +ns_data_buffer_compare(struct spdk_nvme_ns *ns, struct io_request *req, uint8_t data_pattern) +{ + uint32_t md_size, sector_size; + uint32_t i, j, offset = 0; + uint8_t *buf; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + + for (i = 0; i < req->lba_count; i++) { + if (req->use_extended_lba) { + offset = (sector_size + md_size) * i; + } else { + offset = sector_size * i; + } + + buf = (uint8_t *)req->contig + offset; + for (j = 0; j < sector_size; j++) { + if (buf[j] != data_pattern) { + return -1; + } + } + } + + return 0; +} + +static int +write_read_e2e_dp_tests(struct dev *dev, nvme_build_io_req_fn_t build_io_fn, const char *test_name) +{ + int rc = 0; + uint32_t lba_count; + uint32_t io_flags = 0; + + struct io_request *req; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + const struct spdk_nvme_ns_data *nsdata; + + ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, 1); + if (!ns) { + printf("Null namespace\n"); + return 0; + } + + if (!(spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED)) { + return 0; + } + + nsdata = spdk_nvme_ns_get_data(ns); + if (!nsdata || !spdk_nvme_ns_get_sector_size(ns)) { + fprintf(stderr, "Empty nsdata or wrong sector size\n"); + return -EINVAL; + } + + req = spdk_zmalloc(sizeof(*req), 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + assert(req); + + /* IO parameters setting */ + lba_count = build_io_fn(ns, req, &io_flags); + if (!lba_count) { + printf("%s: %s bypass the test case\n", dev->name, test_name); + free_req(req); + return 0; + } + + qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0); + if (!qpair) { + free_req(req); + return -1; + } + + ns_data_buffer_reset(ns, req, DATA_PATTERN); + if (req->use_extended_lba && req->use_sgl) { + rc = spdk_nvme_ns_cmd_writev(ns, qpair, req->lba, lba_count, io_complete, req, io_flags, + nvme_req_reset_sgl, nvme_req_next_sge); + } else if (req->use_extended_lba) { + rc = spdk_nvme_ns_cmd_write(ns, qpair, req->contig, req->lba, lba_count, + io_complete, req, io_flags); + } else { + rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, req->contig, req->metadata, req->lba, lba_count, + io_complete, req, io_flags, req->apptag_mask, req->apptag); + } + + if (rc != 0) { + fprintf(stderr, "%s: %s write submit failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + io_complete_flag = 0; + + 
while (!io_complete_flag) { + spdk_nvme_qpair_process_completions(qpair, 1); + } + + if (io_complete_flag != 1) { + fprintf(stderr, "%s: %s write exec failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + /* reset completion flag */ + io_complete_flag = 0; + + ns_data_buffer_reset(ns, req, 0); + if (req->use_extended_lba && req->use_sgl) { + rc = spdk_nvme_ns_cmd_readv(ns, qpair, req->lba, lba_count, io_complete, req, io_flags, + nvme_req_reset_sgl, nvme_req_next_sge); + + } else if (req->use_extended_lba) { + rc = spdk_nvme_ns_cmd_read(ns, qpair, req->contig, req->lba, lba_count, + io_complete, req, io_flags); + } else { + rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, req->contig, req->metadata, req->lba, lba_count, + io_complete, req, io_flags, req->apptag_mask, req->apptag); + } + + if (rc != 0) { + fprintf(stderr, "%s: %s read failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + while (!io_complete_flag) { + spdk_nvme_qpair_process_completions(qpair, 1); + } + + if (io_complete_flag != 1) { + fprintf(stderr, "%s: %s read failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + rc = ns_data_buffer_compare(ns, req, DATA_PATTERN); + if (rc < 0) { + fprintf(stderr, "%s: %s write/read success, but memcmp Failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + printf("%s: %s test passed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + + /* add to dev list */ + dev = &devs[num_devs++]; + + dev->ctrlr = ctrlr; + + snprintf(dev->name, sizeof(dev->name), "%s", + trid->traddr); + + printf("Attached to %s\n", dev->name); +} + +int main(int argc, char **argv) +{ + struct dev *iter; + int rc, i; + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "nvme_dp"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("NVMe Write/Read with End-to-End data protection test\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "nvme_probe() failed\n"); + exit(1); + } + + rc = 0; + foreach_dev(iter) { +#define TEST(x) write_read_e2e_dp_tests(iter, x, #x) + if (TEST(dp_with_pract_test) + || TEST(dp_guard_check_extended_lba_test) + || TEST(dp_without_pract_extended_lba_test) + || TEST(dp_without_flags_extended_lba_test) + || TEST(dp_without_pract_separate_meta_test) + || TEST(dp_without_pract_separate_meta_apptag_test) + || TEST(dp_without_flags_separate_meta_test)) { +#undef TEST + rc = 1; + printf("%s: failed End-to-End data protection tests\n", iter->name); + } + } + + printf("Cleaning up...\n"); + + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + + spdk_nvme_detach(dev->ctrlr); + } + + return rc; +} diff --git a/src/spdk/test/nvme/err_injection/.gitignore b/src/spdk/test/nvme/err_injection/.gitignore new file mode 100644 index 000000000..3572a8e78 --- /dev/null +++ 
b/src/spdk/test/nvme/err_injection/.gitignore @@ -0,0 +1 @@ +err_injection diff --git a/src/spdk/test/nvme/err_injection/Makefile b/src/spdk/test/nvme/err_injection/Makefile new file mode 100644 index 000000000..579fb5440 --- /dev/null +++ b/src/spdk/test/nvme/err_injection/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = err_injection + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/err_injection/err_injection.c b/src/spdk/test/nvme/err_injection/err_injection.c new file mode 100644 index 000000000..73b42786b --- /dev/null +++ b/src/spdk/test/nvme/err_injection/err_injection.c @@ -0,0 +1,279 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/env.h" + +#define MAX_DEVS 64 + +struct dev { + bool error_expected; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + void *data; + char name[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +static int outstanding_commands = 0; +static int failed = 0; + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + uint32_t nsid; + + /* add to dev list */ + dev = &devs[num_devs++]; + if (num_devs >= MAX_DEVS) { + return; + } + + dev->ctrlr = ctrlr; + nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); + dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + + dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); + if (dev->qpair == NULL) { + failed = 1; + return; + } + + snprintf(dev->name, sizeof(dev->name), "%s", + trid->traddr); + + printf("Attached to %s\n", dev->name); +} + +static void +get_feature_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + outstanding_commands--; + + if (spdk_nvme_cpl_is_error(cpl) && dev->error_expected) { + if (cpl->status.sct != SPDK_NVME_SCT_GENERIC || + cpl->status.sc != SPDK_NVME_SC_INVALID_FIELD) { + failed = 1; + } + printf("%s: get features failed as expected\n", dev->name); + return; + } + + if (!spdk_nvme_cpl_is_error(cpl) && !dev->error_expected) { + printf("%s: get features successfully as expected\n", dev->name); + return; + } + + failed = 1; +} + +static void +get_feature_test(bool error_expected) +{ + struct dev *dev; + struct spdk_nvme_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_GET_FEATURES; + cmd.cdw10_bits.get_features.fid = SPDK_NVME_FEAT_NUMBER_OF_QUEUES; + + foreach_dev(dev) { + dev->error_expected = error_expected; + if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, + get_feature_test_cb, dev) != 0) { + printf("Error: failed to send Get Features command for dev=%p\n", dev); + failed = 1; + goto cleanup; + } + outstanding_commands++; + } + +cleanup: + + while (outstanding_commands) { + foreach_dev(dev) { + spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr); + } + } +} + +static void +read_test_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + struct dev *dev = cb_arg; + + outstanding_commands--; + spdk_free(dev->data); + + if (spdk_nvme_cpl_is_error(cpl) && dev->error_expected) { + if (cpl->status.sct != SPDK_NVME_SCT_MEDIA_ERROR || + cpl->status.sc != SPDK_NVME_SC_UNRECOVERED_READ_ERROR) { + failed = 1; + } + printf("%s: read failed as expected\n", 
dev->name); + return; + } + + if (!spdk_nvme_cpl_is_error(cpl) && !dev->error_expected) { + printf("%s: read successfully as expected\n", dev->name); + return; + } + + failed = 1; +} + +static void +read_test(bool error_expected) +{ + struct dev *dev; + + foreach_dev(dev) { + if (dev->ns == NULL) { + continue; + } + + dev->error_expected = error_expected; + dev->data = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!dev->data) { + failed = 1; + goto cleanup; + } + + if (spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, dev->data, + 0, 1, read_test_cb, dev, 0) != 0) { + printf("Error: failed to send Read command for dev=%p\n", dev); + failed = 1; + goto cleanup; + } + + outstanding_commands++; + } + +cleanup: + + while (outstanding_commands) { + foreach_dev(dev) { + spdk_nvme_qpair_process_completions(dev->qpair, 0); + } + } +} + +int main(int argc, char **argv) +{ + struct dev *dev; + int i; + struct spdk_env_opts opts; + int rc; + + spdk_env_opts_init(&opts); + opts.name = "err_injection"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("NVMe Error Injection test\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if (failed) { + goto exit; + } + + if (!num_devs) { + printf("No NVMe controller found, %s exiting\n", argv[0]); + return 1; + } + + foreach_dev(dev) { + /* Admin error injection at submission path */ + rc = spdk_nvme_qpair_add_cmd_error_injection(dev->ctrlr, NULL, + SPDK_NVME_OPC_GET_FEATURES, true, 5000, 1, + SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_INVALID_FIELD); + failed += rc; + /* IO error injection at completion path */ + rc = spdk_nvme_qpair_add_cmd_error_injection(dev->ctrlr, dev->qpair, + SPDK_NVME_OPC_READ, false, 0, 1, + SPDK_NVME_SCT_MEDIA_ERROR, SPDK_NVME_SC_UNRECOVERED_READ_ERROR); + failed += rc; + } + + if (failed) { + goto exit; + } + + /* Admin Get Feature, expect error return */ + get_feature_test(true); + /* Admin Get Feature, expect successful return */ + get_feature_test(false); + /* Read, expect error return */ + read_test(true); + /* Read, expect successful return */ + read_test(false); + +exit: + printf("Cleaning up...\n"); + for (i = 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return failed; +} diff --git a/src/spdk/test/nvme/hotplug.sh b/src/spdk/test/nvme/hotplug.sh new file mode 100755 index 000000000..13011e193 --- /dev/null +++ b/src/spdk/test/nvme/hotplug.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +source $rootdir/test/common/autotest_common.sh + +if [ -z "${DEPENDENCY_DIR}" ]; then + echo DEPENDENCY_DIR not defined! 
+ exit 1 +fi + +function ssh_vm() { + xtrace_disable + sshpass -p "$password" ssh -o PubkeyAuthentication=no \ + -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 10022 root@localhost "$@" + xtrace_restore +} + +function monitor_cmd() { + echo "$@" | nc localhost 4444 | tail --lines=+2 | (grep -v '^(qemu) ' || true) +} + +function get_online_devices_count() { + ssh_vm "lspci | grep -c NVM" +} + +function wait_for_devices_ready() { + count=$(get_online_devices_count) + + while [ $count -ne 4 ]; do + echo "waitting for all devices online" + count=$(get_online_devices_count) + done +} + +function insert_devices() { + for i in {0..3}; do + monitor_cmd "device_add nvme,drive=drive$i,id=nvme$i,serial=nvme$i" + done + wait_for_devices_ready + ssh_vm "scripts/setup.sh" +} + +function remove_devices() { + for i in {0..3}; do + monitor_cmd "device_del nvme$i" + done +} + +function devices_delete() { + for i in {0..3}; do + rm "$SPDK_TEST_STORAGE/nvme$i.img" + done +} + +password=$1 +base_img=${DEPENDENCY_DIR}/fedora-hotplug.qcow2 +test_img=${DEPENDENCY_DIR}/fedora-hotplug-test.qcow2 +qemu_pidfile=${DEPENDENCY_DIR}/qemupid + +if [ ! -e "$base_img" ]; then + echo "Hotplug VM image not found; skipping test" + exit 0 +fi + +timing_enter start_qemu + +qemu-img create -b "$base_img" -f qcow2 "$test_img" + +for i in {0..3}; do + dd if=/dev/zero of="$SPDK_TEST_STORAGE/nvme$i.img" bs=1M count=1024 +done + +qemu-system-x86_64 \ + -daemonize -display none -m 8192 \ + -pidfile "$qemu_pidfile" \ + -hda "$test_img" \ + -net user,hostfwd=tcp::10022-:22 \ + -net nic \ + -cpu host \ + -smp cores=16,sockets=1 \ + --enable-kvm \ + -chardev socket,id=mon0,host=localhost,port=4444,server,nowait \ + -mon chardev=mon0,mode=readline \ + -drive format=raw,file="$SPDK_TEST_STORAGE/nvme0.img",if=none,id=drive0 \ + -drive format=raw,file="$SPDK_TEST_STORAGE/nvme1.img",if=none,id=drive1 \ + -drive format=raw,file="$SPDK_TEST_STORAGE/nvme2.img",if=none,id=drive2 \ + -drive format=raw,file="$SPDK_TEST_STORAGE/nvme3.img",if=none,id=drive3 + +timing_exit start_qemu + +timing_enter wait_for_vm +ssh_vm 'echo ready' +timing_exit wait_for_vm + +timing_enter copy_repo +files_to_copy="scripts " +files_to_copy+="include/spdk/pci_ids.h " +files_to_copy+="build/examples/hotplug " +files_to_copy+="build/lib " +files_to_copy+="dpdk/build/lib " +( + cd "$rootdir" + tar -cf - $files_to_copy +) | (ssh_vm "tar -xf -") +timing_exit copy_repo + +insert_devices + +timing_enter hotplug_test + +ssh_vm "LD_LIBRARY_PATH=/root//build/lib:/root/dpdk/build/lib:$LD_LIBRARY_PATH build/examples/hotplug -i 0 -t 25 -n 4 -r 8" & +example_pid=$! + +sleep 6 +remove_devices +sleep 4 +insert_devices +sleep 6 +remove_devices +devices_delete + +timing_enter wait_for_example +wait $example_pid +timing_exit wait_for_example + +trap - SIGINT SIGTERM EXIT + +qemupid=$(awk '{printf $0}' "$qemu_pidfile") +kill -9 $qemupid +rm "$qemu_pidfile" +rm "$test_img" + +timing_exit hotplug_test diff --git a/src/spdk/test/nvme/hw_hotplug.sh b/src/spdk/test/nvme/hw_hotplug.sh new file mode 100755 index 000000000..ba9c59463 --- /dev/null +++ b/src/spdk/test/nvme/hw_hotplug.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) 
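+# This test toggles power to a physical NVMe interposer by driving a GPIO on a
+# remote "Beetle" microcontroller over SSH, then verifies that the hotplug
+# example keeps running while the device disappears and reappears.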
+source $rootdir/test/common/autotest_common.sh + +export SPDK_LIB_DIR="$rootdir/build/lib" +export DPDK_LIB_DIR="$rootdir/dpdk/build/lib" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SPDK_LIB_DIR:$DPDK_LIB_DIR + +function insert_device() { + ssh root@$ip 'Beetle --SetGpio "$gpio" HIGH' + waitforblk $name + DRIVER_OVERRIDE=$driver $rootdir/scripts/setup.sh +} + +function remove_device() { + ssh root@$ip 'Beetle --SetGpio "$gpio" LOW' +} + +ip=$1 +gpio=$2 +driver=$3 +declare -i io_time=5 +declare -i kernel_hotplug_time=7 + +timing_enter hotplug_hw_cfg + +# Configure microcontroller +ssh root@$ip 'Beetle --SetGpioDirection "$gpio" OUT' + +# Get blk dev name connected to interposer +ssh root@$ip 'Beetle --SetGpio "$gpio" HIGH' +sleep $kernel_hotplug_time +$rootdir/scripts/setup.sh reset +blk_list1=$(lsblk -d --output NAME | grep "^nvme") +remove_device +sleep $kernel_hotplug_time +blk_list2=$(lsblk -d --output NAME | grep "^nvme") || true +name=${blk_list1#"$blk_list2"} + +insert_device + +timing_exit hotplug_hw_cfg + +timing_enter hotplug_hw_test + +$SPDK_EXAMPLE_DIR/hotplug -i 0 -t 100 -n 2 -r 2 2>&1 | tee -a log.txt & +example_pid=$! +trap 'killprocess $example_pid; exit 1' SIGINT SIGTERM EXIT + +i=0 +while ! grep "Starting I/O" log.txt; do + [ $i -lt 20 ] || break + i=$((i + 1)) + sleep 1 +done + +if ! grep "Starting I/O" log.txt; then + return 1 +fi + +# Add and remove NVMe with delays between to give some time for IO to proceed +remove_device +sleep $io_time +insert_device +sleep $io_time +remove_device +sleep $io_time +insert_device +sleep $io_time + +timing_enter wait_for_example +wait $example_pid +timing_exit wait_for_example + +trap - SIGINT SIGTERM EXIT + +timing_exit hotplug_hw_test diff --git a/src/spdk/test/nvme/nvme.sh b/src/spdk/test/nvme/nvme.sh new file mode 100755 index 000000000..74ba496cb --- /dev/null +++ b/src/spdk/test/nvme/nvme.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +function nvme_identify() { + $SPDK_EXAMPLE_DIR/identify -i 0 + for bdf in $(get_nvme_bdfs); do + $SPDK_EXAMPLE_DIR/identify -r "trtype:PCIe traddr:${bdf}" -i 0 + done + timing_exit identify +} + +function nvme_perf() { + # enable no shutdown notification option + $SPDK_EXAMPLE_DIR/perf -q 128 -w read -o 12288 -t 1 -LL -i 0 -N + $SPDK_EXAMPLE_DIR/perf -q 128 -w write -o 12288 -t 1 -LL -i 0 + if [ -b /dev/ram0 ]; then + # Test perf with AIO device + $SPDK_EXAMPLE_DIR/perf /dev/ram0 -q 128 -w read -o 12288 -t 1 -LL -i 0 + fi +} + +function nvme_fio_test() { + PLUGIN_DIR=$rootdir/examples/nvme/fio_plugin + ran_fio=false + for bdf in $(get_nvme_bdfs); do + if $SPDK_EXAMPLE_DIR/identify -r "trtype:PCIe traddr:${bdf}" | grep -E "^Number of Namespaces" - | grep -q "0" -; then + continue + fi + fio_nvme $PLUGIN_DIR/example_config.fio --filename="trtype=PCIe traddr=${bdf//:/.}" + ran_fio=true + done + $ran_fio || (echo "No valid NVMe drive found. Failing test." && false) +} + +function nvme_multi_secondary() { + $SPDK_EXAMPLE_DIR/perf -i 0 -q 16 -w read -o 4096 -t 3 -c 0x1 & + pid0=$! + $SPDK_EXAMPLE_DIR/perf -i 0 -q 16 -w read -o 4096 -t 3 -c 0x2 & + pid1=$! 
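+ # The two background instances above and the foreground instance below use
+ # disjoint core masks but the same shm_id (-i 0), exercising SPDK's
+ # multi-process primary/secondary support against the same controllers.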
+ $SPDK_EXAMPLE_DIR/perf -i 0 -q 16 -w read -o 4096 -t 3 -c 0x4 + wait $pid0 + wait $pid1 +} + +if [ $(uname) = Linux ]; then + # check that our setup.sh script does not bind NVMe devices to uio/vfio if they + # have an active mountpoint + $rootdir/scripts/setup.sh reset + # give kernel nvme driver some time to create the block devices before we start looking for them + sleep 1 + blkname='' + # first, find an NVMe device that does not have an active mountpoint already; + # this covers rare case where someone is running this test script on a system + # that has a mounted NVMe filesystem + # + # note: more work probably needs to be done to properly handle devices with multiple + # namespaces + for bdf in $(get_nvme_bdfs); do + for name in $(get_nvme_name_from_bdf $bdf); do + if [ "$name" != "" ]; then + mountpoints=$(lsblk /dev/$name --output MOUNTPOINT -n | wc -w) + if [ "$mountpoints" = "0" ]; then + blkname=$name + break 2 + fi + fi + done + done + + # if we found an NVMe block device without an active mountpoint, create and mount + # a filesystem on it for purposes of testing the setup.sh script + if [ "$blkname" != "" ]; then + parted -s /dev/$blkname mklabel gpt + # just create a 100MB partition - this tests our ability to detect mountpoints + # on partitions of the device, not just the device itself; it also is faster + # since we don't trim and initialize the whole namespace + parted -s /dev/$blkname mkpart primary 1 100 + sleep 1 + mkfs.ext4 -F /dev/${blkname}p1 + mkdir -p /tmp/nvmetest + mount /dev/${blkname}p1 /tmp/nvmetest + sleep 1 + $rootdir/scripts/setup.sh + driver=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver)) + # check that the nvme driver is still loaded against the device + if [ "$driver" != "nvme" ]; then + exit 1 + fi + umount /tmp/nvmetest + rmdir /tmp/nvmetest + # write zeroes to the device to blow away the partition table and filesystem + dd if=/dev/zero of=/dev/$blkname oflag=direct bs=1M count=1 + $rootdir/scripts/setup.sh + driver=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver)) + # check that the nvme driver is not loaded against the device + if [ "$driver" = "nvme" ]; then + exit 1 + fi + else + $rootdir/scripts/setup.sh + fi +fi + +if [ $(uname) = Linux ]; then + trap "kill_stub -9; exit 1" SIGINT SIGTERM EXIT + start_stub "-s 4096 -i 0 -m 0xE" +fi + +run_test "nvme_reset" $testdir/reset/reset -q 64 -w write -s 4096 -t 5 +run_test "nvme_identify" nvme_identify +run_test "nvme_perf" nvme_perf +run_test "nvme_hello_world" $SPDK_EXAMPLE_DIR/hello_world +run_test "nvme_deallocated_value" $testdir/deallocated_value/deallocated_value +run_test "nvme_sgl" $testdir/sgl/sgl +run_test "nvme_e2edp" $testdir/e2edp/nvme_dp +run_test "nvme_reserve" $testdir/reserve/reserve +run_test "nvme_err_injection" $testdir/err_injection/err_injection +run_test "nvme_overhead" $testdir/overhead/overhead -s 4096 -t 1 -H +run_test "nvme_arbitration" $SPDK_EXAMPLE_DIR/arbitration -t 3 -i 0 + +if [ $(uname) != "FreeBSD" ]; then + run_test "nvme_startup" $testdir/startup/startup -t 1000000 + run_test "nvme_multi_secondary" nvme_multi_secondary + trap - SIGINT SIGTERM EXIT + kill_stub +fi + +if [[ $CONFIG_FIO_PLUGIN == y ]]; then + run_test "nvme_fio" nvme_fio_test +fi diff --git a/src/spdk/test/nvme/nvme_opal.sh b/src/spdk/test/nvme/nvme_opal.sh new file mode 100755 index 000000000..1aee2be5a --- /dev/null +++ b/src/spdk/test/nvme/nvme_opal.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash + +set -e + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) 
+rpc_py="$rootdir/scripts/rpc.py" +source "$rootdir/scripts/common.sh" +source "$rootdir/test/common/autotest_common.sh" + +# The OPAL CI tests is only used for P4510 devices. +mapfile -t bdfs < <(get_nvme_bdfs_by_id 0x0a59) +if [[ -z ${bdfs[0]} ]]; then + echo "No P4510 device found, exit the tests" + exit 1 +fi + +bdf=${bdfs[0]} + +function opal_revert_and_init() { + $SPDK_BIN_DIR/spdk_tgt & + spdk_tgt_pid=$! + waitforlisten $spdk_tgt_pid + + $rootdir/scripts/rpc.py bdev_nvme_attach_controller -b "nvme0" -t "pcie" -a ${bdf} + # Ignore if this fails. + $rootdir/scripts/rpc.py bdev_nvme_opal_revert -b nvme0 -p test || true + sleep 1 + $rpc_py bdev_nvme_opal_init -b nvme0 -p test + $rpc_py bdev_nvme_detach_controller nvme0 + + killprocess $spdk_tgt_pid +} + +function test_opal_cmds() { + $rpc_py bdev_nvme_attach_controller -b "nvme0" -t "pcie" -a ${bdf} + + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 1 -s 0 -l 1024 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 2 -s 1024 -l 512 -p test + $rpc_py bdev_opal_get_info -b nvme0n1r1 -p test + + $rpc_py bdev_opal_delete -b nvme0n1r1 -p test + $rpc_py bdev_opal_delete -b nvme0n1r2 -p test + + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 1 -s 0 -l 1024 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 2 -s 1024 -l 512 -p test + + $rpc_py bdev_opal_delete -b nvme0n1r2 -p test + $rpc_py bdev_opal_delete -b nvme0n1r1 -p test + + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 3 -s 4096 -l 4096 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 1 -s 0 -l 1024 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 2 -s 1024 -l 512 -p test + + $rpc_py bdev_opal_new_user -b nvme0n1r3 -p test -i 3 -u tester3 + $rpc_py bdev_opal_get_info -b nvme0n1r3 -p test + $rpc_py bdev_opal_set_lock_state -b nvme0n1r3 -i 3 -p tester3 -l readonly + $rpc_py bdev_opal_get_info -b nvme0n1r3 -p test + $rpc_py bdev_opal_set_lock_state -b nvme0n1r1 -i 0 -p test -l rwlock + + $rpc_py bdev_opal_delete -b nvme0n1r2 -p test + $rpc_py bdev_opal_delete -b nvme0n1r3 -p test + $rpc_py bdev_opal_delete -b nvme0n1r1 -p test + + $rpc_py bdev_nvme_detach_controller nvme0 +} + +function setup_test_environment() { + $rpc_py bdev_nvme_attach_controller -b "nvme0" -t "pcie" -a ${bdf} + + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 1 -s 0 -l 1024 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 2 -s 1024 -l 512 -p test + $rpc_py bdev_opal_create -b nvme0 -n 1 -i 3 -s 4096 -l 4096 -p test + + $rpc_py bdev_opal_new_user -b nvme0n1r1 -p test -i 1 -u tester1 + $rpc_py bdev_opal_set_lock_state -b nvme0n1r1 -i 1 -p tester1 -l readwrite + $rpc_py bdev_opal_new_user -b nvme0n1r3 -p test -i 3 -u tester3 + $rpc_py bdev_opal_set_lock_state -b nvme0n1r3 -i 3 -p tester3 -l readwrite + + $rpc_py bdev_opal_set_lock_state -b nvme0n1r2 -i 0 -p test -l readwrite +} + +function clean_up() { + $rpc_py bdev_opal_delete -b nvme0n1r1 -p test + $rpc_py bdev_opal_delete -b nvme0n1r2 -p test + $rpc_py bdev_opal_delete -b nvme0n1r3 -p test +} + +function revert() { + $rpc_py bdev_nvme_opal_revert -b nvme0 -p test +} + +function opal_spdk_tgt() { + $SPDK_BIN_DIR/spdk_tgt & + spdk_tgt_pid=$! + trap 'killprocess $spdk_tgt_pid; exit 1' SIGINT SIGTERM EXIT + waitforlisten $spdk_tgt_pid + test_opal_cmds + killprocess $spdk_tgt_pid +} + +function opal_bdevio() { + $rootdir/test/bdev/bdevio/bdevio -w & + bdevio_pid=$! 
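+ # Since the script runs with set -e, the trap below ensures the backgrounded
+ # bdevio process is killed if any of the RPCs fail.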
+ trap 'killprocess $bdevio_pid; exit 1' SIGINT SIGTERM EXIT + waitforlisten $bdevio_pid + setup_test_environment + $rootdir/test/bdev/bdevio/tests.py perform_tests + clean_up + $rpc_py bdev_nvme_detach_controller nvme0 + trap - SIGINT SIGTERM EXIT + killprocess $bdevio_pid +} + +function opal_bdevperf() { + $rootdir/test/bdev/bdevperf/bdevperf -z -q 8 -o 4096 -w verify -t 10 & + bdevperf_pid=$! + trap 'revert; killprocess $bdevperf_pid; exit 1' SIGINT SIGTERM EXIT + waitforlisten $bdevperf_pid + setup_test_environment + $rootdir/test/bdev/bdevperf/bdevperf.py perform_tests + clean_up + revert + $rpc_py bdev_nvme_detach_controller nvme0 + trap - SIGINT SIGTERM EXIT + killprocess $bdevperf_pid +} + +opal_revert_and_init + +run_test "nvme_opal_spdk_tgt" opal_spdk_tgt +run_test "nvme_opal_bdevio" opal_bdevio +run_test "nvme_opal_bdevperf" opal_bdevperf diff --git a/src/spdk/test/nvme/nvme_rpc.sh b/src/spdk/test/nvme/nvme_rpc.sh new file mode 100755 index 000000000..da7cf50d3 --- /dev/null +++ b/src/spdk/test/nvme/nvme_rpc.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +rpc_py=$rootdir/scripts/rpc.py + +bdf=$(get_first_nvme_bdf) + +$SPDK_BIN_DIR/spdk_tgt -m 0x3 & +spdk_tgt_pid=$! +trap 'kill -9 ${spdk_tgt_pid}; exit 1' SIGINT SIGTERM EXIT + +waitforlisten $spdk_tgt_pid + +$rpc_py bdev_nvme_attach_controller -b Nvme0 -t PCIe -a ${bdf} + +# 1) Test bdev_nvme_apply_firmware RPC +# NOTE: We don't want to do real firmware update on CI + +# Make sure that used firmware file doesn't exist +if [ -f non_existing_file ]; then + exit 1 +fi + +# a) Try to apply firmware from non existing file +$rpc_py bdev_nvme_apply_firmware non_existing_file Nvme0n1 || rv=$? +if [ -z "$rv" ]; then + exit 1 +fi + +$rpc_py bdev_nvme_detach_controller Nvme0 + +trap - SIGINT SIGTERM EXIT +killprocess $spdk_tgt_pid diff --git a/src/spdk/test/nvme/overhead/.gitignore b/src/spdk/test/nvme/overhead/.gitignore new file mode 100644 index 000000000..d5a7d6f41 --- /dev/null +++ b/src/spdk/test/nvme/overhead/.gitignore @@ -0,0 +1 @@ +overhead diff --git a/src/spdk/test/nvme/overhead/Makefile b/src/spdk/test/nvme/overhead/Makefile new file mode 100644 index 000000000..1d050d96d --- /dev/null +++ b/src/spdk/test/nvme/overhead/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = overhead + +ifeq ($(OS),Linux) +SYS_LIBS += -laio +CFLAGS += -DHAVE_LIBAIO +endif + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/overhead/README b/src/spdk/test/nvme/overhead/README new file mode 100644 index 000000000..b88c42176 --- /dev/null +++ b/src/spdk/test/nvme/overhead/README @@ -0,0 +1,24 @@ +This application measures the software overhead of I/O submission +and completion for both the SPDK NVMe driver and an AIO file handle. +It runs a random read, queue depth = 1 workload to a single device, +and captures TSC as follows: + +* Submission: capture TSC before and after the I/O submission + call (SPDK or AIO). +* Completion: capture TSC before and after the I/O completion + check. Only record the TSC delta if the I/O completion check + resulted in a completed I/O. Also use heuristics in the AIO + case to account for time spent in interrupt handling outside + of the actual I/O completion check. + +Usage: + +To test software overhead for a 4KB I/O over a 10 second period: + +SPDK: overhead -s 4096 -t 10 +AIO: overhead -s 4096 -t 10 /dev/nvme0n1 + +Note that for the SPDK case, it will only use the first namespace +on the first controller found by SPDK. If a different namespace is +desired, attach controllers individually to the kernel NVMe driver +to ensure they will not be enumerated by SPDK. diff --git a/src/spdk/test/nvme/overhead/overhead.c b/src/spdk/test/nvme/overhead/overhead.c new file mode 100644 index 000000000..553f1a545 --- /dev/null +++ b/src/spdk/test/nvme/overhead/overhead.c @@ -0,0 +1,730 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/barrier.h" +#include "spdk/fd.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/nvme_intel.h" +#include "spdk/histogram_data.h" + +#if HAVE_LIBAIO +#include <libaio.h> +#endif + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +enum entry_type { + ENTRY_TYPE_NVME_NS, + ENTRY_TYPE_AIO_FILE, +}; + +struct ns_entry { + enum entry_type type; + + union { + struct { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + } nvme; +#if HAVE_LIBAIO + struct { + int fd; + struct io_event *events; + io_context_t ctx; + } aio; +#endif + } u; + + uint32_t io_size_blocks; + uint64_t size_in_ios; + bool is_draining; + uint32_t current_queue_depth; + char name[1024]; + struct ns_entry *next; + + struct spdk_histogram_data *submit_histogram; + struct spdk_histogram_data *complete_histogram; +}; + +struct perf_task { + void *buf; + uint64_t submit_tsc; +#if HAVE_LIBAIO + struct iocb iocb; +#endif +}; + +static bool g_enable_histogram = false; + +static struct ctrlr_entry *g_ctrlr = NULL; +static struct ns_entry *g_ns = NULL; + +static uint64_t g_tsc_rate; + +static uint32_t g_io_size_bytes; +static int g_time_in_sec; + +static int g_aio_optind; /* Index of first AIO filename in argv */ + +struct perf_task *g_task; +uint64_t g_tsc_submit = 0; +uint64_t g_tsc_submit_min = UINT64_MAX; +uint64_t g_tsc_submit_max = 0; +uint64_t g_tsc_complete = 0; +uint64_t g_tsc_complete_min = UINT64_MAX; +uint64_t g_tsc_complete_max = 0; +uint64_t g_io_completed = 0; + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n", + cdata->mn, cdata->sn, + spdk_nvme_ns_get_id(ns)); + return; + } + + if (spdk_nvme_ns_get_size(ns) < g_io_size_bytes || + spdk_nvme_ns_get_sector_size(ns) > g_io_size_bytes) { + printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid " + "ns size %" PRIu64 " / block size %u for I/O size %u\n", + cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes); + return; + } + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + entry->type = ENTRY_TYPE_NVME_NS; + entry->u.nvme.ctrlr = ctrlr; + entry->u.nvme.ns = ns; + + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / + g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + entry->submit_histogram = spdk_histogram_data_alloc(); + entry->complete_histogram = spdk_histogram_data_alloc(); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->next = g_ns; + g_ns = entry; +} + +static 
void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + int num_ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + + entry->next = g_ctrlr; + g_ctrlr = entry; + + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + /* Only register the first namespace. */ + if (num_ns < 1) { + fprintf(stderr, "controller found with no namespaces\n"); + return; + } + + register_ns(ctrlr, spdk_nvme_ctrlr_get_ns(ctrlr, 1)); +} + +#if HAVE_LIBAIO +static int +register_aio_file(const char *path) +{ + struct ns_entry *entry; + + int fd; + uint64_t size; + uint32_t blklen; + + fd = open(path, O_RDWR | O_DIRECT); + if (fd < 0) { + fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno)); + return -1; + } + + size = spdk_fd_get_size(fd); + if (size == 0) { + fprintf(stderr, "Could not determine size of AIO device %s\n", path); + close(fd); + return -1; + } + + blklen = spdk_fd_get_blocklen(fd); + if (blklen == 0) { + fprintf(stderr, "Could not determine block size of AIO device %s\n", path); + close(fd); + return -1; + } + + entry = calloc(1, sizeof(struct ns_entry)); + if (entry == NULL) { + close(fd); + perror("aio ns_entry malloc"); + return -1; + } + + entry->type = ENTRY_TYPE_AIO_FILE; + entry->u.aio.fd = fd; + entry->size_in_ios = size / g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / blklen; + entry->submit_histogram = spdk_histogram_data_alloc(); + entry->complete_histogram = spdk_histogram_data_alloc(); + + snprintf(entry->name, sizeof(entry->name), "%s", path); + + g_ns = entry; + + return 0; +} + +static int +aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf, + unsigned long nbytes, uint64_t offset, void *cb_ctx) +{ + iocb->aio_fildes = fd; + iocb->aio_reqprio = 0; + iocb->aio_lio_opcode = cmd; + iocb->u.c.buf = buf; + iocb->u.c.nbytes = nbytes; + iocb->u.c.offset = offset; + iocb->data = cb_ctx; + + if (io_submit(aio_ctx, 1, &iocb) < 0) { + printf("io_submit"); + return -1; + } + + return 0; +} + +static void +aio_check_io(void) +{ + int count, i; + struct timespec timeout; + + timeout.tv_sec = 0; + timeout.tv_nsec = 0; + + count = io_getevents(g_ns->u.aio.ctx, 1, 1, g_ns->u.aio.events, &timeout); + if (count < 0) { + fprintf(stderr, "io_getevents error\n"); + exit(1); + } + + for (i = 0; i < count; i++) { + g_ns->current_queue_depth--; + } +} +#endif /* HAVE_LIBAIO */ + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static __thread unsigned int seed = 0; + +static void +submit_single_io(void) +{ + uint64_t offset_in_ios; + uint64_t start; + int rc; + struct ns_entry *entry = g_ns; + uint64_t tsc_submit; + + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + + start = spdk_get_ticks(); + spdk_rmb(); +#if HAVE_LIBAIO + if (entry->type == ENTRY_TYPE_AIO_FILE) { + rc = aio_submit(g_ns->u.aio.ctx, &g_task->iocb, entry->u.aio.fd, IO_CMD_PREAD, g_task->buf, + g_io_size_bytes, offset_in_ios * g_io_size_bytes, g_task); + } else +#endif + { + rc = spdk_nvme_ns_cmd_read(entry->u.nvme.ns, g_ns->u.nvme.qpair, g_task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, g_task, 0); + } + + spdk_rmb(); + tsc_submit = spdk_get_ticks() - start; + g_tsc_submit += tsc_submit; + if (tsc_submit < 
g_tsc_submit_min) { + g_tsc_submit_min = tsc_submit; + } + if (tsc_submit > g_tsc_submit_max) { + g_tsc_submit_max = tsc_submit; + } + if (g_enable_histogram) { + spdk_histogram_data_tally(entry->submit_histogram, tsc_submit); + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } else { + g_ns->current_queue_depth++; + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + g_ns->current_queue_depth--; +} + +uint64_t g_complete_tsc_start; + +static uint64_t +check_io(void) +{ + uint64_t end, tsc_complete; + + spdk_rmb(); +#if HAVE_LIBAIO + if (g_ns->type == ENTRY_TYPE_AIO_FILE) { + aio_check_io(); + } else +#endif + { + spdk_nvme_qpair_process_completions(g_ns->u.nvme.qpair, 0); + } + spdk_rmb(); + end = spdk_get_ticks(); + if (g_ns->current_queue_depth == 1) { + /* + * Account for race condition in AIO case where interrupt occurs + * after checking for queue depth. If the timestamp capture + * is too big compared to the last capture, assume that an + * interrupt fired, and do not bump the start tsc forward. This + * will ensure this extra time is accounted for next time through + * when we see current_queue_depth drop to 0. + */ + if (g_ns->type == ENTRY_TYPE_NVME_NS || (end - g_complete_tsc_start) < 500) { + g_complete_tsc_start = end; + } + } else { + tsc_complete = end - g_complete_tsc_start; + g_tsc_complete += tsc_complete; + if (tsc_complete < g_tsc_complete_min) { + g_tsc_complete_min = tsc_complete; + } + if (tsc_complete > g_tsc_complete_max) { + g_tsc_complete_max = tsc_complete; + } + if (g_enable_histogram) { + spdk_histogram_data_tally(g_ns->complete_histogram, tsc_complete); + } + g_io_completed++; + if (!g_ns->is_draining) { + submit_single_io(); + } + end = g_complete_tsc_start = spdk_get_ticks(); + } + + return end; +} + +static void +drain_io(void) +{ + g_ns->is_draining = true; + while (g_ns->current_queue_depth > 0) { + check_io(); + } +} + +static int +init_ns_worker_ctx(void) +{ + if (g_ns->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + g_ns->u.aio.events = calloc(1, sizeof(struct io_event)); + if (!g_ns->u.aio.events) { + return -1; + } + g_ns->u.aio.ctx = 0; + if (io_setup(1, &g_ns->u.aio.ctx) < 0) { + free(g_ns->u.aio.events); + perror("io_setup"); + return -1; + } +#endif + } else { + /* + * TODO: If a controller has multiple namespaces, they could all use the same queue. + * For now, give each namespace/thread combination its own queue. + */ + g_ns->u.nvme.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_ns->u.nvme.ctrlr, NULL, 0); + if (!g_ns->u.nvme.qpair) { + printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n"); + return -1; + } + } + + return 0; +} + +static void +cleanup_ns_worker_ctx(void) +{ + if (g_ns->type == ENTRY_TYPE_AIO_FILE) { +#ifdef HAVE_LIBAIO + io_destroy(g_ns->u.aio.ctx); + free(g_ns->u.aio.events); +#endif + } else { + spdk_nvme_ctrlr_free_io_qpair(g_ns->u.nvme.qpair); + } +} + +static int +work_fn(void) +{ + uint64_t tsc_end, current; + + /* Allocate a queue pair for each namespace. */ + if (init_ns_worker_ctx() != 0) { + printf("ERROR: init_ns_worker_ctx() failed\n"); + return 1; + } + + tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + + /* Submit initial I/O for each namespace. */ + submit_single_io(); + g_complete_tsc_start = spdk_get_ticks(); + + while (1) { + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. 
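+ * check_io() returns the current tick count, so the loop below runs until
+ * g_time_in_sec seconds worth of ticks have elapsed. Completion time is only
+ * charged when the check actually reaped an I/O, matching the methodology
+ * described in the README.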
+ */ + current = check_io(); + + if (current > tsc_end) { + break; + } + } + + drain_io(); + cleanup_ns_worker_ctx(); + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); +#if HAVE_LIBAIO + printf(" [AIO device(s)]..."); +#endif + printf("\n"); + printf("\t[-s io size in bytes]\n"); + printf("\t[-t time in seconds]\n"); + printf("\t\t(default: 1)]\n"); + printf("\t[-H enable histograms]\n"); +} + +static void +print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, + uint64_t total, uint64_t so_far) +{ + double so_far_pct; + + if (count == 0) { + return; + } + + so_far_pct = (double)so_far * 100 / total; + + printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", + (double)start * 1000 * 1000 / g_tsc_rate, + (double)end * 1000 * 1000 / g_tsc_rate, + so_far_pct, count); +} + +static void +print_stats(void) +{ + double divisor = (double)g_tsc_rate / (1000 * 1000 * 1000); + + printf("submit (in ns) avg, min, max = %8.1f, %8.1f, %8.1f\n", + (double)g_tsc_submit / g_io_completed / divisor, + (double)g_tsc_submit_min / divisor, + (double)g_tsc_submit_max / divisor); + printf("complete (in ns) avg, min, max = %8.1f, %8.1f, %8.1f\n", + (double)g_tsc_complete / g_io_completed / divisor, + (double)g_tsc_complete_min / divisor, + (double)g_tsc_complete_max / divisor); + + if (!g_enable_histogram) { + return; + } + + printf("\n"); + printf("Submit histogram\n"); + printf("================\n"); + printf(" Range in us Cumulative Count\n"); + spdk_histogram_data_iterate(g_ns->submit_histogram, print_bucket, NULL); + printf("\n"); + + printf("Complete histogram\n"); + printf("==================\n"); + printf(" Range in us Cumulative Count\n"); + spdk_histogram_data_iterate(g_ns->complete_histogram, print_bucket, NULL); + printf("\n"); + +} + +static int +parse_args(int argc, char **argv) +{ + int op; + long int val; + + /* default value */ + g_io_size_bytes = 0; + g_time_in_sec = 0; + + while ((op = getopt(argc, argv, "hs:t:H")) != -1) { + switch (op) { + case 'h': + usage(argv[0]); + exit(0); + break; + case 's': + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Invalid io size\n"); + return val; + } + g_io_size_bytes = (uint32_t)val; + break; + case 't': + g_time_in_sec = spdk_strtol(optarg, 10); + if (g_time_in_sec < 0) { + fprintf(stderr, "Invalid run time\n"); + return g_time_in_sec; + } + break; + case 'H': + g_enable_histogram = true; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (!g_io_size_bytes) { + usage(argv[0]); + return 1; + } + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + g_aio_optind = optind; + + return 0; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + static uint32_t ctrlr_found = 0; + + if (ctrlr_found == 1) { + fprintf(stderr, "only attaching to one controller, so skipping\n"); + fprintf(stderr, " controller at PCI address %s\n", + trid->traddr); + return false; + } + ctrlr_found = 1; + + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attached to %s\n", trid->traddr); + + register_ctrlr(ctrlr); +} + +static int +register_controllers(void) +{ + printf("Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + if 
(g_ns == NULL) {
+		fprintf(stderr, "no NVMe controller found - check that device is bound to uio/vfio\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static void
+cleanup(void)
+{
+	struct ns_entry *ns_entry = g_ns;
+	struct ctrlr_entry *ctrlr_entry = g_ctrlr;
+
+	while (ns_entry) {
+		struct ns_entry *next = ns_entry->next;
+
+		spdk_histogram_data_free(ns_entry->submit_histogram);
+		spdk_histogram_data_free(ns_entry->complete_histogram);
+		free(ns_entry);
+		ns_entry = next;
+	}
+
+	while (ctrlr_entry) {
+		struct ctrlr_entry *next = ctrlr_entry->next;
+
+		spdk_nvme_detach(ctrlr_entry->ctrlr);
+		free(ctrlr_entry);
+		ctrlr_entry = next;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int rc;
+	struct spdk_env_opts opts;
+
+	rc = parse_args(argc, argv);
+	if (rc != 0) {
+		return rc;
+	}
+
+	spdk_env_opts_init(&opts);
+	opts.name = "overhead";
+	opts.core_mask = "0x1";
+	opts.shm_id = 0;
+	if (spdk_env_init(&opts) < 0) {
+		fprintf(stderr, "Unable to initialize SPDK env\n");
+		return 1;
+	}
+
+	g_task = spdk_zmalloc(sizeof(struct perf_task), 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (g_task == NULL) {
+		fprintf(stderr, "g_task alloc failed\n");
+		exit(1);
+	}
+
+	g_task->buf = spdk_zmalloc(g_io_size_bytes, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+	if (g_task->buf == NULL) {
+		fprintf(stderr, "g_task->buf spdk_zmalloc failed\n");
+		exit(1);
+	}
+
+	g_tsc_rate = spdk_get_ticks_hz();
+
+#if HAVE_LIBAIO
+	if (g_aio_optind < argc) {
+		printf("Measuring overhead for AIO device %s.\n", argv[g_aio_optind]);
+		if (register_aio_file(argv[g_aio_optind]) != 0) {
+			cleanup();
+			return -1;
+		}
+	} else
+#endif
+	{
+		if (register_controllers() != 0) {
+			cleanup();
+			return -1;
+		}
+	}
+
+	printf("Initialization complete. Launching workers.\n");
+
+	rc = work_fn();
+
+	print_stats();
+
+	cleanup();
+
+	if (rc != 0) {
+		fprintf(stderr, "%s: errors occurred\n", argv[0]);
+	}
+
+	return rc;
+}
diff --git a/src/spdk/test/nvme/perf/README.md b/src/spdk/test/nvme/perf/README.md
new file mode 100644
index 000000000..3e0b4aa30
--- /dev/null
+++ b/src/spdk/test/nvme/perf/README.md
@@ -0,0 +1,103 @@
+# Automated script for NVMe performance test
+
+## Compile SPDK with LTO
+
+The link time optimization (lto) gcc flag allows the linker to run a post-link optimization pass on the code. During that pass the linker inlines thin wrappers, such as those around DPDK calls, which results in a shallow call stack and significantly improves performance. Therefore, we recommend compiling SPDK with the lto flag prior to running this benchmark script to achieve optimal performance.
+Link time optimization can be enabled in SPDK by doing the following:
+
+~{.sh}
+./configure --enable-lto
+~
+
+## Configuration
+
+The test is configured using command-line options.
+
+### Available options
+
+#### -h, --help
+
+Prints available commands and help.
+
+#### --run-time
+
+Tell fio to terminate processing after the specified period of time. Value in seconds.
+
+#### --ramp-time
+
+Fio will run the specified workload for this amount of time before logging any performance numbers.
+Value in seconds.
+
+#### --fio-bin
+
+Path to the fio binary.
+
+#### --driver
+
+Select between the SPDK driver and the kernel driver. The Linux Kernel driver has three configurations:
+Default mode, Hybrid Polling and Classic Polling. The SPDK driver supports 2 fio_plugin modes: bdev and NVMe PMD. Before running tests with SPDK, you will need to bind NVMe devices to the Linux uio_pci_generic or vfio-pci driver.
+When running tests with the Kernel driver, NVMe devices use the Kernel driver. The 5 valid values for this option are:
+'bdev', 'nvme', 'kernel-libaio', 'kernel-classic-polling' and 'kernel-hybrid-polling'.
+
+#### --max-disk
+
+This option will run multiple fio jobs with a varying number of NVMe devices. It will start with
+max-disk devices and then decrease the number of disks by two until there are no more devices.
+If set to 'all' then max-disk will be set to all available devices.
+Only one of the max-disk or disk-no options can be used.
+
+#### --disk-no
+
+This option will run the fio job on the specified number of NVMe devices. If set to 'all' then max-disk
+will be set to all available devices. Only one of the max-disk or disk-no options can be used.
+
+#### --cpu-allowed
+
+Specifies the CPU cores that will be used by fio to execute the performance test cases. When the SPDK driver is chosen, the script attempts to assign NVMe devices to CPU cores on the same NUMA node. The script will first try to align each core with devices matching
+the core's NUMA node, but if there are no devices left within the CPU core's NUMA node then it will use devices from the other
+NUMA node. It is important to choose cores that will ensure the best NUMA node alignment. For example:
+on a system with 8 devices on NUMA node 0 and 8 devices on NUMA node 1, cores 0-27 on NUMA node 0 and cores 28-55
+on NUMA node 1, if the test is set to use 16 disks and four cores then "--cpu-allowed=1,2,28,29" can be used,
+resulting in 4 node 0 devices each for cores 1 and 2, and 4 node 1 devices each for cores 28 and 29. If 10 cores
+are required then the best option would be "--cpu-allowed=1,2,3,4,28,29,30,31,32,33" because cores 1-4 will be
+aligned with 2 devices on NUMA node 0 per core and cores 28-33 will be aligned with 1 device on NUMA node 1 per core.
+If a kernel driver is chosen then for each job with an NVMe device, all CPU cores from the corresponding NUMA node are picked.
+
+#### --rw
+
+Type of I/O pattern. Accepted values are: randrw, rw
+
+#### --rwmixread
+
+Percentage of a mixed workload that should be reads.
+
+#### --iodepth
+
+Number of I/O units to keep in flight against each file.
+
+#### --block-size
+
+The block size in bytes used for I/O units.
+
+#### --numjobs
+
+Create the specified number of clones of a job.
+
+#### --repeat-no
+
+Specifies how many times to run each workload. End results are averages of these runs.
+
+#### --no-preconditioning
+
+By default disks are preconditioned before the test using fio with the parameters: size=100%, loops=2, bs=1M, rw=write,
+iodepth=32, ioengine=spdk. Preconditioning can be skipped when this option is set.
+
+#### --no-io-scaling
+
+For the SPDK fio plugin the iodepth is multiplied by the number of devices. When this option is set, this multiplication is disabled.
+
+## Results
+
+Results are stored in the "results" folder. After each workload, the following files are copied to this folder:
+the fio configuration file, json files with fio results and latency logs with a sampling interval of 250 ms.
+The number of copied files depends on the number of repeats of each workload. Additionally, a csv file is created with the averaged
+results of all workloads.
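+## Example invocation
+
+The following sketch is illustrative only: the disk configuration file name, the PCI addresses and the
+option values are hypothetical and should be adapted to the system under test; the driver value follows
+the names accepted by run_perf.sh.
+
+~{.sh}
+# Hypothetical disk configuration file: one PCI BDF per line, lines starting with # are comments.
+cat > disk_config.txt <<EOF
+0000:1a:00.0
+0000:1b:00.0
+EOF
+
+./run_perf.sh --driver=spdk-plugin-nvme --disk-config=disk_config.txt \
+	--rw=randrw --rwmixread=70 --block-size=4096 --iodepth=128 \
+	--run-time=300 --ramp-time=30 --cpu-allowed=1,2 --repeat-no=3
+~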
diff --git a/src/spdk/test/nvme/perf/common.sh b/src/spdk/test/nvme/perf/common.sh new file mode 100755 index 000000000..ddd01ec52 --- /dev/null +++ b/src/spdk/test/nvme/perf/common.sh @@ -0,0 +1,471 @@ +#!/usr/bin/env bash + +function discover_bdevs() { + local rootdir=$1 + local config_file=$2 + local cfg_type=$3 + local wait_for_spdk_bdev=${4:-30} + local rpc_server=/var/tmp/spdk-discover-bdevs.sock + + if [ ! -e $config_file ]; then + echo "Invalid Configuration File: $config_file" + return 1 + fi + + if [ -z $cfg_type ]; then + cfg_type="-c" + fi + + # Start the bdev service to query for the list of available + # bdevs. + $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 \ + $cfg_type $config_file &> /dev/null & + stubpid=$! + while ! [ -e /var/run/spdk_bdev0 ]; do + # If this counter drops to zero, errexit will be caught to abort the test + ((wait_for_spdk_bdev--)) + sleep 1 + done + + # Get all of the bdevs + $rootdir/scripts/rpc.py -s "$rpc_server" bdev_get_bdevs + + # Shut down the bdev service + kill $stubpid + wait $stubpid + rm -f /var/run/spdk_bdev0 +} + +function create_spdk_bdev_conf() { + local output + local disk_cfg + local bdev_io_cache_size=$1 + local bdev_io_pool_size=$2 + local bdev_json_cfg=() + local bdev_opts=() + + disk_cfg=($(grep -vP "^\s*#" "$DISKCFG")) + + if [[ -n "$bdev_io_cache_size" ]]; then + bdev_opts+=("\"bdev_io_cache_size\": $bdev_io_cache_size") + fi + + if [[ -n "$bdev_io_pool_size" ]]; then + bdev_opts+=("\"bdev_io_pool_size\": $bdev_io_pool_size") + fi + + local IFS="," + if [[ ${#bdev_opts[@]} -gt 0 ]]; then + bdev_json_cfg+=("$( + cat <<- JSON + { + "method": "bdev_set_options", + "params": { + ${bdev_opts[*]} + } + } + JSON + )") + fi + + for i in "${!disk_cfg[@]}"; do + bdev_json_cfg+=("$( + cat <<- JSON + { + "method": "bdev_nvme_attach_controller", + "params": { + "trtype": "PCIe", + "name":"Nvme${i}", + "traddr":"${disk_cfg[i]}" + } + } + JSON + )") + done + + local IFS="," + jq -r '.' <<- JSON > $testdir/bdev.conf + { + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + ${bdev_json_cfg[*]} + ] + } + ] + } + JSON +} + +function is_bdf_not_mounted() { + local bdf=$1 + local blkname + local mountpoints + blkname=$(ls -l /sys/block/ | grep $bdf | awk '{print $9}') + mountpoints=$(lsblk /dev/$blkname --output MOUNTPOINT -n | wc -w) + return $mountpoints +} + +function get_cores() { + local cpu_list="$1" + for cpu in ${cpu_list//,/ }; do + echo $cpu + done +} + +function get_cores_numa_node() { + local cores=$1 + for core in $cores; do + lscpu -p=cpu,node | grep "^$core\b" | awk -F ',' '{print $2}' + done +} + +function get_numa_node() { + local plugin=$1 + local disks=$2 + if [[ "$plugin" =~ "nvme" ]]; then + for bdf in $disks; do + local driver + driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent | awk -F"=" '{print $2}') + # Use this check to ommit blacklisted devices ( not binded to driver with setup.sh script ) + if [ "$driver" = "vfio-pci" ] || [ "$driver" = "uio_pci_generic" ]; then + cat /sys/bus/pci/devices/$bdf/numa_node + fi + done + elif [[ "$plugin" =~ "bdev" ]]; then + local bdevs + bdevs=$(discover_bdevs $rootdir $testdir/bdev.conf --json) + for name in $disks; do + local bdev_bdf + bdev_bdf=$(jq -r ".[] | select(.name==\"$name\").driver_specific.nvme.pci_address" <<< $bdevs) + cat /sys/bus/pci/devices/$bdev_bdf/numa_node + done + else + for name in $disks; do + local bdf + # Not reading directly from /sys/block/nvme* because of a kernel bug + # which results in NUMA 0 always getting reported. 
+ bdf=$(cat /sys/block/$name/device/address) + cat /sys/bus/pci/devices/$bdf/numa_node + done + fi +} + +function get_disks() { + local plugin=$1 + local disk_cfg + + disk_cfg=($(grep -vP "^\s*#" "$DISKCFG")) + if [[ "$plugin" =~ "nvme" ]]; then + # PCI BDF address is enough for nvme-perf and nvme-fio-plugin, + # so just print them from configuration file + echo "${disk_cfg[*]}" + elif [[ "$plugin" =~ "bdev" ]]; then + # Generate NvmeXn1 bdev name configuration file for bdev-perf + # and bdev-fio-plugin + local bdevs + local disk_no + disk_no=${#disk_cfg[@]} + eval echo "Nvme{0..$((disk_no - 1))}n1" + else + # Find nvme block devices and only use the ones which + # are not mounted + for bdf in "${disk_cfg[@]}"; do + if is_bdf_not_mounted $bdf; then + local blkname + blkname=$(ls -l /sys/block/ | grep $bdf | awk '{print $9}') + echo $blkname + fi + done + fi +} + +function get_disks_on_numa() { + local devs=($1) + local numas=($2) + local numa_no=$3 + local disks_on_numa="" + local i + + for ((i = 0; i < ${#devs[@]}; i++)); do + if [ ${numas[$i]} = $numa_no ]; then + disks_on_numa=$((disks_on_numa + 1)) + fi + done + echo $disks_on_numa +} + +function create_fio_config() { + local disk_no=$1 + local plugin=$2 + local disks=($3) + local disks_numa=($4) + local cores=($5) + local total_disks=${#disks[@]} + local fio_job_section=() + local num_cores=${#cores[@]} + local disks_per_core=$((disk_no / num_cores)) + local disks_per_core_mod=$((disk_no % num_cores)) + local cores_numa + cores_numa=($(get_cores_numa_node "${cores[*]}")) + + # Following part of this function still leverages global variables a lot. + # It's a mix of local variables passed as aruments to function with global variables. This is messy. + # TODO: Modify this to be consistent with how variables are used here. Aim for using only + # local variables to get rid of globals as much as possible. + desc="\"Test io_plugin=$PLUGIN Blocksize=${BLK_SIZE} Workload=$RW MIX=${MIX} qd=${IODEPTH}\"" + cp "$testdir/config.fio.tmp" "$testdir/config.fio" + cat <<- EOF >> $testdir/config.fio + description=$desc + + rw=$RW + rwmixread=$MIX + bs=$BLK_SIZE + runtime=$RUNTIME + ramp_time=$RAMP_TIME + numjobs=$NUMJOBS + log_avg_msec=$SAMPLING_INT + EOF + + if $GTOD_REDUCE; then + echo "gtod_reduce=1" >> $testdir/config.fio + fi + + for i in "${!cores[@]}"; do + local m=0 #Counter of disks per NUMA node + local n=0 #Counter of all disks in test + core_numa=${cores_numa[$i]} + + total_disks_per_core=$disks_per_core + # Check how many "stray" disks are unassigned to CPU cores + # Assign one disk to current CPU core and substract it from the total of + # unassigned disks + if [[ "$disks_per_core_mod" -gt "0" ]]; then + total_disks_per_core=$((disks_per_core + 1)) + disks_per_core_mod=$((disks_per_core_mod - 1)) + fi + # SPDK fio plugin supports submitting/completing I/Os to multiple SSDs from a single thread. + # Therefore, the per thread queue depth is set to the desired IODEPTH/device X the number of devices per thread. 
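+		# Worked example (hypothetical values): with IODEPTH=256 and 4 devices assigned to this CPU
+		# core, the effective per-thread iodepth computed below is 256 * 4 = 1024, unless
+		# --no-io-scaling was requested.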
+ QD=$IODEPTH + if [[ "$NOIOSCALING" = false ]]; then + QD=$((IODEPTH * total_disks_per_core)) + fi + + fio_job_section+=("") + fio_job_section+=("[filename${i}]") + fio_job_section+=("iodepth=$QD") + fio_job_section+=("cpus_allowed=${cores[$i]} #CPU NUMA Node ${cores_numa[$i]}") + + while [[ "$m" -lt "$total_disks_per_core" ]]; do + # Try to add disks to job section if it's NUMA node matches NUMA + # for currently selected CPU + if [[ "${disks_numa[$n]}" == "$core_numa" ]]; then + if [[ "$plugin" == "spdk-plugin-nvme" ]]; then + fio_job_section+=("filename=trtype=PCIe traddr=${disks[$n]//:/.} ns=1 #NVMe NUMA Node ${disks_numa[$n]}") + elif [[ "$plugin" == "spdk-plugin-bdev" ]]; then + fio_job_section+=("filename=${disks[$n]} #NVMe NUMA Node ${disks_numa[$n]}") + elif [[ "$plugin" =~ "kernel" ]]; then + fio_job_section+=("filename=/dev/${disks[$n]} #NVMe NUMA Node ${disks_numa[$n]}") + fi + m=$((m + 1)) + + #Mark numa of n'th disk as "x" to mark it as claimed for next loop iterations + disks_numa[$n]="x" + fi + n=$((n + 1)) + + # If there is no more disks with numa node same as cpu numa node, switch to + # other numa node, go back to start of loop and try again. + if [[ $n -ge $total_disks ]]; then + echo "WARNING! Cannot assign any more NVMes for CPU ${cores[$i]}" + echo "NVMe assignment for this CPU will be cross-NUMA." + if [[ "$core_numa" == "1" ]]; then + core_numa=0 + else + core_numa=1 + fi + n=0 + fi + done + done + + printf "%s\n" "${fio_job_section[@]}" >> $testdir/config.fio + echo "INFO: Generated fio configuration file:" + cat $testdir/config.fio +} + +function preconditioning() { + local dev_name="" + local filename="" + local nvme_list + + HUGEMEM=8192 $rootdir/scripts/setup.sh + cp $testdir/config.fio.tmp $testdir/config.fio + echo "[Preconditioning]" >> $testdir/config.fio + + # Generate filename argument for FIO. + # We only want to target NVMes not bound to nvme driver. + # If they're still bound to nvme that means they were skipped by + # setup.sh on purpose. + nvme_list=$(get_disks nvme) + for nvme in $nvme_list; do + dev_name='trtype=PCIe traddr='${nvme//:/.}' ns=1' + filename+=$(printf %s":" "$dev_name") + done + echo "** Preconditioning disks, this can take a while, depending on the size of disks." 
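+	# At this point $filename holds one "trtype=PCIe traddr=<BDF> ns=1:" entry per NVMe from the
+	# disk config, e.g. (hypothetical BDFs):
+	# "trtype=PCIe traddr=0000.1a.00.0 ns=1:trtype=PCIe traddr=0000.1b.00.0 ns=1:".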
+ run_spdk_nvme_fio "spdk-plugin-nvme" --filename="$filename" --size=100% --loops=2 --bs=1M \ + --rw=write --iodepth=32 --output-format=normal + rm -f $testdir/config.fio +} + +function get_results() { + local reads_pct + local writes_pct + + reads_pct=$(bc -l <<< "scale=3; $2/100") + writes_pct=$(bc -l <<< "scale=3; 1-$reads_pct") + case "$1" in + iops) + iops=$(jq -r '.jobs[] | .read.iops + .write.iops' $TMP_RESULT_FILE) + iops=${iops%.*} + echo $iops + ;; + mean_lat_usec) + mean_lat=$(jq -r ".jobs[] | (.read.lat_ns.mean * $reads_pct + .write.lat_ns.mean * $writes_pct)" $TMP_RESULT_FILE) + mean_lat=${mean_lat%.*} + echo $((mean_lat / 1000)) + ;; + p99_lat_usec) + p99_lat=$(jq -r ".jobs[] | (.read.clat_ns.percentile.\"99.000000\" // 0 * $reads_pct + .write.clat_ns.percentile.\"99.000000\" // 0 * $writes_pct)" $TMP_RESULT_FILE) + p99_lat=${p99_lat%.*} + echo $((p99_lat / 1000)) + ;; + p99_99_lat_usec) + p99_99_lat=$(jq -r ".jobs[] | (.read.clat_ns.percentile.\"99.990000\" // 0 * $reads_pct + .write.clat_ns.percentile.\"99.990000\" // 0 * $writes_pct)" $TMP_RESULT_FILE) + p99_99_lat=${p99_99_lat%.*} + echo $((p99_99_lat / 1000)) + ;; + stdev_usec) + stdev=$(jq -r ".jobs[] | (.read.clat_ns.stddev * $reads_pct + .write.clat_ns.stddev * $writes_pct)" $TMP_RESULT_FILE) + stdev=${stdev%.*} + echo $((stdev / 1000)) + ;; + mean_slat_usec) + mean_slat=$(jq -r ".jobs[] | (.read.slat_ns.mean * $reads_pct + .write.slat_ns.mean * $writes_pct)" $TMP_RESULT_FILE) + mean_slat=${mean_slat%.*} + echo $((mean_slat / 1000)) + ;; + mean_clat_usec) + mean_clat=$(jq -r ".jobs[] | (.read.clat_ns.mean * $reads_pct + .write.clat_ns.mean * $writes_pct)" $TMP_RESULT_FILE) + mean_clat=${mean_clat%.*} + echo $((mean_clat / 1000)) + ;; + bw_Kibs) + bw=$(jq -r ".jobs[] | (.read.bw + .write.bw)" $TMP_RESULT_FILE) + bw=${bw%.*} + echo $((bw)) + ;; + esac +} + +function get_bdevperf_results() { + case "$1" in + iops) + iops=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $2}') + iops=${iops%.*} + echo $iops + ;; + bw_Kibs) + bw_MBs=$(grep Total $TMP_RESULT_FILE | awk -F 'Total' '{print $2}' | awk '{print $4}') + bw_MBs=${bw_MBs%.*} + echo $((bw_MBs * 1024)) + ;; + esac +} + +function get_nvmeperf_results() { + local iops + local bw_MBs + local mean_lat_usec + local max_lat_usec + local min_lat_usec + + read -r iops bw_MBs mean_lat_usec min_lat_usec max_lat_usec <<< $(tr -s " " < $TMP_RESULT_FILE | grep -oP "(?<=Total : )(.*+)") + + # We need to get rid of the decimal spaces due + # to use of arithmetic expressions instead of "bc" for calculations + iops=${iops%.*} + bw_MBs=${bw_MBs%.*} + mean_lat_usec=${mean_lat_usec%.*} + min_lat_usec=${min_lat_usec%.*} + max_lat_usec=${max_lat_usec%.*} + + echo "$iops $(bc <<< "$bw_MBs * 1024") $mean_lat_usec $min_lat_usec $max_lat_usec" +} + +function run_spdk_nvme_fio() { + local plugin=$1 + echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting." + if [[ "$plugin" = "spdk-plugin-nvme" ]]; then + LD_PRELOAD=$plugin_dir/spdk_nvme $FIO_BIN $testdir/config.fio --output-format=json "${@:2}" --ioengine=spdk + elif [[ "$plugin" = "spdk-plugin-bdev" ]]; then + LD_PRELOAD=$plugin_dir/spdk_bdev $FIO_BIN $testdir/config.fio --output-format=json "${@:2}" --ioengine=spdk_bdev --spdk_json_conf=$testdir/bdev.conf --spdk_mem=4096 + fi + + sleep 1 +} + +function run_nvme_fio() { + echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting." 
+ $FIO_BIN $testdir/config.fio --output-format=json "$@" + sleep 1 +} + +function run_bdevperf() { + echo "** Running bdevperf test, this can take a while, depending on the run-time setting." + $bdevperf_dir/bdevperf --json $testdir/bdev.conf -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX -t $RUNTIME -m "[$CPUS_ALLOWED]" -r /var/tmp/spdk.sock + sleep 1 +} + +function run_nvmeperf() { + # Prepare -r argument string for nvme perf command + local r_opt + local disks + + # Limit the number of disks to $1 if needed + disks=($(get_disks nvme)) + disks=("${disks[@]:0:$1}") + r_opt=$(printf -- ' -r "trtype:PCIe traddr:%s"' "${disks[@]}") + + echo "** Running nvme perf test, this can take a while, depending on the run-time setting." + + # Run command in separate shell as this solves quoting issues related to r_opt var + $SHELL -c "$nvmeperf_dir/perf $r_opt -q $IODEPTH -o $BLK_SIZE -w $RW -M $MIX -t $RUNTIME -c [$CPUS_ALLOWED]" + sleep 1 +} + +function wait_for_nvme_reload() { + local nvmes=$1 + + shopt -s extglob + for disk in $nvmes; do + cmd="ls /sys/block/$disk/queue/*@(iostats|rq_affinity|nomerges|io_poll_delay)*" + until $cmd 2> /dev/null; do + echo "Waiting for full nvme driver reload..." + sleep 0.5 + done + done + shopt -q extglob +} + +function verify_disk_number() { + # Check if we have appropriate number of disks to carry out the test + disks=($(get_disks $PLUGIN)) + if [[ $DISKNO == "ALL" ]] || [[ $DISKNO == "all" ]]; then + DISKNO=${#disks[@]} + elif [[ $DISKNO -gt ${#disks[@]} ]] || [[ ! $DISKNO =~ ^[0-9]+$ ]]; then + echo "error: Required devices number ($DISKNO) is not a valid number or it's larger than the number of devices found (${#disks[@]})" + false + fi +} diff --git a/src/spdk/test/nvme/perf/config.fio.tmp b/src/spdk/test/nvme/perf/config.fio.tmp new file mode 100644 index 000000000..dfaea5df5 --- /dev/null +++ b/src/spdk/test/nvme/perf/config.fio.tmp @@ -0,0 +1,6 @@ +[global] +direct=1 +thread=1 +norandommap=1 +group_reporting=1 +time_based=1 diff --git a/src/spdk/test/nvme/perf/run_perf.sh b/src/spdk/test/nvme/perf/run_perf.sh new file mode 100755 index 000000000..133aaa75c --- /dev/null +++ b/src/spdk/test/nvme/perf/run_perf.sh @@ -0,0 +1,374 @@ +#!/usr/bin/env bash +set -e + +# Dir variables and sourcing common files +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +plugin_dir=$rootdir/build/fio +bdevperf_dir=$rootdir/test/bdev/bdevperf +nvmeperf_dir=$rootdir/build/examples +source $testdir/common.sh +source $rootdir/scripts/common.sh || exit 1 +source $rootdir/test/common/autotest_common.sh + +# Global & default variables +declare -A KERNEL_ENGINES +KERNEL_ENGINES=( + ["kernel-libaio"]="--ioengine=libaio" + ["kernel-classic-polling"]="--ioengine=pvsync2 --hipri=100" + ["kernel-hybrid-polling"]="--ioengine=pvsync2 --hipri=100" + ["kernel-io-uring"]="--ioengine=io_uring") + +RW=randrw +MIX=100 +IODEPTH=256 +BLK_SIZE=4096 +RUNTIME=600 +RAMP_TIME=30 +NUMJOBS=1 +REPEAT_NO=3 +GTOD_REDUCE=false +SAMPLING_INT=0 +FIO_BIN=$CONFIG_FIO_SOURCE_DIR/fio +TMP_RESULT_FILE=$testdir/result.json +PLUGIN="nvme" +DISKCFG="" +BDEV_CACHE="" +BDEV_POOL="" +DISKNO="ALL" +CPUS_ALLOWED=1 +NOIOSCALING=false +PRECONDITIONING=true +CPUFREQ="" +PERFTOP=false +DPDKMEM=false +DATE="$(date +'%m_%d_%Y_%H%M%S')" + +function usage() { + set +x + [[ -n $2 ]] && ( + echo "$2" + echo "" + ) + echo "Run NVMe PMD/BDEV performance test. 
Change options for easier debugging and setup configuration"
+	echo "Usage: $(basename $1) [options]"
+	echo "-h, --help Print help and exit"
+	echo
+	echo "Workload parameters:"
+	echo " --rw=STR Type of I/O pattern. Accepted values are randrw,rw. [default=$RW]"
+	echo " --rwmixread=INT Percentage of a mixed workload that should be reads. [default=$MIX]"
+	echo " --iodepth=INT Number of I/Os to keep in flight against the file. [default=$IODEPTH]"
+	echo " --block-size=INT The block size in bytes used for I/O units. [default=$BLK_SIZE]"
+	echo " --run-time=TIME[s] Tell fio to run the workload for the specified period of time. [default=$RUNTIME]"
+	echo " --ramp-time=TIME[s] Fio will run the specified workload for this amount of time before"
+	echo " logging any performance numbers. [default=$RAMP_TIME]. Applicable only for fio-based tests."
+	echo " --numjobs=INT Create the specified number of clones of this job. [default=$NUMJOBS]"
+	echo " Applicable only for fio-based tests."
+	echo " --repeat-no=INT How many times to repeat the workload test. [default=$REPEAT_NO]"
+	echo " Test result will be an average of repeated test runs."
+	echo " --gtod-reduce Enable fio gtod_reduce option. [default=$GTOD_REDUCE]"
+	echo " --sampling-int=INT Value for the fio log_avg_msec parameter. [default=$SAMPLING_INT]"
+	echo " --fio-bin=PATH Path to fio binary. [default=$FIO_BIN]"
+	echo " Applicable only for fio-based tests."
+	echo
+	echo "Test setup parameters:"
+	echo " --driver=STR Selects tool used for testing. Choices available:"
+	echo " - spdk-perf-nvme (SPDK nvme perf)"
+	echo " - spdk-perf-bdev (SPDK bdev perf)"
+	echo " - spdk-plugin-nvme (SPDK nvme fio plugin)"
+	echo " - spdk-plugin-bdev (SPDK bdev fio plugin)"
+	echo " - kernel-classic-polling"
+	echo " - kernel-hybrid-polling"
+	echo " - kernel-libaio"
+	echo " - kernel-io-uring"
+	echo " --disk-config Configuration file containing PCI BDF addresses of NVMe disks to use in the test."
+	echo " It consists of a single column of PCI addresses. SPDK Bdev names will be assigned"
+	echo " and Kernel block device names detected."
+	echo " Lines starting with # are ignored as comments."
+	echo " --bdev-io-cache-size Set IO cache size for the SPDK bdev subsystem."
+	echo " --bdev-io-pool-size Set IO pool size for the SPDK bdev subsystem."
+	echo " --max-disk=INT,ALL Number of disks to test on; this will run multiple workloads with an increasing number of disks each run."
+	echo " If =ALL then test on all found disks. [default=$DISKNO]"
+	echo " --cpu-allowed=INT/PATH Comma-separated list of CPU cores used to run the workload. Ranges allowed."
+	echo " Can also point to a file containing a list of CPUs. [default=$CPUS_ALLOWED]"
+	echo " --no-preconditioning Skip preconditioning"
+	echo " --no-io-scaling Do not scale iodepth for each device in the SPDK fio plugin. [default=$NOIOSCALING]"
+	echo " --cpu-frequency=INT Run tests with CPUs set to a desired frequency. 'intel_pstate=disable' must be set in"
+	echo " GRUB options. You can use 'cpupower frequency-info' and 'cpupower frequency-set' to"
+	echo " check the list of available frequencies. Example: --cpu-frequency=1100000."
+	echo
+	echo "Other options:"
+	echo " --perftop Run perftop measurements on the same CPU cores as specified in the --cpu-allowed option."
+	echo " --dpdk-mem-stats Dump DPDK memory stats during the test."
+ set -x +} + +while getopts 'h-:' optchar; do + case "$optchar" in + -) + case "$OPTARG" in + help) + usage $0 + exit 0 + ;; + rw=*) RW="${OPTARG#*=}" ;; + rwmixread=*) MIX="${OPTARG#*=}" ;; + iodepth=*) IODEPTH="${OPTARG#*=}" ;; + block-size=*) BLK_SIZE="${OPTARG#*=}" ;; + run-time=*) RUNTIME="${OPTARG#*=}" ;; + ramp-time=*) RAMP_TIME="${OPTARG#*=}" ;; + numjobs=*) NUMJOBS="${OPTARG#*=}" ;; + repeat-no=*) REPEAT_NO="${OPTARG#*=}" ;; + gtod-reduce) GTOD_REDUCE=true ;; + sampling-int=*) SAMPLING_INT="${OPTARG#*=}" ;; + fio-bin=*) FIO_BIN="${OPTARG#*=}" ;; + driver=*) PLUGIN="${OPTARG#*=}" ;; + disk-config=*) + DISKCFG="${OPTARG#*=}" + if [[ ! -f "$DISKCFG" ]]; then + echo "Disk confiuration file $DISKCFG does not exist!" + exit 1 + fi + ;; + bdev-io-cache-size=*) BDEV_CACHE="${OPTARG#*=}" ;; + bdev-io-pool-size=*) BDEV_POOL="${OPTARG#*=}" ;; + max-disk=*) DISKNO="${OPTARG#*=}" ;; + cpu-allowed=*) + CPUS_ALLOWED="${OPTARG#*=}" + if [[ -f "$CPUS_ALLOWED" ]]; then + CPUS_ALLOWED=$(cat "$CPUS_ALLOWED") + fi + ;; + no-preconditioning) PRECONDITIONING=false ;; + no-io-scaling) NOIOSCALING=true ;; + cpu-frequency=*) CPUFREQ="${OPTARG#*=}" ;; + perftop) PERFTOP=true ;; + dpdk-mem-stats) DPDKMEM=true ;; + *) + usage $0 echo "Invalid argument '$OPTARG'" + exit 1 + ;; + esac + ;; + h) + usage $0 + exit 0 + ;; + *) + usage $0 "Invalid argument '$optchar'" + exit 1 + ;; + esac +done + +result_dir=$testdir/results/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE} +result_file=$result_dir/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.csv +mkdir -p $result_dir +unset iops_disks bw mean_lat_disks_usec p99_lat_disks_usec p99_99_lat_disks_usec stdev_disks_usec +echo "run-time,ramp-time,fio-plugin,QD,block-size,num-cpu-cores,workload,workload-mix" > $result_file +printf "%s,%s,%s,%s,%s,%s,%s,%s\n" $RUNTIME $RAMP_TIME $PLUGIN $IODEPTH $BLK_SIZE $NO_CORES $RW $MIX >> $result_file +echo "num_of_disks,iops,avg_lat[usec],p99[usec],p99.99[usec],stdev[usec],avg_slat[usec],avg_clat[usec],bw[Kib/s]" >> $result_file + +trap 'rm -f *.state $testdir/bdev.conf; kill $perf_pid; wait $dpdk_mem_pid; print_backtrace' ERR SIGTERM SIGABRT + +if [[ "$PLUGIN" =~ "bdev" ]]; then + create_spdk_bdev_conf "$BDEV_CACHE" "$BDEV_POOL" +fi +verify_disk_number +DISK_NAMES=$(get_disks $PLUGIN) +DISKS_NUMA=$(get_numa_node $PLUGIN "$DISK_NAMES") +CORES=$(get_cores "$CPUS_ALLOWED") +NO_CORES_ARRAY=($CORES) +NO_CORES=${#NO_CORES_ARRAY[@]} + +if $PRECONDITIONING; then + preconditioning +fi + +if [[ "$PLUGIN" =~ "kernel" ]]; then + $rootdir/scripts/setup.sh reset + fio_ioengine_opt="${KERNEL_ENGINES[$PLUGIN]}" + + if [[ $PLUGIN = "kernel-classic-polling" ]]; then + for disk in $DISK_NAMES; do + echo -1 > /sys/block/$disk/queue/io_poll_delay + done + elif [[ $PLUGIN = "kernel-hybrid-polling" ]]; then + for disk in $DISK_NAMES; do + echo 0 > /sys/block/$disk/queue/io_poll_delay + done + elif [[ $PLUGIN = "kernel-io-uring" ]]; then + modprobe -rv nvme + modprobe nvme poll_queues=8 + wait_for_nvme_reload $DISK_NAMES + + backup_dir="/tmp/nvme_param_bak" + mkdir -p $backup_dir + + for disk in $DISK_NAMES; do + echo "INFO: Backing up device parameters for $disk" + sysfs=/sys/block/$disk/queue + mkdir -p $backup_dir/$disk + cat $sysfs/iostats > $backup_dir/$disk/iostats + cat $sysfs/rq_affinity > $backup_dir/$disk/rq_affinity + cat $sysfs/nomerges > $backup_dir/$disk/nomerges + cat $sysfs/io_poll_delay > $backup_dir/$disk/io_poll_delay + done + + for disk in $DISK_NAMES; do + echo "INFO: Setting device 
parameters for $disk" + sysfs=/sys/block/$disk/queue + echo 0 > $sysfs/iostats + echo 0 > $sysfs/rq_affinity + echo 2 > $sysfs/nomerges + echo 0 > $sysfs/io_poll_delay + done + fi +fi + +if [[ -n "$CPUFREQ" ]]; then + if [[ ! "$(cat /proc/cmdline)" =~ "intel_pstate=disable" ]]; then + echo "ERROR: Cannot set custom CPU frequency for test. intel_pstate=disable not in boot options." + false + else + cpu_governor="$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)" + cpupower frequency-set -g userspace + cpupower frequency-set -f $CPUFREQ + fi +fi + +if $PERFTOP; then + echo "INFO: starting perf record on cores $CPUS_ALLOWED" + perf record -C $CPUS_ALLOWED -o "$testdir/perf.data" & + perf_pid=$! +fi + +if $DPDKMEM; then + echo "INFO: waiting to generate DPDK memory usage" + wait_time=$((RUNTIME / 2)) + if [[ ! "$PLUGIN" =~ "perf" ]]; then + wait_time=$((wait_time + RAMP_TIME)) + fi + ( + sleep $wait_time + echo "INFO: generating DPDK memory usage" + $rootdir/scripts/rpc.py env_dpdk_get_mem_stats + ) & + dpdk_mem_pid=$! +fi + +#Run each workolad $REPEAT_NO times +for ((j = 0; j < REPEAT_NO; j++)); do + if [ $PLUGIN = "spdk-perf-bdev" ]; then + run_bdevperf > $TMP_RESULT_FILE + iops_disks=$((iops_disks + $(get_bdevperf_results iops))) + bw=$((bw + $(get_bdevperf_results bw_Kibs))) + cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output + elif [ $PLUGIN = "spdk-perf-nvme" ]; then + run_nvmeperf $DISKNO > $TMP_RESULT_FILE + read -r iops bandwidth mean_lat min_lat max_lat <<< $(get_nvmeperf_results) + + iops_disks=$((iops_disks + iops)) + bw=$((bw + bandwidth)) + mean_lat_disks_usec=$((mean_lat_disks_usec + mean_lat)) + min_lat_disks_usec=$((min_lat_disks_usec + min_lat)) + max_lat_disks_usec=$((max_lat_disks_usec + max_lat)) + + cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output + else + create_fio_config $DISKNO $PLUGIN "$DISK_NAMES" "$DISKS_NUMA" "$CORES" + + if [[ "$PLUGIN" =~ "spdk-plugin" ]]; then + run_spdk_nvme_fio $PLUGIN "--output=$TMP_RESULT_FILE" \ + "--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}" + else + run_nvme_fio $fio_ioengine_opt "--output=$TMP_RESULT_FILE" \ + "--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}" + fi + + #Store values for every number of used disks + #Use recalculated value for mixread param in case rw mode is not rw. 
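+		# For example, RW=randread results in rwmixread=100, RW=randwrite in rwmixread=0,
+		# and RW=randrw (or rw) keeps the user-provided $MIX value.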
+ rwmixread=$MIX + if [[ $RW = *"read"* ]]; then + rwmixread=100 + elif [[ $RW = *"write"* ]]; then + rwmixread=0 + fi + iops_disks=$((iops_disks + $(get_results iops $rwmixread))) + mean_lat_disks_usec=$((mean_lat_disks_usec + $(get_results mean_lat_usec $rwmixread))) + p99_lat_disks_usec=$((p99_lat_disks_usec + $(get_results p99_lat_usec $rwmixread))) + p99_99_lat_disks_usec=$((p99_99_lat_disks_usec + $(get_results p99_99_lat_usec $rwmixread))) + stdev_disks_usec=$((stdev_disks_usec + $(get_results stdev_usec $rwmixread))) + + mean_slat_disks_usec=$((mean_slat_disks_usec + $(get_results mean_slat_usec $rwmixread))) + mean_clat_disks_usec=$((mean_clat_disks_usec + $(get_results mean_clat_usec $rwmixread))) + bw=$((bw + $(get_results bw_Kibs $rwmixread))) + + cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.json + cp $testdir/config.fio $result_dir/config_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.fio + rm -f $testdir/config.fio + fi +done + +if $PERFTOP; then + echo "INFO: Stopping perftop measurements." + kill $perf_pid + wait $perf_pid || true + perf report -i "$testdir/perf.data" > $result_dir/perftop_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt + rm -f "$testdir/perf.data" +fi + +if $DPDKMEM; then + mv "/tmp/spdk_mem_dump.txt" $result_dir/spdk_mem_dump_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt + echo "INFO: DPDK memory usage saved in $result_dir" +fi + +#Write results to csv file +iops_disks=$((iops_disks / REPEAT_NO)) +bw=$((bw / REPEAT_NO)) +if [[ "$PLUGIN" =~ "plugin" ]]; then + mean_lat_disks_usec=$((mean_lat_disks_usec / REPEAT_NO)) + p99_lat_disks_usec=$((p99_lat_disks_usec / REPEAT_NO)) + p99_99_lat_disks_usec=$((p99_99_lat_disks_usec / REPEAT_NO)) + stdev_disks_usec=$((stdev_disks_usec / REPEAT_NO)) + mean_slat_disks_usec=$((mean_slat_disks_usec / REPEAT_NO)) + mean_clat_disks_usec=$((mean_clat_disks_usec / REPEAT_NO)) +elif [[ "$PLUGIN" == "spdk-perf-bdev" ]]; then + mean_lat_disks_usec=0 + p99_lat_disks_usec=0 + p99_99_lat_disks_usec=0 + stdev_disks_usec=0 + mean_slat_disks_usec=0 + mean_clat_disks_usec=0 +elif [[ "$PLUGIN" == "spdk-perf-nvme" ]]; then + mean_lat_disks_usec=$((mean_lat_disks_usec / REPEAT_NO)) + p99_lat_disks_usec=0 + p99_99_lat_disks_usec=0 + stdev_disks_usec=0 + mean_slat_disks_usec=0 + mean_clat_disks_usec=0 +fi + +printf "%s,%s,%s,%s,%s,%s,%s,%s,%s\n" ${DISKNO} ${iops_disks} ${mean_lat_disks_usec} ${p99_lat_disks_usec} \ + ${p99_99_lat_disks_usec} ${stdev_disks_usec} ${mean_slat_disks_usec} ${mean_clat_disks_usec} ${bw} >> $result_file + +if [[ -n "$CPUFREQ" ]]; then + cpupower frequency-set -g $cpu_governor +fi + +if [ $PLUGIN = "kernel-io-uring" ]; then + # Reload the nvme driver so that other test runs are not affected + modprobe -rv nvme + modprobe nvme + wait_for_nvme_reload $DISK_NAMES + + for disk in $DISK_NAMES; do + echo "INFO: Restoring device parameters for $disk" + sysfs=/sys/block/$disk/queue + cat $backup_dir/$disk/iostats > $sysfs/iostats + cat $backup_dir/$disk/rq_affinity > $sysfs/rq_affinity + cat $backup_dir/$disk/nomerges > $sysfs/nomerges + cat $backup_dir/$disk/io_poll_delay > $sysfs/io_poll_delay + done +fi +rm -f $testdir/bdev.conf $testdir/config.fio diff --git a/src/spdk/test/nvme/reserve/.gitignore b/src/spdk/test/nvme/reserve/.gitignore new file mode 100644 index 000000000..c58b368cf --- /dev/null +++ b/src/spdk/test/nvme/reserve/.gitignore @@ -0,0 +1 @@ +reserve diff --git a/src/spdk/test/nvme/reserve/Makefile 
b/src/spdk/test/nvme/reserve/Makefile new file mode 100644 index 000000000..a3e62138b --- /dev/null +++ b/src/spdk/test/nvme/reserve/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = reserve + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/reserve/reserve.c b/src/spdk/test/nvme/reserve/reserve.c new file mode 100644 index 000000000..9bb9230cf --- /dev/null +++ b/src/spdk/test/nvme/reserve/reserve.c @@ -0,0 +1,457 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/log.h" + +#define MAX_DEVS 64 + +struct dev { + struct spdk_pci_addr pci_addr; + struct spdk_nvme_ctrlr *ctrlr; + char name[100]; +}; + +static struct dev g_devs[MAX_DEVS]; +static int g_num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = g_devs; iter - g_devs < g_num_devs; iter++) + +static int g_outstanding_commands; +static int g_reserve_command_result; +static bool g_feat_host_id_successful; + +#define HOST_ID 0xABABABABCDCDCDCD +#define EXT_HOST_ID ((uint8_t[]){0x0f, 0x97, 0xcd, 0x74, 0x8c, 0x80, 0x41, 0x42, \ + 0x99, 0x0f, 0x65, 0xc4, 0xf0, 0x39, 0x24, 0x20}) + +#define CR_KEY 0xDEADBEAF5A5A5A5B + +static void +feat_host_id_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + fprintf(stdout, "Get/Set Features - Host Identifier failed\n"); + g_feat_host_id_successful = false; + } else { + g_feat_host_id_successful = true; + } + g_outstanding_commands--; +} + +static int +get_host_identifier(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + uint8_t host_id[16]; + uint32_t host_id_size; + uint32_t cdw11; + + if (spdk_nvme_ctrlr_get_data(ctrlr)->ctratt.host_id_exhid_supported) { + host_id_size = 16; + cdw11 = 1; + printf("Using 128-bit extended host identifier\n"); + } else { + host_id_size = 8; + cdw11 = 0; + printf("Using 64-bit host identifier\n"); + } + + g_outstanding_commands = 0; + ret = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_HOST_IDENTIFIER, cdw11, host_id, + host_id_size, + feat_host_id_completion, NULL); + if (ret) { + fprintf(stdout, "Get Feature: Failed\n"); + return -1; + } + + g_outstanding_commands++; + g_feat_host_id_successful = false; + + while (g_outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (g_feat_host_id_successful) { + spdk_log_dump(stdout, "Get Feature: Host Identifier:", host_id, host_id_size); + return 0; + } + + return -1; +} + +static int +set_host_identifier(struct spdk_nvme_ctrlr *ctrlr) +{ + int ret; + uint8_t host_id[16] = {}; + uint32_t host_id_size; + uint32_t cdw11; + + if (spdk_nvme_ctrlr_get_data(ctrlr)->ctratt.host_id_exhid_supported) { + host_id_size = 16; + cdw11 = 1; + printf("Using 128-bit extended host identifier\n"); + memcpy(host_id, EXT_HOST_ID, host_id_size); + } else { + host_id_size = 8; + cdw11 = 0; + to_be64(host_id, HOST_ID); + printf("Using 64-bit host identifier\n"); + } + + g_outstanding_commands = 0; + ret = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_HOST_IDENTIFIER, cdw11, 0, host_id, + host_id_size, feat_host_id_completion, NULL); + if (ret) { + fprintf(stdout, "Set Feature: Failed\n"); + return -1; + } + + g_outstanding_commands++; + g_feat_host_id_successful = false; + + while (g_outstanding_commands) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + } + + if (g_feat_host_id_successful) { + spdk_log_dump(stdout, "Set Feature: Host Identifier:", host_id, 
host_id_size); + return 0; + } + + fprintf(stderr, "Set Feature: Host Identifier Failed\n"); + return -1; +} + +static void +reservation_ns_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + g_reserve_command_result = -1; + } else { + g_reserve_command_result = 0; + } + + g_outstanding_commands--; +} + +static int +reservation_ns_register(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + uint32_t ns_id, bool reg) +{ + int ret; + struct spdk_nvme_reservation_register_data rr_data; + enum spdk_nvme_reservation_register_action action; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + if (reg) { + rr_data.crkey = 0; + rr_data.nrkey = CR_KEY; + action = SPDK_NVME_RESERVE_REGISTER_KEY; + } else { + rr_data.crkey = CR_KEY; + rr_data.nrkey = 0; + action = SPDK_NVME_RESERVE_UNREGISTER_KEY; + } + + g_outstanding_commands = 0; + g_reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_register(ns, qpair, &rr_data, true, + action, + SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Register Failed\n"); + return -1; + } + + g_outstanding_commands++; + while (g_outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (g_reserve_command_result) { + fprintf(stderr, "Reservation Register Failed\n"); + return -1; + } + + return 0; +} + +static int +reservation_ns_report(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret, i; + uint8_t *payload; + struct spdk_nvme_reservation_status_data *status; + struct spdk_nvme_registered_ctrlr_data *cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + + g_outstanding_commands = 0; + g_reserve_command_result = -1; + + payload = spdk_dma_zmalloc(0x1000, 0x1000, NULL); + if (!payload) { + fprintf(stderr, "DMA Buffer Allocation Failed\n"); + return -1; + } + + ret = spdk_nvme_ns_cmd_reservation_report(ns, qpair, payload, 0x1000, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Report Failed\n"); + spdk_dma_free(payload); + return -1; + } + + g_outstanding_commands++; + while (g_outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (g_reserve_command_result) { + fprintf(stderr, "Reservation Report Failed\n"); + spdk_dma_free(payload); + return -1; + } + + status = (struct spdk_nvme_reservation_status_data *)payload; + fprintf(stdout, "Reservation Generation Counter %u\n", status->gen); + fprintf(stdout, "Reservation type %u\n", status->rtype); + fprintf(stdout, "Reservation Number of Registered Controllers %u\n", status->regctl); + fprintf(stdout, "Reservation Persist Through Power Loss State %u\n", status->ptpls); + for (i = 0; i < status->regctl; i++) { + cdata = (struct spdk_nvme_registered_ctrlr_data *)(payload + + sizeof(struct spdk_nvme_reservation_status_data) + + sizeof(struct spdk_nvme_registered_ctrlr_data) * i); + fprintf(stdout, "Controller ID %u\n", cdata->cntlid); + fprintf(stdout, "Controller Reservation Status %u\n", cdata->rcsts.status); + fprintf(stdout, "Controller Host ID 0x%"PRIx64"\n", cdata->hostid); + fprintf(stdout, "Controller Reservation Key 0x%"PRIx64"\n", cdata->rkey); + } + + spdk_dma_free(payload); + return 0; +} + +static int +reservation_ns_acquire(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_acquire_data cdata; + struct spdk_nvme_ns *ns; 
+ + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + cdata.prkey = 0; + + g_outstanding_commands = 0; + g_reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_acquire(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_ACQUIRE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Acquire Failed\n"); + return -1; + } + + g_outstanding_commands++; + while (g_outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (g_reserve_command_result) { + fprintf(stderr, "Reservation Acquire Failed\n"); + return -1; + } + + return 0; +} + +static int +reservation_ns_release(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint32_t ns_id) +{ + int ret; + struct spdk_nvme_reservation_key_data cdata; + struct spdk_nvme_ns *ns; + + ns = spdk_nvme_ctrlr_get_ns(ctrlr, ns_id); + cdata.crkey = CR_KEY; + + g_outstanding_commands = 0; + g_reserve_command_result = -1; + + ret = spdk_nvme_ns_cmd_reservation_release(ns, qpair, &cdata, + false, + SPDK_NVME_RESERVE_RELEASE, + SPDK_NVME_RESERVE_WRITE_EXCLUSIVE, + reservation_ns_completion, NULL); + if (ret) { + fprintf(stderr, "Reservation Release Failed\n"); + return -1; + } + + g_outstanding_commands++; + while (g_outstanding_commands) { + spdk_nvme_qpair_process_completions(qpair, 100); + } + + if (g_reserve_command_result) { + fprintf(stderr, "Reservation Release Failed\n"); + return -1; + } + + return 0; +} + +static int +reserve_controller(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, + const struct spdk_pci_addr *pci_addr) +{ + const struct spdk_nvme_ctrlr_data *cdata; + int ret; + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + printf("=====================================================\n"); + printf("NVMe Controller at PCI bus %d, device %d, function %d\n", + pci_addr->bus, pci_addr->dev, pci_addr->func); + printf("=====================================================\n"); + + printf("Reservations: %s\n", + cdata->oncs.reservations ? 
"Supported" : "Not Supported"); + + if (!cdata->oncs.reservations) { + return 0; + } + + ret = set_host_identifier(ctrlr); + if (ret) { + return ret; + } + + ret = get_host_identifier(ctrlr); + if (ret) { + return ret; + } + + /* tested 1 namespace */ + ret += reservation_ns_register(ctrlr, qpair, 1, 1); + ret += reservation_ns_acquire(ctrlr, qpair, 1); + ret += reservation_ns_release(ctrlr, qpair, 1); + ret += reservation_ns_register(ctrlr, qpair, 1, 0); + ret += reservation_ns_report(ctrlr, qpair, 1); + + return ret; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + + /* add to dev list */ + dev = &g_devs[g_num_devs++]; + spdk_pci_addr_parse(&dev->pci_addr, trid->traddr); + dev->ctrlr = ctrlr; +} + +int main(int argc, char **argv) +{ + struct dev *iter; + int i; + struct spdk_env_opts opts; + int ret = 0; + + spdk_env_opts_init(&opts); + opts.name = "reserve"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + foreach_dev(iter) { + struct spdk_nvme_qpair *qpair; + + qpair = spdk_nvme_ctrlr_alloc_io_qpair(iter->ctrlr, NULL, 0); + if (!qpair) { + fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + ret = 1; + } else { + ret = reserve_controller(iter->ctrlr, qpair, &iter->pci_addr); + } + + if (ret) { + break; + } + } + + printf("Reservation test %s\n", ret ? "failed" : "passed"); + + for (i = 0; i < g_num_devs; i++) { + struct dev *dev = &g_devs[i]; + spdk_nvme_detach(dev->ctrlr); + } + + return ret; +} diff --git a/src/spdk/test/nvme/reset/.gitignore b/src/spdk/test/nvme/reset/.gitignore new file mode 100644 index 000000000..a16781b1b --- /dev/null +++ b/src/spdk/test/nvme/reset/.gitignore @@ -0,0 +1 @@ +reset diff --git a/src/spdk/test/nvme/reset/Makefile b/src/spdk/test/nvme/reset/Makefile new file mode 100644 index 000000000..dd1774bcd --- /dev/null +++ b/src/spdk/test/nvme/reset/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) + +APP = reset + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/reset/reset.c b/src/spdk/test/nvme/reset/reset.c new file mode 100644 index 000000000..70d44db39 --- /dev/null +++ b/src/spdk/test/nvme/reset/reset.c @@ -0,0 +1,716 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" +#include "spdk/pci_ids.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ns *ns; + struct spdk_nvme_ctrlr *ctrlr; + struct ns_entry *next; + uint32_t io_size_blocks; + uint64_t size_in_ios; + char name[1024]; +}; + +struct ns_worker_ctx { + struct ns_entry *entry; + struct spdk_nvme_qpair *qpair; + uint64_t io_completed; + uint64_t io_completed_error; + uint64_t io_submitted; + uint64_t current_queue_depth; + uint64_t offset_in_ios; + bool is_draining; + + struct ns_worker_ctx *next; +}; + +struct reset_task { + struct ns_worker_ctx *ns_ctx; + void *buf; +}; + +struct worker_thread { + struct ns_worker_ctx *ns_ctx; + unsigned lcore; +}; + +static struct spdk_mempool *task_pool; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; +static int g_num_namespaces = 0; +static struct worker_thread *g_workers = NULL; +static bool g_qemu_ssd_found = false; + +static uint64_t g_tsc_rate; + +static int g_io_size_bytes; +static int g_rw_percentage; +static int g_is_random; +static int g_queue_depth; +static int g_time_in_sec; + +#define TASK_POOL_NUM 8192 + +static void +register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) +{ + struct ns_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + if (!spdk_nvme_ns_is_active(ns)) { + printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns)); + return; + } + + entry = malloc(sizeof(struct ns_entry)); + if (entry == NULL) { + perror("ns_entry malloc"); + exit(1); + } + + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + entry->ns = ns; + entry->ctrlr = ctrlr; + entry->size_in_ios = spdk_nvme_ns_get_size(ns) / + g_io_size_bytes; + entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns); + + snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + g_num_namespaces++; + entry->next = g_namespaces; + g_namespaces = entry; +} + +static void +register_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + int nsid, num_ns; + struct spdk_nvme_ns *ns; + struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry)); + + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; + + num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + for (nsid = 1; nsid <= num_ns; nsid++) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + register_ns(ctrlr, ns); + } +} + +static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); + +static __thread unsigned int seed = 0; + +static void +submit_single_io(struct ns_worker_ctx *ns_ctx) +{ + struct reset_task *task = NULL; + uint64_t offset_in_ios; + int rc; + struct ns_entry *entry = ns_ctx->entry; + + task = spdk_mempool_get(task_pool); + if (!task) { + fprintf(stderr, "Failed to get task from task_pool\n"); + exit(1); + } + + task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!task->buf) { + spdk_free(task->buf); + fprintf(stderr, "task->buf spdk_zmalloc failed\n"); + exit(1); + } + + task->ns_ctx = ns_ctx; + task->ns_ctx->io_submitted++; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % entry->size_in_ios; + } else { + offset_in_ios = ns_ctx->offset_in_ios++; + if (ns_ctx->offset_in_ios == entry->size_in_ios) { + ns_ctx->offset_in_ios = 0; + } + } + 
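+ /* Pick a read or a write according to g_rw_percentage (100 = all reads, 0 = all writes). */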
+ if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { + rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } else { + rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf, + offset_in_ios * entry->io_size_blocks, + entry->io_size_blocks, io_complete, task, 0); + } + + if (rc != 0) { + fprintf(stderr, "starting I/O failed\n"); + } else { + ns_ctx->current_queue_depth++; + } +} + +static void +task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion) +{ + struct ns_worker_ctx *ns_ctx; + + ns_ctx = task->ns_ctx; + ns_ctx->current_queue_depth--; + + if (spdk_nvme_cpl_is_error(completion)) { + ns_ctx->io_completed_error++; + } else { + ns_ctx->io_completed++; + } + + spdk_free(task->buf); + spdk_mempool_put(task_pool, task); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (!ns_ctx->is_draining) { + submit_single_io(ns_ctx); + } +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *completion) +{ + task_complete((struct reset_task *)ctx, completion); +} + +static void +check_io(struct ns_worker_ctx *ns_ctx) +{ + spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0); +} + +static void +submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth) +{ + while (queue_depth-- > 0) { + submit_single_io(ns_ctx); + } +} + +static void +drain_io(struct ns_worker_ctx *ns_ctx) +{ + ns_ctx->is_draining = true; + while (ns_ctx->current_queue_depth > 0) { + check_io(ns_ctx); + } +} + +static int +work_fn(void *arg) +{ + uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; + struct worker_thread *worker = (struct worker_thread *)arg; + struct ns_worker_ctx *ns_ctx = NULL; + bool did_reset = false; + + printf("Starting thread on core %u\n", worker->lcore); + + /* Submit initial I/O for each namespace. */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, NULL, 0); + if (ns_ctx->qpair == NULL) { + fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore); + return -1; + } + submit_io(ns_ctx, g_queue_depth); + ns_ctx = ns_ctx->next; + } + + while (1) { + if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) { + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) { + fprintf(stderr, "nvme reset failed.\n"); + return -1; + } + ns_ctx = ns_ctx->next; + } + did_reset = true; + } + + /* + * Check for completed I/O for each controller. A new + * I/O will be submitted in the io_complete callback + * to replace each I/O that is completed. 
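+	 * Note that the controller reset above is issued at most once per
+	 * call to work_fn(), guarded by did_reset.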
+ */ + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + check_io(ns_ctx); + ns_ctx = ns_ctx->next; + } + + if (spdk_get_ticks() > tsc_end) { + break; + } + } + + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + drain_io(ns_ctx); + spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair); + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void usage(char *program_name) +{ + printf("%s options", program_name); + printf("\n"); + printf("\t[-q io depth]\n"); + printf("\t[-s io size in bytes]\n"); + printf("\t[-w io pattern type, must be one of\n"); + printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n"); + printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n"); + printf("\t[-t time in seconds(should be larger than 15 seconds)]\n"); + printf("\t[-m max completions per poll]\n"); + printf("\t\t(default:0 - unlimited)\n"); +} + +static int +print_stats(void) +{ + uint64_t io_completed, io_submitted, io_completed_error; + uint64_t total_completed_io, total_submitted_io, total_completed_err_io; + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + total_completed_io = 0; + total_submitted_io = 0; + total_completed_err_io = 0; + + worker = g_workers; + ns_ctx = worker->ns_ctx; + while (ns_ctx) { + io_completed = ns_ctx->io_completed; + io_submitted = ns_ctx->io_submitted; + io_completed_error = ns_ctx->io_completed_error; + total_completed_io += io_completed; + total_submitted_io += io_submitted; + total_completed_err_io += io_completed_error; + ns_ctx = ns_ctx->next; + } + + printf("========================================================\n"); + printf("%16lu IO completed successfully\n", total_completed_io); + printf("%16lu IO completed with error\n", total_completed_err_io); + printf("--------------------------------------------------------\n"); + printf("%16lu IO completed total\n", total_completed_io + total_completed_err_io); + printf("%16lu IO submitted\n", total_submitted_io); + + if (total_submitted_io != (total_completed_io + total_completed_err_io)) { + fprintf(stderr, "Some IO are missing......\n"); + return -1; + } + + return 0; +} + +static int +parse_args(int argc, char **argv) +{ + const char *workload_type; + int op; + bool mix_specified = false; + long int val; + + /* default value */ + g_queue_depth = 0; + g_io_size_bytes = 0; + workload_type = NULL; + g_time_in_sec = 0; + g_rw_percentage = -1; + + while ((op = getopt(argc, argv, "m:q:s:t:w:M:")) != -1) { + if (op == 'w') { + workload_type = optarg; + } else if (op == '?') { + usage(argv[0]); + return -EINVAL; + } else { + val = spdk_strtol(optarg, 10); + if (val < 0) { + fprintf(stderr, "Converting a string to integer failed\n"); + return val; + } + switch (op) { + case 'q': + g_queue_depth = val; + break; + case 's': + g_io_size_bytes = val; + break; + case 't': + g_time_in_sec = val; + break; + case 'M': + g_rw_percentage = val; + mix_specified = true; + break; + default: + usage(argv[0]); + return -EINVAL; + } + } + } + + if (!g_queue_depth) { + usage(argv[0]); + return 1; + } + if (!g_io_size_bytes) { + usage(argv[0]); + return 1; + } + if (!workload_type) { + usage(argv[0]); + return 1; + } + if (!g_time_in_sec) { + usage(argv[0]); + return 1; + } + + if (strcmp(workload_type, "read") && + strcmp(workload_type, "write") && + strcmp(workload_type, "randread") && + strcmp(workload_type, "randwrite") && + strcmp(workload_type, "rw") && + strcmp(workload_type, "randrw")) { + fprintf(stderr, + "io pattern type must be one of\n" + "(read, write, randread, randwrite, rw, randrw)\n"); + return 1; + } + 
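+ /* A pure read or pure write workload implies a fixed read percentage;
+ * -M is only meaningful for rw and randrw. */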
+ if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread")) { + g_rw_percentage = 100; + } + + if (!strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + g_rw_percentage = 0; + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "randread") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "randwrite")) { + if (mix_specified) { + fprintf(stderr, "Ignoring -M option... Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(workload_type, "rw") || + !strcmp(workload_type, "randrw")) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + if (!strcmp(workload_type, "read") || + !strcmp(workload_type, "write") || + !strcmp(workload_type, "rw")) { + g_is_random = 0; + } else { + g_is_random = 1; + } + + return 0; +} + +static int +register_workers(void) +{ + struct worker_thread *worker; + + worker = malloc(sizeof(struct worker_thread)); + if (worker == NULL) { + perror("worker_thread malloc"); + return -1; + } + + memset(worker, 0, sizeof(struct worker_thread)); + worker->lcore = spdk_env_get_current_core(); + + g_workers = worker; + + return 0; +} + + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + opts->disable_error_logging = true; + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { + struct spdk_pci_device *dev = spdk_nvme_ctrlr_get_pci_device(ctrlr); + + /* QEMU emulated SSDs can't handle this test, so we will skip + * them. QEMU NVMe SSDs report themselves as VID == Intel. So we need + * to check this specific 0x5845 device ID to know whether it's QEMU + * or not. 
+ */ + if (spdk_pci_device_get_vendor_id(dev) == SPDK_PCI_VID_INTEL && + spdk_pci_device_get_device_id(dev) == 0x5845) { + g_qemu_ssd_found = true; + printf("Skipping QEMU NVMe SSD at %s\n", trid->traddr); + return; + } + } + + register_ctrlr(ctrlr); +} + +static int +register_controllers(void) +{ + printf("Initializing NVMe Controllers\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + return 1; + } + + return 0; +} + +static void +unregister_controllers(void) +{ + struct ctrlr_entry *entry = g_controllers; + + while (entry) { + struct ctrlr_entry *next = entry->next; + spdk_nvme_detach(entry->ctrlr); + free(entry); + entry = next; + } +} + +static int +associate_workers_with_ns(void) +{ + struct ns_entry *entry = g_namespaces; + struct worker_thread *worker = g_workers; + struct ns_worker_ctx *ns_ctx; + int i, count; + + count = g_num_namespaces; + + for (i = 0; i < count; i++) { + if (entry == NULL) { + break; + } + ns_ctx = malloc(sizeof(struct ns_worker_ctx)); + if (!ns_ctx) { + return -1; + } + memset(ns_ctx, 0, sizeof(*ns_ctx)); + + printf("Associating %s with lcore %d\n", entry->name, worker->lcore); + ns_ctx->entry = entry; + ns_ctx->next = worker->ns_ctx; + worker->ns_ctx = ns_ctx; + + worker = g_workers; + + entry = entry->next; + if (entry == NULL) { + entry = g_namespaces; + } + } + + return 0; +} + +static int +run_nvme_reset_cycle(void) +{ + struct worker_thread *worker; + struct ns_worker_ctx *ns_ctx; + + if (work_fn(g_workers) != 0) { + return -1; + } + + if (print_stats() != 0) { + return -1; + } + + worker = g_workers; + ns_ctx = worker->ns_ctx; + while (ns_ctx != NULL) { + ns_ctx->io_completed = 0; + ns_ctx->io_completed_error = 0; + ns_ctx->io_submitted = 0; + ns_ctx->is_draining = false; + ns_ctx = ns_ctx->next; + } + + return 0; +} + +static void +spdk_reset_free_tasks(void) +{ + if (spdk_mempool_count(task_pool) != TASK_POOL_NUM) { + fprintf(stderr, "task_pool count is %zu but should be %d\n", + spdk_mempool_count(task_pool), TASK_POOL_NUM); + } + spdk_mempool_free(task_pool); +} + +int main(int argc, char **argv) +{ + int rc; + int i; + struct spdk_env_opts opts; + + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + spdk_env_opts_init(&opts); + opts.name = "reset"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + if (register_controllers() != 0) { + return 1; + } + + if (!g_controllers) { + printf("No NVMe controller found, %s exiting\n", argv[0]); + return g_qemu_ssd_found ? 0 : 1; + } + + task_pool = spdk_mempool_create("task_pool", TASK_POOL_NUM, + sizeof(struct reset_task), + 64, SPDK_ENV_SOCKET_ID_ANY); + if (!task_pool) { + fprintf(stderr, "Cannot create task pool\n"); + return 1; + } + + g_tsc_rate = spdk_get_ticks_hz(); + + if (register_workers() != 0) { + return 1; + } + + if (associate_workers_with_ns() != 0) { + rc = 1; + goto cleanup; + } + + printf("Initialization complete. 
Launching workers.\n");
+
+ for (i = 2; i >= 0; i--) {
+ rc = run_nvme_reset_cycle();
+ if (rc != 0) {
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ unregister_controllers();
+ spdk_reset_free_tasks();
+
+ if (rc != 0) {
+ fprintf(stderr, "%s: errors occurred\n", argv[0]);
+ }
+
+ return rc;
+}
diff --git a/src/spdk/test/nvme/sgl/.gitignore b/src/spdk/test/nvme/sgl/.gitignore
new file mode 100644
index 000000000..d1cebd688
--- /dev/null
+++ b/src/spdk/test/nvme/sgl/.gitignore
@@ -0,0 +1 @@
+sgl
diff --git a/src/spdk/test/nvme/sgl/Makefile b/src/spdk/test/nvme/sgl/Makefile
new file mode 100644
index 000000000..fe57e6147
--- /dev/null
+++ b/src/spdk/test/nvme/sgl/Makefile
@@ -0,0 +1,38 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+
+APP = sgl
+
+include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk
diff --git a/src/spdk/test/nvme/sgl/sgl.c b/src/spdk/test/nvme/sgl/sgl.c
new file mode 100644
index 000000000..09794681f
--- /dev/null
+++ b/src/spdk/test/nvme/sgl/sgl.c
@@ -0,0 +1,545 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/util.h" + +#define MAX_DEVS 64 + +#define MAX_IOVS 128 + +#define DATA_PATTERN 0x5A + +#define BASE_LBA_START 0x100000 + +struct dev { + struct spdk_nvme_ctrlr *ctrlr; + char name[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +}; + +static struct dev devs[MAX_DEVS]; +static int num_devs = 0; + +#define foreach_dev(iter) \ + for (iter = devs; iter - devs < num_devs; iter++) + +static int io_complete_flag = 0; + +struct sgl_element { + void *base; + size_t offset; + size_t len; +}; + +struct io_request { + uint32_t current_iov_index; + uint32_t current_iov_bytes_left; + struct sgl_element iovs[MAX_IOVS]; + uint32_t nseg; + uint32_t misalign; +}; + +static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset) +{ + uint32_t i; + uint32_t offset = 0; + struct sgl_element *iov; + struct io_request *req = (struct io_request *)cb_arg; + + for (i = 0; i < req->nseg; i++) { + iov = &req->iovs[i]; + offset += iov->len; + if (offset > sgl_offset) { + break; + } + } + req->current_iov_index = i; + req->current_iov_bytes_left = offset - sgl_offset; + return; +} + +static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length) +{ + struct io_request *req = (struct io_request *)cb_arg; + struct sgl_element *iov; + + if (req->current_iov_index >= req->nseg) { + *length = 0; + *address = NULL; + return 0; + } + + iov = &req->iovs[req->current_iov_index]; + + if (req->current_iov_bytes_left) { + *address = iov->base + iov->offset + iov->len - req->current_iov_bytes_left; + *length = req->current_iov_bytes_left; + req->current_iov_bytes_left = 0; + } else { + *address = iov->base + iov->offset; + *length = iov->len; + } + + req->current_iov_index++; + + return 0; +} + +static void +io_complete(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + if (spdk_nvme_cpl_is_error(cpl)) { + io_complete_flag = 2; + } else { + io_complete_flag = 1; + } +} + +static void build_io_request_0(struct io_request *req) +{ + req->nseg = 1; + + req->iovs[0].base = spdk_zmalloc(0x800, 4, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + req->iovs[0].len = 0x800; +} + +static void build_io_request_1(struct io_request *req) +{ + req->nseg = 1; + + /* 512B for 1st sge */ + req->iovs[0].base = spdk_zmalloc(0x200, 0x200, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + req->iovs[0].len = 0x200; +} + +static void build_io_request_2(struct io_request *req) +{ + req->nseg = 1; + + /* 256KB for 1st sge */ + req->iovs[0].base = spdk_zmalloc(0x40000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + req->iovs[0].len = 0x40000; +} + +static void build_io_request_3(struct io_request *req) +{ + req->nseg = 3; + + /* 2KB for 1st sge, make sure the 
iov address starts at a 0x800 boundary
+ * and ends at a 0x1000 boundary */
+ req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[0].offset = 0x800;
+ req->iovs[0].len = 0x800;
+
+ /* 4KB for 2nd sge */
+ req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[1].len = 0x1000;
+
+ /* 12KB for 3rd sge */
+ req->iovs[2].base = spdk_zmalloc(0x3000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[2].len = 0x3000;
+}
+
+static void build_io_request_4(struct io_request *req)
+{
+ uint32_t i;
+
+ req->nseg = 32;
+
+ /* 4KB for 1st sge */
+ req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[0].len = 0x1000;
+
+ /* 8KB for each of the remaining 31 sges */
+ for (i = 1; i < req->nseg; i++) {
+ req->iovs[i].base = spdk_zmalloc(0x2000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[i].len = 0x2000;
+ }
+}
+
+static void build_io_request_5(struct io_request *req)
+{
+ req->nseg = 1;
+
+ /* 8KB for 1st sge */
+ req->iovs[0].base = spdk_zmalloc(0x2000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[0].len = 0x2000;
+}
+
+static void build_io_request_6(struct io_request *req)
+{
+ req->nseg = 2;
+
+ /* 4KB for 1st sge */
+ req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[0].len = 0x1000;
+
+ /* 4KB for 2nd sge */
+ req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[1].len = 0x1000;
+}
+
+static void build_io_request_7(struct io_request *req)
+{
+ uint8_t *base;
+
+ req->nseg = 1;
+
+ /*
+ * Create a 64KB sge, but ensure it is *not* aligned on a 4KB
+ * boundary. This is valid for single element buffers with PRP.
+ */
+ base = spdk_zmalloc(0x11000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->misalign = 64;
+ req->iovs[0].base = base + req->misalign;
+ req->iovs[0].len = 0x10000;
+}
+
+static void build_io_request_8(struct io_request *req)
+{
+ req->nseg = 2;
+
+ /*
+ * 1KB for 1st sge, make sure the iov address does not start or end
+ * on a 0x1000 boundary
+ */
+ req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[0].offset = 0x400;
+ req->iovs[0].len = 0x400;
+
+ /*
+ * 1KB for 2nd sge, make sure the iov address does not start or end
+ * on a 0x1000 boundary
+ */
+ req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ req->iovs[1].offset = 0x400;
+ req->iovs[1].len = 0x400;
+}
+
+static void build_io_request_9(struct io_request *req)
+{
+ /*
+ * Check that mixed PRP-compliant and non-compliant requests are handled
+ * properly by splitting them into subrequests.
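+ * (With PRP, only the first element may begin at an offset within a page,
+ * and every element except the last must end on a page boundary; some of
+ * the buffers below intentionally violate this so the request must be split.)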
+ * Construct buffers with following theme: + */ + const size_t req_len[] = { 2048, 4096, 2048, 4096, 2048, 1024 }; + const size_t req_off[] = { 0x800, 0x0, 0x0, 0x100, 0x800, 0x800 }; + struct sgl_element *iovs = req->iovs; + uint32_t i; + req->nseg = SPDK_COUNTOF(req_len); + assert(SPDK_COUNTOF(req_len) == SPDK_COUNTOF(req_off)); + + for (i = 0; i < req->nseg; i++) { + iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + iovs[i].offset = req_off[i]; + iovs[i].len = req_len[i]; + } +} + +static void build_io_request_10(struct io_request *req) +{ + /* + * Test the case where we have a valid PRP list, but the first and last + * elements are not exact multiples of the logical block size. + */ + const size_t req_len[] = { 4004, 4096, 92 }; + const size_t req_off[] = { 0x5c, 0x0, 0x0 }; + struct sgl_element *iovs = req->iovs; + uint32_t i; + req->nseg = SPDK_COUNTOF(req_len); + assert(SPDK_COUNTOF(req_len) == SPDK_COUNTOF(req_off)); + + for (i = 0; i < req->nseg; i++) { + iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + iovs[i].offset = req_off[i]; + iovs[i].len = req_len[i]; + } +} + +static void build_io_request_11(struct io_request *req) +{ + /* This test case focuses on the last element not starting on a page boundary. */ + const size_t req_len[] = { 512, 512 }; + const size_t req_off[] = { 0xe00, 0x800 }; + struct sgl_element *iovs = req->iovs; + uint32_t i; + req->nseg = SPDK_COUNTOF(req_len); + assert(SPDK_COUNTOF(req_len) == SPDK_COUNTOF(req_off)); + + for (i = 0; i < req->nseg; i++) { + iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL, SPDK_ENV_LCORE_ID_ANY, + SPDK_MALLOC_DMA); + iovs[i].offset = req_off[i]; + iovs[i].len = req_len[i]; + } +} + +typedef void (*nvme_build_io_req_fn_t)(struct io_request *req); + +static void +free_req(struct io_request *req) +{ + uint32_t i; + + if (req == NULL) { + return; + } + + for (i = 0; i < req->nseg; i++) { + spdk_free(req->iovs[i].base - req->misalign); + } + + spdk_free(req); +} + +static int +writev_readv_tests(struct dev *dev, nvme_build_io_req_fn_t build_io_fn, const char *test_name) +{ + int rc = 0; + uint32_t len, lba_count; + uint32_t i, j, nseg, remainder; + char *buf; + + struct io_request *req; + struct spdk_nvme_ns *ns; + struct spdk_nvme_qpair *qpair; + const struct spdk_nvme_ns_data *nsdata; + + ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, 1); + if (!ns) { + fprintf(stderr, "Null namespace\n"); + return 0; + } + nsdata = spdk_nvme_ns_get_data(ns); + if (!nsdata || !spdk_nvme_ns_get_sector_size(ns)) { + fprintf(stderr, "Empty nsdata or wrong sector size\n"); + return 0; + } + + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + return 0; + } + + req = spdk_zmalloc(sizeof(*req), 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!req) { + fprintf(stderr, "Allocate request failed\n"); + return 0; + } + + /* IO parameters setting */ + build_io_fn(req); + + len = 0; + for (i = 0; i < req->nseg; i++) { + struct sgl_element *sge = &req->iovs[i]; + + len += sge->len; + } + + lba_count = len / spdk_nvme_ns_get_sector_size(ns); + remainder = len % spdk_nvme_ns_get_sector_size(ns); + if (!lba_count || remainder || (BASE_LBA_START + lba_count > (uint32_t)nsdata->nsze)) { + fprintf(stderr, "%s: %s Invalid IO length parameter\n", dev->name, test_name); + free_req(req); + return 0; + } + + qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0); + if (!qpair) { + free_req(req); + return 
-1; + } + + nseg = req->nseg; + for (i = 0; i < nseg; i++) { + memset(req->iovs[i].base + req->iovs[i].offset, DATA_PATTERN, req->iovs[i].len); + } + + rc = spdk_nvme_ns_cmd_writev(ns, qpair, BASE_LBA_START, lba_count, + io_complete, req, 0, + nvme_request_reset_sgl, + nvme_request_next_sge); + + if (rc != 0) { + fprintf(stderr, "%s: %s writev failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + io_complete_flag = 0; + + while (!io_complete_flag) { + spdk_nvme_qpair_process_completions(qpair, 1); + } + + if (io_complete_flag != 1) { + fprintf(stderr, "%s: %s writev failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + /* reset completion flag */ + io_complete_flag = 0; + + for (i = 0; i < nseg; i++) { + memset(req->iovs[i].base + req->iovs[i].offset, 0, req->iovs[i].len); + } + + rc = spdk_nvme_ns_cmd_readv(ns, qpair, BASE_LBA_START, lba_count, + io_complete, req, 0, + nvme_request_reset_sgl, + nvme_request_next_sge); + + if (rc != 0) { + fprintf(stderr, "%s: %s readv failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + while (!io_complete_flag) { + spdk_nvme_qpair_process_completions(qpair, 1); + } + + if (io_complete_flag != 1) { + fprintf(stderr, "%s: %s readv failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + + for (i = 0; i < nseg; i++) { + buf = (char *)req->iovs[i].base + req->iovs[i].offset; + for (j = 0; j < req->iovs[i].len; j++) { + if (buf[j] != DATA_PATTERN) { + fprintf(stderr, "%s: %s write/read success, but memcmp Failed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return -1; + } + } + } + + fprintf(stdout, "%s: %s test passed\n", dev->name, test_name); + spdk_nvme_ctrlr_free_io_qpair(qpair); + free_req(req); + return rc; +} + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + struct dev *dev; + + /* add to dev list */ + dev = &devs[num_devs++]; + + dev->ctrlr = ctrlr; + + snprintf(dev->name, sizeof(dev->name), "%s", + trid->traddr); + + printf("Attached to %s\n", dev->name); +} + +int main(int argc, char **argv) +{ + struct dev *iter; + int rc, i; + struct spdk_env_opts opts; + + spdk_env_opts_init(&opts); + opts.name = "nvme_sgl"; + opts.core_mask = "0x1"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("NVMe Readv/Writev Request test\n"); + + if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "nvme_probe() failed\n"); + exit(1); + } + + rc = 0; + foreach_dev(iter) { +#define TEST(x) writev_readv_tests(iter, x, #x) + if (TEST(build_io_request_0) + || TEST(build_io_request_1) + || TEST(build_io_request_2) + || TEST(build_io_request_3) + || TEST(build_io_request_4) + || TEST(build_io_request_5) + || TEST(build_io_request_6) + || TEST(build_io_request_7) + || TEST(build_io_request_8) + || TEST(build_io_request_9) + || TEST(build_io_request_10) + || TEST(build_io_request_11)) { +#undef TEST + rc = 1; + printf("%s: failed sgl tests\n", iter->name); + } + } + + printf("Cleaning up...\n"); + + for (i 
= 0; i < num_devs; i++) { + struct dev *dev = &devs[i]; + + spdk_nvme_detach(dev->ctrlr); + } + + return rc; +} diff --git a/src/spdk/test/nvme/spdk_nvme_cli.sh b/src/spdk/test/nvme/spdk_nvme_cli.sh new file mode 100755 index 000000000..516a16f48 --- /dev/null +++ b/src/spdk/test/nvme/spdk_nvme_cli.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +source $rootdir/scripts/common.sh +source $rootdir/test/common/autotest_common.sh + +if [[ $(uname) != "Linux" ]]; then + echo "NVMe cuse tests only supported on Linux" + exit 1 +fi + +nvme_cli_build + +trap "kill_stub; exit 1" SIGINT SIGTERM EXIT +start_stub "-s 2048 -i 0 -m 0xF" + +pushd ${DEPENDENCY_DIR}/nvme-cli + +sed -i 's/spdk=0/spdk=1/g' spdk.conf +sed -i 's/shm_id=.*/shm_id=0/g' spdk.conf +for bdf in $(get_nvme_bdfs); do + ./nvme list + ./nvme id-ctrl $bdf + ./nvme list-ctrl $bdf + ./nvme get-ns-id $bdf + ./nvme id-ns $bdf + ./nvme fw-log $bdf + ./nvme smart-log $bdf + ./nvme error-log $bdf + ./nvme list-ns $bdf -n 1 + ./nvme get-feature $bdf -f 1 -s 1 -l 100 + ./nvme get-log $bdf -i 1 -l 100 + ./nvme reset $bdf +done + +popd + +trap - SIGINT SIGTERM EXIT +kill_stub diff --git a/src/spdk/test/nvme/startup/.gitignore b/src/spdk/test/nvme/startup/.gitignore new file mode 100644 index 000000000..efcfc5a6a --- /dev/null +++ b/src/spdk/test/nvme/startup/.gitignore @@ -0,0 +1 @@ +startup diff --git a/src/spdk/test/nvme/startup/Makefile b/src/spdk/test/nvme/startup/Makefile new file mode 100644 index 000000000..06e5824b9 --- /dev/null +++ b/src/spdk/test/nvme/startup/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+ +APP = startup + +include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk diff --git a/src/spdk/test/nvme/startup/startup.c b/src/spdk/test/nvme/startup/startup.c new file mode 100644 index 000000000..2d99803d3 --- /dev/null +++ b/src/spdk/test/nvme/startup/startup.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "spdk/nvme.h" +#include "spdk/env.h" +#include "spdk/string.h" + +struct ctrlr_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct ctrlr_entry *next; + char name[1024]; +}; + +struct ns_entry { + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_ns *ns; + struct ns_entry *next; + struct spdk_nvme_qpair *qpair; +}; + +static struct ctrlr_entry *g_controllers = NULL; +static struct ns_entry *g_namespaces = NULL; +static int g_startup_time = 0; + +static bool +probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr_opts *opts) +{ + printf("Attaching to %s\n", trid->traddr); + + return true; +} + +static void +attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) +{ + + struct ctrlr_entry *entry; + const struct spdk_nvme_ctrlr_data *cdata; + + entry = malloc(sizeof(struct ctrlr_entry)); + if (entry == NULL) { + perror("ctrlr_entry malloc"); + exit(1); + } + + printf("Attached to %s\n", trid->traddr); + + /* + * spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe + * controller. During initialization, the IDENTIFY data for the + * controller is read using an NVMe admin command, and that data + * can be retrieved using spdk_nvme_ctrlr_get_data() to get + * detailed information on the controller. Refer to the NVMe + * specification for more details on IDENTIFY for NVMe controllers. 
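+ * This callback then records the controller in the g_controllers list so
+ * that cleanup() can detach it before the program exits.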
+ */ + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + + snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); + + entry->ctrlr = ctrlr; + entry->next = g_controllers; + g_controllers = entry; +} + +static void +cleanup(void) +{ + struct ns_entry *ns_entry = g_namespaces; + struct ctrlr_entry *ctrlr_entry = g_controllers; + + while (ns_entry) { + struct ns_entry *next = ns_entry->next; + free(ns_entry); + ns_entry = next; + } + + while (ctrlr_entry) { + struct ctrlr_entry *next = ctrlr_entry->next; + + spdk_nvme_detach(ctrlr_entry->ctrlr); + free(ctrlr_entry); + ctrlr_entry = next; + } +} + +static void +usage(const char *program_name) +{ + printf("%s [options]", program_name); + printf("\n"); + printf("options:\n"); + printf(" -t The maximum time needed for startup. The unit is us. The value should be bigger than 0.\n"); +} + +static int +parse_args(int argc, char **argv) +{ + int op; + + while ((op = getopt(argc, argv, "t:")) != -1) { + switch (op) { + case 't': + g_startup_time = spdk_strtol(optarg, 10); + if (g_startup_time < 0) { + fprintf(stderr, "Invalid nvme startup time\n"); + return g_startup_time; + } + break; + default: + usage(argv[0]); + return 1; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int rc; + struct spdk_env_opts opts; + uint64_t start_tsc, end_tsc, tsc_diff; + float time_used_in_usec; + + rc = parse_args(argc, argv); + if (rc != 0) { + return rc; + } + + if (g_startup_time == 0) { + usage(argv[0]); + return 1; + } + + start_tsc = spdk_get_ticks(); + /* + * SPDK relies on an abstraction around the local environment + * named env that handles memory allocation and PCI device operations. + * This library must be initialized first. + * + */ + spdk_env_opts_init(&opts); + opts.name = "startup"; + opts.shm_id = 0; + if (spdk_env_init(&opts) < 0) { + fprintf(stderr, "Unable to initialize SPDK env\n"); + return 1; + } + + printf("Initializing NVMe Controllers\n"); + + + /* + * Start the SPDK NVMe enumeration process. probe_cb will be called + * for each NVMe controller found, giving our application a choice on + * whether to attach to each controller. attach_cb will then be + * called for each controller after the SPDK NVMe driver has completed + * initializing the controller we chose to attach. + */ + rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL); + if (rc != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + cleanup(); + return 1; + } + + if (g_controllers == NULL) { + fprintf(stderr, "no NVMe controllers found\n"); + return 0; + } + + end_tsc = spdk_get_ticks(); + tsc_diff = end_tsc - start_tsc; + time_used_in_usec = ((float)tsc_diff) * 1000 * 1000 / spdk_get_ticks_hz(); + printf("Initialization complete.\n"); + printf("Time used:%-16.3f(us).\n", time_used_in_usec); + if (time_used_in_usec > g_startup_time) { + fprintf(stderr, "Too long time for initialization.\n"); + cleanup(); + return 1; + } + cleanup(); + return 0; +} |