diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/test/bdev | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/test/bdev')
-rw-r--r-- | src/spdk/test/bdev/Makefile | 44 | ||||
-rwxr-xr-x | src/spdk/test/bdev/bdev_raid.sh | 119 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevio/.gitignore | 1 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevio/Makefile | 48 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevio/bdevio.c | 1433 | ||||
-rwxr-xr-x | src/spdk/test/bdev/bdevio/tests.py | 88 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevperf/.gitignore | 1 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevperf/Makefile | 55 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevperf/bdevperf.c | 2137 | ||||
-rwxr-xr-x | src/spdk/test/bdev/bdevperf/bdevperf.py | 86 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevperf/common.sh | 33 | ||||
-rw-r--r-- | src/spdk/test/bdev/bdevperf/conf.json | 25 | ||||
-rwxr-xr-x | src/spdk/test/bdev/bdevperf/test_config.sh | 41 | ||||
-rwxr-xr-x | src/spdk/test/bdev/blockdev.sh | 408 | ||||
-rw-r--r-- | src/spdk/test/bdev/nbd_common.sh | 123 |
15 files changed, 4642 insertions, 0 deletions
diff --git a/src/spdk/test/bdev/Makefile b/src/spdk/test/bdev/Makefile new file mode 100644 index 000000000..cb15bd49a --- /dev/null +++ b/src/spdk/test/bdev/Makefile @@ -0,0 +1,44 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y = bdevio bdevperf + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/src/spdk/test/bdev/bdev_raid.sh b/src/spdk/test/bdev/bdev_raid.sh new file mode 100755 index 000000000..c85d33f6e --- /dev/null +++ b/src/spdk/test/bdev/bdev_raid.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +rpc_server=/var/tmp/spdk-raid.sock +rpc_py="$rootdir/scripts/rpc.py -s $rpc_server" +tmp_file=$SPDK_TEST_STORAGE/raidrandtest + +source $rootdir/test/common/autotest_common.sh +source $testdir/nbd_common.sh + +function raid_unmap_data_verify() { + if hash blkdiscard; then + local nbd=$1 + local rpc_server=$2 + local blksize + blksize=$(lsblk -o LOG-SEC $nbd | grep -v LOG-SEC | cut -d ' ' -f 5) + local rw_blk_num=4096 + local rw_len=$((blksize * rw_blk_num)) + local unmap_blk_offs=(0 1028 321) + local unmap_blk_nums=(128 2035 456) + local unmap_off + local unmap_len + + # data write + dd if=/dev/urandom of=$tmp_file bs=$blksize count=$rw_blk_num + dd if=$tmp_file of=$nbd bs=$blksize count=$rw_blk_num oflag=direct + blockdev --flushbufs $nbd + + # confirm random data is written correctly in raid0 device + cmp -b -n $rw_len $tmp_file $nbd + + for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do + unmap_off=$((blksize * ${unmap_blk_offs[$i]})) + unmap_len=$((blksize * ${unmap_blk_nums[$i]})) + + # data unmap on tmp_file + dd if=/dev/zero of=$tmp_file bs=$blksize seek=${unmap_blk_offs[$i]} count=${unmap_blk_nums[$i]} conv=notrunc + + # data unmap on raid bdev + blkdiscard -o $unmap_off -l $unmap_len $nbd + blockdev --flushbufs $nbd + + # data verify after unmap + cmp -b -n $rw_len $tmp_file $nbd + done + fi + + return 0 +} + +function on_error_exit() { + if [ -n "$raid_pid" ]; then + killprocess $raid_pid + fi + + rm -f $tmp_file + print_backtrace + exit 1 +} + +function configure_raid_bdev() { + 
rm -rf $testdir/rpcs.txt + + cat <<- EOL >> $testdir/rpcs.txt + bdev_malloc_create 32 512 -b Base_1 + bdev_malloc_create 32 512 -b Base_2 + bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n raid0 + EOL + $rpc_py < $testdir/rpcs.txt + + rm -rf $testdir/rpcs.txt +} + +function raid_function_test() { + if [ $(uname -s) = Linux ] && modprobe -n nbd; then + local nbd=/dev/nbd0 + local raid_bdev + + modprobe nbd + $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid & + raid_pid=$! + echo "Process raid pid: $raid_pid" + waitforlisten $raid_pid $rpc_server + + configure_raid_bdev + raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1) + if [ $raid_bdev = "" ]; then + echo "No raid0 device in SPDK app" + return 1 + fi + + nbd_start_disks $rpc_server $raid_bdev $nbd + count=$(nbd_get_count $rpc_server) + if [ $count -ne 1 ]; then + return 1 + fi + + raid_unmap_data_verify $nbd $rpc_server + + nbd_stop_disks $rpc_server $nbd + count=$(nbd_get_count $rpc_server) + if [ $count -ne 0 ]; then + return 1 + fi + + killprocess $raid_pid + else + echo "skipping bdev raid tests." + fi + + return 0 +} + +trap 'on_error_exit;' ERR + +raid_function_test + +rm -f $tmp_file diff --git a/src/spdk/test/bdev/bdevio/.gitignore b/src/spdk/test/bdev/bdevio/.gitignore new file mode 100644 index 000000000..1bb55429d --- /dev/null +++ b/src/spdk/test/bdev/bdevio/.gitignore @@ -0,0 +1 @@ +bdevio diff --git a/src/spdk/test/bdev/bdevio/Makefile b/src/spdk/test/bdev/bdevio/Makefile new file mode 100644 index 000000000..83aca58ca --- /dev/null +++ b/src/spdk/test/bdev/bdevio/Makefile @@ -0,0 +1,48 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = bdevio + +C_SRCS := bdevio.c + +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += app_rpc bdev bdev_rpc accel event trace log conf thread util rpc jsonrpc json sock notify + +LIBS += -lcunit + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/test/bdev/bdevio/bdevio.c b/src/spdk/test/bdev/bdevio/bdevio.c new file mode 100644 index 000000000..54d1712e3 --- /dev/null +++ b/src/spdk/test/bdev/bdevio/bdevio.c @@ -0,0 +1,1433 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/accel_engine.h" +#include "spdk/env.h" +#include "spdk/log.h" +#include "spdk/thread.h" +#include "spdk/event.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" + +#include "CUnit/Basic.h" + +#define BUFFER_IOVS 1024 +#define BUFFER_SIZE 260 * 1024 +#define BDEV_TASK_ARRAY_SIZE 2048 + +pthread_mutex_t g_test_mutex; +pthread_cond_t g_test_cond; + +static struct spdk_thread *g_thread_init; +static struct spdk_thread *g_thread_ut; +static struct spdk_thread *g_thread_io; +static bool g_wait_for_tests = false; +static int g_num_failures = 0; + +struct io_target { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *ch; + struct io_target *next; +}; + +struct bdevio_request { + char *buf; + char *fused_buf; + int data_len; + uint64_t offset; + struct iovec iov[BUFFER_IOVS]; + int iovcnt; + struct iovec fused_iov[BUFFER_IOVS]; + int fused_iovcnt; + struct io_target *target; +}; + +struct io_target *g_io_targets = NULL; +struct io_target *g_current_io_target = NULL; +static void rpc_perform_tests_cb(unsigned num_failures, struct spdk_jsonrpc_request *request); + +static void +execute_spdk_function(spdk_msg_fn fn, void *arg) +{ + pthread_mutex_lock(&g_test_mutex); + spdk_thread_send_msg(g_thread_io, fn, arg); + pthread_cond_wait(&g_test_cond, &g_test_mutex); + pthread_mutex_unlock(&g_test_mutex); +} + +static void +wake_ut_thread(void) +{ + pthread_mutex_lock(&g_test_mutex); + pthread_cond_signal(&g_test_cond); + pthread_mutex_unlock(&g_test_mutex); +} + +static void +__get_io_channel(void *arg) +{ + struct io_target *target = arg; + + target->ch = spdk_bdev_get_io_channel(target->bdev_desc); + assert(target->ch); + wake_ut_thread(); +} + +static int +bdevio_construct_target(struct spdk_bdev *bdev) +{ + struct io_target *target; + int rc; + uint64_t num_blocks = spdk_bdev_get_num_blocks(bdev); + uint32_t block_size = 
spdk_bdev_get_block_size(bdev); + + target = malloc(sizeof(struct io_target)); + if (target == NULL) { + return -ENOMEM; + } + + rc = spdk_bdev_open(bdev, true, NULL, NULL, &target->bdev_desc); + if (rc != 0) { + free(target); + SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc); + return rc; + } + + printf(" %s: %" PRIu64 " blocks of %" PRIu32 " bytes (%" PRIu64 " MiB)\n", + spdk_bdev_get_name(bdev), + num_blocks, block_size, + (num_blocks * block_size + 1024 * 1024 - 1) / (1024 * 1024)); + + target->bdev = bdev; + target->next = g_io_targets; + execute_spdk_function(__get_io_channel, target); + g_io_targets = target; + + return 0; +} + +static int +bdevio_construct_targets(void) +{ + struct spdk_bdev *bdev; + int rc; + + printf("I/O targets:\n"); + + bdev = spdk_bdev_first_leaf(); + while (bdev != NULL) { + rc = bdevio_construct_target(bdev); + if (rc < 0) { + SPDK_ERRLOG("Could not construct bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc); + return rc; + } + bdev = spdk_bdev_next_leaf(bdev); + } + + if (g_io_targets == NULL) { + SPDK_ERRLOG("No bdevs to perform tests on\n"); + return -1; + } + + return 0; +} + +static void +__put_io_channel(void *arg) +{ + struct io_target *target = arg; + + spdk_put_io_channel(target->ch); + wake_ut_thread(); +} + +static void +bdevio_cleanup_targets(void) +{ + struct io_target *target; + + target = g_io_targets; + while (target != NULL) { + execute_spdk_function(__put_io_channel, target); + spdk_bdev_close(target->bdev_desc); + g_io_targets = target->next; + free(target); + target = g_io_targets; + } +} + +static bool g_completion_success; + +static void +initialize_buffer(char **buf, int pattern, int size) +{ + *buf = spdk_zmalloc(size, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + memset(*buf, pattern, size); +} + +static void +quick_test_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg) +{ + g_completion_success = success; + spdk_bdev_free_io(bdev_io); + 
wake_ut_thread(); +} + +static void +__blockdev_write(void *arg) +{ + struct bdevio_request *req = arg; + struct io_target *target = req->target; + int rc; + + if (req->iovcnt) { + rc = spdk_bdev_writev(target->bdev_desc, target->ch, req->iov, req->iovcnt, req->offset, + req->data_len, quick_test_complete, NULL); + } else { + rc = spdk_bdev_write(target->bdev_desc, target->ch, req->buf, req->offset, + req->data_len, quick_test_complete, NULL); + } + + if (rc) { + g_completion_success = false; + wake_ut_thread(); + } +} + +static void +__blockdev_write_zeroes(void *arg) +{ + struct bdevio_request *req = arg; + struct io_target *target = req->target; + int rc; + + rc = spdk_bdev_write_zeroes(target->bdev_desc, target->ch, req->offset, + req->data_len, quick_test_complete, NULL); + if (rc) { + g_completion_success = false; + wake_ut_thread(); + } +} + +static void +__blockdev_compare_and_write(void *arg) +{ + struct bdevio_request *req = arg; + struct io_target *target = req->target; + int rc; + + rc = spdk_bdev_comparev_and_writev_blocks(target->bdev_desc, target->ch, req->iov, req->iovcnt, + req->fused_iov, req->fused_iovcnt, req->offset, req->data_len, quick_test_complete, NULL); + + if (rc) { + g_completion_success = false; + wake_ut_thread(); + } +} + +static void +sgl_chop_buffer(struct bdevio_request *req, int iov_len) +{ + int data_len = req->data_len; + char *buf = req->buf; + + req->iovcnt = 0; + if (!iov_len) { + return; + } + + for (; data_len > 0 && req->iovcnt < BUFFER_IOVS; req->iovcnt++) { + if (data_len < iov_len) { + iov_len = data_len; + } + + req->iov[req->iovcnt].iov_base = buf; + req->iov[req->iovcnt].iov_len = iov_len; + + buf += iov_len; + data_len -= iov_len; + } + + CU_ASSERT_EQUAL_FATAL(data_len, 0); +} + +static void +sgl_chop_fused_buffer(struct bdevio_request *req, int iov_len) +{ + int data_len = req->data_len; + char *buf = req->fused_buf; + + req->fused_iovcnt = 0; + if (!iov_len) { + return; + } + + for (; data_len > 0 && 
req->fused_iovcnt < BUFFER_IOVS; req->fused_iovcnt++) { + if (data_len < iov_len) { + iov_len = data_len; + } + + req->fused_iov[req->fused_iovcnt].iov_base = buf; + req->fused_iov[req->fused_iovcnt].iov_len = iov_len; + + buf += iov_len; + data_len -= iov_len; + } + + CU_ASSERT_EQUAL_FATAL(data_len, 0); +} + +static void +blockdev_write(struct io_target *target, char *tx_buf, + uint64_t offset, int data_len, int iov_len) +{ + struct bdevio_request req; + + req.target = target; + req.buf = tx_buf; + req.data_len = data_len; + req.offset = offset; + sgl_chop_buffer(&req, iov_len); + + g_completion_success = false; + + execute_spdk_function(__blockdev_write, &req); +} + +static void +_blockdev_compare_and_write(struct io_target *target, char *cmp_buf, char *write_buf, + uint64_t offset, int data_len, int iov_len) +{ + struct bdevio_request req; + + req.target = target; + req.buf = cmp_buf; + req.fused_buf = write_buf; + req.data_len = data_len; + req.offset = offset; + sgl_chop_buffer(&req, iov_len); + sgl_chop_fused_buffer(&req, iov_len); + + g_completion_success = false; + + execute_spdk_function(__blockdev_compare_and_write, &req); +} + +static void +blockdev_write_zeroes(struct io_target *target, char *tx_buf, + uint64_t offset, int data_len) +{ + struct bdevio_request req; + + req.target = target; + req.buf = tx_buf; + req.data_len = data_len; + req.offset = offset; + + g_completion_success = false; + + execute_spdk_function(__blockdev_write_zeroes, &req); +} + +static void +__blockdev_read(void *arg) +{ + struct bdevio_request *req = arg; + struct io_target *target = req->target; + int rc; + + if (req->iovcnt) { + rc = spdk_bdev_readv(target->bdev_desc, target->ch, req->iov, req->iovcnt, req->offset, + req->data_len, quick_test_complete, NULL); + } else { + rc = spdk_bdev_read(target->bdev_desc, target->ch, req->buf, req->offset, + req->data_len, quick_test_complete, NULL); + } + + if (rc) { + g_completion_success = false; + wake_ut_thread(); + } +} + +static 
void +blockdev_read(struct io_target *target, char *rx_buf, + uint64_t offset, int data_len, int iov_len) +{ + struct bdevio_request req; + + req.target = target; + req.buf = rx_buf; + req.data_len = data_len; + req.offset = offset; + req.iovcnt = 0; + sgl_chop_buffer(&req, iov_len); + + g_completion_success = false; + + execute_spdk_function(__blockdev_read, &req); +} + +static int +blockdev_write_read_data_match(char *rx_buf, char *tx_buf, int data_length) +{ + int rc; + rc = memcmp(rx_buf, tx_buf, data_length); + + spdk_free(rx_buf); + spdk_free(tx_buf); + + return rc; +} + +static bool +blockdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t data_length) +{ + if (data_length < spdk_bdev_get_block_size(bdev) || + data_length % spdk_bdev_get_block_size(bdev) || + data_length / spdk_bdev_get_block_size(bdev) > spdk_bdev_get_num_blocks(bdev)) { + return false; + } + + return true; +} + +static void +blockdev_write_read(uint32_t data_length, uint32_t iov_len, int pattern, uint64_t offset, + int expected_rc, bool write_zeroes) +{ + struct io_target *target; + char *tx_buf = NULL; + char *rx_buf = NULL; + int rc; + + target = g_current_io_target; + + if (!blockdev_io_valid_blocks(target->bdev, data_length)) { + return; + } + + if (!write_zeroes) { + initialize_buffer(&tx_buf, pattern, data_length); + initialize_buffer(&rx_buf, 0, data_length); + + blockdev_write(target, tx_buf, offset, data_length, iov_len); + } else { + initialize_buffer(&tx_buf, 0, data_length); + initialize_buffer(&rx_buf, pattern, data_length); + + blockdev_write_zeroes(target, tx_buf, offset, data_length); + } + + + if (expected_rc == 0) { + CU_ASSERT_EQUAL(g_completion_success, true); + } else { + CU_ASSERT_EQUAL(g_completion_success, false); + } + blockdev_read(target, rx_buf, offset, data_length, iov_len); + + if (expected_rc == 0) { + CU_ASSERT_EQUAL(g_completion_success, true); + } else { + CU_ASSERT_EQUAL(g_completion_success, false); + } + + if (g_completion_success) { + rc = 
blockdev_write_read_data_match(rx_buf, tx_buf, data_length); + /* Assert the write by comparing it with values read + * from each blockdev */ + CU_ASSERT_EQUAL(rc, 0); + } +} + +static void +blockdev_compare_and_write(uint32_t data_length, uint32_t iov_len, uint64_t offset) +{ + struct io_target *target; + char *tx_buf = NULL; + char *write_buf = NULL; + char *rx_buf = NULL; + int rc; + + target = g_current_io_target; + + if (!blockdev_io_valid_blocks(target->bdev, data_length)) { + return; + } + + initialize_buffer(&tx_buf, 0xAA, data_length); + initialize_buffer(&rx_buf, 0, data_length); + initialize_buffer(&write_buf, 0xBB, data_length); + + blockdev_write(target, tx_buf, offset, data_length, iov_len); + CU_ASSERT_EQUAL(g_completion_success, true); + + _blockdev_compare_and_write(target, tx_buf, write_buf, offset, data_length, iov_len); + CU_ASSERT_EQUAL(g_completion_success, true); + + _blockdev_compare_and_write(target, tx_buf, write_buf, offset, data_length, iov_len); + CU_ASSERT_EQUAL(g_completion_success, false); + + blockdev_read(target, rx_buf, offset, data_length, iov_len); + CU_ASSERT_EQUAL(g_completion_success, true); + rc = blockdev_write_read_data_match(rx_buf, write_buf, data_length); + /* Assert the write by comparing it with values read + * from each blockdev */ + CU_ASSERT_EQUAL(rc, 0); +} + +static void +blockdev_write_read_4k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 4K */ + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. 
*/ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +blockdev_write_zeroes_read_4k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 4K */ + data_length = 4096; + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write_zeroes and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1); +} + +/* + * This i/o will not have to split at the bdev layer. + */ +static void +blockdev_write_zeroes_read_1m(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 1M */ + data_length = 1048576; + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write_zeroes and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1); +} + +/* + * This i/o will have to split at the bdev layer if + * write-zeroes is not supported by the bdev. + */ +static void +blockdev_write_zeroes_read_3m(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 3M */ + data_length = 3145728; + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write_zeroes and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1); +} + +/* + * This i/o will have to split at the bdev layer if + * write-zeroes is not supported by the bdev. It also + * tests a write size that is not an even multiple of + * the bdev layer zero buffer size. 
+ */ +static void +blockdev_write_zeroes_read_3m_500k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 3.5M */ + data_length = 3670016; + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write_zeroes and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1); +} + +static void +blockdev_writev_readv_4k(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 4K */ + data_length = 4096; + iov_len = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0); +} + +static void +blockdev_comparev_and_writev(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + + data_length = 1; + iov_len = 1; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + + blockdev_compare_and_write(data_length, iov_len, offset); +} + +static void +blockdev_writev_readv_30x4k(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 4K */ + data_length = 4096 * 30; + iov_len = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. 
*/ + expected_rc = 0; + + blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0); +} + +static void +blockdev_write_read_512Bytes(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 512 */ + data_length = 512; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +blockdev_writev_readv_512Bytes(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 512 */ + data_length = 512; + iov_len = 512; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0); +} + +static void +blockdev_write_read_size_gt_128k(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 132K */ + data_length = 135168; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +blockdev_writev_readv_size_gt_128k(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 132K */ + data_length = 135168; + iov_len = 135168; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. 
*/ + expected_rc = 0; + + blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0); +} + +static void +blockdev_writev_readv_size_gt_128k_two_iov(void) +{ + uint32_t data_length, iov_len; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 132K */ + data_length = 135168; + iov_len = 128 * 1024; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + + blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0); +} + +static void +blockdev_write_read_invalid_size(void) +{ + uint32_t data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size is not a multiple of the block size */ + data_length = 0x1015; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 8192; + pattern = 0xA3; + /* Params are invalid, hence the expected return value + * of write and read for all blockdevs is < 0 */ + expected_rc = -1; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +blockdev_write_read_offset_plus_nbytes_equals_bdev_size(void) +{ + struct io_target *target; + struct spdk_bdev *bdev; + char *tx_buf = NULL; + char *rx_buf = NULL; + uint64_t offset; + uint32_t block_size; + int rc; + + target = g_current_io_target; + bdev = target->bdev; + + block_size = spdk_bdev_get_block_size(bdev); + + /* The start offset has been set to a marginal value + * such that offset + nbytes == Total size of + * blockdev. 
*/ + offset = ((spdk_bdev_get_num_blocks(bdev) - 1) * block_size); + + initialize_buffer(&tx_buf, 0xA3, block_size); + initialize_buffer(&rx_buf, 0, block_size); + + blockdev_write(target, tx_buf, offset, block_size, 0); + CU_ASSERT_EQUAL(g_completion_success, true); + + blockdev_read(target, rx_buf, offset, block_size, 0); + CU_ASSERT_EQUAL(g_completion_success, true); + + rc = blockdev_write_read_data_match(rx_buf, tx_buf, block_size); + /* Assert the write by comparing it with values read + * from each blockdev */ + CU_ASSERT_EQUAL(rc, 0); +} + +static void +blockdev_write_read_offset_plus_nbytes_gt_bdev_size(void) +{ + struct io_target *target; + struct spdk_bdev *bdev; + char *tx_buf = NULL; + char *rx_buf = NULL; + int data_length; + uint64_t offset; + int pattern; + + /* Tests the overflow condition of the blockdevs. */ + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + pattern = 0xA3; + + target = g_current_io_target; + bdev = target->bdev; + + /* The start offset has been set to a valid value + * but offset + nbytes is greater than the Total size + * of the blockdev. The test should fail. */ + offset = ((spdk_bdev_get_num_blocks(bdev) * spdk_bdev_get_block_size(bdev)) - 1024); + + initialize_buffer(&tx_buf, pattern, data_length); + initialize_buffer(&rx_buf, 0, data_length); + + blockdev_write(target, tx_buf, offset, data_length, 0); + CU_ASSERT_EQUAL(g_completion_success, false); + + blockdev_read(target, rx_buf, offset, data_length, 0); + CU_ASSERT_EQUAL(g_completion_success, false); +} + +static void +blockdev_write_read_max_offset(void) +{ + int data_length; + uint64_t offset; + int pattern; + int expected_rc; + + data_length = 4096; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + /* The start offset has been set to UINT64_MAX such that + * adding nbytes wraps around and points to an invalid address. 
*/ + offset = UINT64_MAX; + pattern = 0xA3; + /* Params are invalid, hence the expected return value + * of write and read for all blockdevs is < 0 */ + expected_rc = -1; + + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +blockdev_overlapped_write_read_8k(void) +{ + int data_length; + uint64_t offset; + int pattern; + int expected_rc; + + /* Data size = 8K */ + data_length = 8192; + CU_ASSERT_TRUE(data_length < BUFFER_SIZE); + offset = 0; + pattern = 0xA3; + /* Params are valid, hence the expected return value + * of write and read for all blockdevs is 0. */ + expected_rc = 0; + /* Assert the write by comparing it with values read + * from the same offset for each blockdev */ + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); + + /* Overwrite the pattern 0xbb of size 8K on an address offset overlapping + * with the address written above and assert the new value in + * the overlapped address range */ + /* Populate 8k with value 0xBB */ + pattern = 0xBB; + /* Offset = 6144; Overlap offset addresses and write value 0xbb */ + offset = 4096; + /* Assert the write by comparing it with values read + * from the overlapped offset for each blockdev */ + blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0); +} + +static void +__blockdev_reset(void *arg) +{ + struct bdevio_request *req = arg; + struct io_target *target = req->target; + int rc; + + rc = spdk_bdev_reset(target->bdev_desc, target->ch, quick_test_complete, NULL); + if (rc < 0) { + g_completion_success = false; + wake_ut_thread(); + } +} + +static void +blockdev_test_reset(void) +{ + struct bdevio_request req; + struct io_target *target; + + target = g_current_io_target; + req.target = target; + + g_completion_success = false; + + execute_spdk_function(__blockdev_reset, &req); + + /* Workaround: NVMe-oF target doesn't support reset yet - so for now + * don't fail the test if it's an NVMe bdev. 
+ */ + if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) { + CU_ASSERT_EQUAL(g_completion_success, true); + } +} + +struct bdevio_passthrough_request { + struct spdk_nvme_cmd cmd; + void *buf; + uint32_t len; + struct io_target *target; + int sct; + int sc; + uint32_t cdw0; +}; + +static void +nvme_pt_test_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg) +{ + struct bdevio_passthrough_request *pt_req = arg; + + spdk_bdev_io_get_nvme_status(bdev_io, &pt_req->cdw0, &pt_req->sct, &pt_req->sc); + spdk_bdev_free_io(bdev_io); + wake_ut_thread(); +} + +static void +__blockdev_nvme_passthru(void *arg) +{ + struct bdevio_passthrough_request *pt_req = arg; + struct io_target *target = pt_req->target; + int rc; + + rc = spdk_bdev_nvme_io_passthru(target->bdev_desc, target->ch, + &pt_req->cmd, pt_req->buf, pt_req->len, + nvme_pt_test_complete, pt_req); + if (rc) { + wake_ut_thread(); + } +} + +static void +blockdev_test_nvme_passthru_rw(void) +{ + struct bdevio_passthrough_request pt_req; + void *write_buf, *read_buf; + struct io_target *target; + + target = g_current_io_target; + + if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) { + return; + } + + memset(&pt_req, 0, sizeof(pt_req)); + pt_req.target = target; + pt_req.cmd.opc = SPDK_NVME_OPC_WRITE; + pt_req.cmd.nsid = 1; + *(uint64_t *)&pt_req.cmd.cdw10 = 4; + pt_req.cmd.cdw12 = 0; + + pt_req.len = spdk_bdev_get_block_size(target->bdev); + write_buf = spdk_malloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + memset(write_buf, 0xA5, pt_req.len); + pt_req.buf = write_buf; + + pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC; + pt_req.sc = SPDK_NVME_SC_INVALID_FIELD; + execute_spdk_function(__blockdev_nvme_passthru, &pt_req); + CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC); + CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS); + + pt_req.cmd.opc = SPDK_NVME_OPC_READ; + read_buf = spdk_zmalloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + 
pt_req.buf = read_buf; + + pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC; + pt_req.sc = SPDK_NVME_SC_INVALID_FIELD; + execute_spdk_function(__blockdev_nvme_passthru, &pt_req); + CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC); + CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS); + + CU_ASSERT(!memcmp(read_buf, write_buf, pt_req.len)); + spdk_free(read_buf); + spdk_free(write_buf); +} + +static void +blockdev_test_nvme_passthru_vendor_specific(void) +{ + struct bdevio_passthrough_request pt_req; + struct io_target *target; + + target = g_current_io_target; + + if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) { + return; + } + + memset(&pt_req, 0, sizeof(pt_req)); + pt_req.target = target; + pt_req.cmd.opc = 0x7F; /* choose known invalid opcode */ + pt_req.cmd.nsid = 1; + + pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC; + pt_req.sc = SPDK_NVME_SC_SUCCESS; + pt_req.cdw0 = 0xbeef; + execute_spdk_function(__blockdev_nvme_passthru, &pt_req); + CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC); + CU_ASSERT(pt_req.sc == SPDK_NVME_SC_INVALID_OPCODE); + CU_ASSERT(pt_req.cdw0 == 0x0); +} + +static void +__blockdev_nvme_admin_passthru(void *arg) +{ + struct bdevio_passthrough_request *pt_req = arg; + struct io_target *target = pt_req->target; + int rc; + + rc = spdk_bdev_nvme_admin_passthru(target->bdev_desc, target->ch, + &pt_req->cmd, pt_req->buf, pt_req->len, + nvme_pt_test_complete, pt_req); + if (rc) { + wake_ut_thread(); + } +} + +static void +blockdev_test_nvme_admin_passthru(void) +{ + struct io_target *target; + struct bdevio_passthrough_request pt_req; + + target = g_current_io_target; + + if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) { + return; + } + + memset(&pt_req, 0, sizeof(pt_req)); + pt_req.target = target; + pt_req.cmd.opc = SPDK_NVME_OPC_IDENTIFY; + pt_req.cmd.nsid = 0; + *(uint64_t *)&pt_req.cmd.cdw10 = SPDK_NVME_IDENTIFY_CTRLR; + + pt_req.len = sizeof(struct spdk_nvme_ctrlr_data); + pt_req.buf = 
spdk_malloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + pt_req.sct = SPDK_NVME_SCT_GENERIC; + pt_req.sc = SPDK_NVME_SC_SUCCESS; + execute_spdk_function(__blockdev_nvme_admin_passthru, &pt_req); + CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC); + CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS); +} + +static void +__stop_init_thread(void *arg) +{ + unsigned num_failures = g_num_failures; + struct spdk_jsonrpc_request *request = arg; + + g_num_failures = 0; + + bdevio_cleanup_targets(); + if (g_wait_for_tests) { + /* Do not stop the app yet, wait for another RPC */ + rpc_perform_tests_cb(num_failures, request); + return; + } + spdk_app_stop(num_failures); +} + +static void +stop_init_thread(unsigned num_failures, struct spdk_jsonrpc_request *request) +{ + g_num_failures = num_failures; + + spdk_thread_send_msg(g_thread_init, __stop_init_thread, request); +} + +static int +suite_init(void) +{ + if (g_current_io_target == NULL) { + g_current_io_target = g_io_targets; + } + return 0; +} + +static int +suite_fini(void) +{ + g_current_io_target = g_current_io_target->next; + return 0; +} + +#define SUITE_NAME_MAX 64 + +static int +__setup_ut_on_single_target(struct io_target *target) +{ + unsigned rc = 0; + CU_pSuite suite = NULL; + char name[SUITE_NAME_MAX]; + + snprintf(name, sizeof(name), "bdevio tests on: %s", spdk_bdev_get_name(target->bdev)); + suite = CU_add_suite(name, suite_init, suite_fini); + if (suite == NULL) { + CU_cleanup_registry(); + rc = CU_get_error(); + return -rc; + } + + if ( + CU_add_test(suite, "blockdev write read 4k", blockdev_write_read_4k) == NULL + || CU_add_test(suite, "blockdev write zeroes read 4k", blockdev_write_zeroes_read_4k) == NULL + || CU_add_test(suite, "blockdev write zeroes read 1m", blockdev_write_zeroes_read_1m) == NULL + || CU_add_test(suite, "blockdev write zeroes read 3m", blockdev_write_zeroes_read_3m) == NULL + || CU_add_test(suite, "blockdev write zeroes read 3.5m", blockdev_write_zeroes_read_3m_500k) == 
NULL + || CU_add_test(suite, "blockdev reset", + blockdev_test_reset) == NULL + || CU_add_test(suite, "blockdev write read 512 bytes", + blockdev_write_read_512Bytes) == NULL + || CU_add_test(suite, "blockdev write read size > 128k", + blockdev_write_read_size_gt_128k) == NULL + || CU_add_test(suite, "blockdev write read invalid size", + blockdev_write_read_invalid_size) == NULL + || CU_add_test(suite, "blockdev write read offset + nbytes == size of blockdev", + blockdev_write_read_offset_plus_nbytes_equals_bdev_size) == NULL + || CU_add_test(suite, "blockdev write read offset + nbytes > size of blockdev", + blockdev_write_read_offset_plus_nbytes_gt_bdev_size) == NULL + || CU_add_test(suite, "blockdev write read max offset", + blockdev_write_read_max_offset) == NULL + || CU_add_test(suite, "blockdev write read 8k on overlapped address offset", + blockdev_overlapped_write_read_8k) == NULL + || CU_add_test(suite, "blockdev writev readv 4k", blockdev_writev_readv_4k) == NULL + || CU_add_test(suite, "blockdev writev readv 30 x 4k", + blockdev_writev_readv_30x4k) == NULL + || CU_add_test(suite, "blockdev writev readv 512 bytes", + blockdev_writev_readv_512Bytes) == NULL + || CU_add_test(suite, "blockdev writev readv size > 128k", + blockdev_writev_readv_size_gt_128k) == NULL + || CU_add_test(suite, "blockdev writev readv size > 128k in two iovs", + blockdev_writev_readv_size_gt_128k_two_iov) == NULL + || CU_add_test(suite, "blockdev comparev and writev", blockdev_comparev_and_writev) == NULL + || CU_add_test(suite, "blockdev nvme passthru rw", + blockdev_test_nvme_passthru_rw) == NULL + || CU_add_test(suite, "blockdev nvme passthru vendor specific", + blockdev_test_nvme_passthru_vendor_specific) == NULL + || CU_add_test(suite, "blockdev nvme admin passthru", + blockdev_test_nvme_admin_passthru) == NULL + ) { + CU_cleanup_registry(); + rc = CU_get_error(); + return -rc; + } + return 0; +} + +static void +__run_ut_thread(void *arg) +{ + struct spdk_jsonrpc_request 
*request = arg; + int rc = 0; + struct io_target *target; + unsigned num_failures; + + if (CU_initialize_registry() != CUE_SUCCESS) { + /* CUnit error, probably won't recover */ + rc = CU_get_error(); + stop_init_thread(-rc, request); + } + + target = g_io_targets; + while (target != NULL) { + rc = __setup_ut_on_single_target(target); + if (rc < 0) { + /* CUnit error, probably won't recover */ + stop_init_thread(-rc, request); + } + target = target->next; + } + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + + stop_init_thread(num_failures, request); +} + +static void +__construct_targets(void *arg) +{ + if (bdevio_construct_targets() < 0) { + spdk_app_stop(-1); + return; + } + + spdk_thread_send_msg(g_thread_ut, __run_ut_thread, NULL); +} + +static void +test_main(void *arg1) +{ + struct spdk_cpuset tmpmask = {}, *appmask; + uint32_t cpu, init_cpu; + + pthread_mutex_init(&g_test_mutex, NULL); + pthread_cond_init(&g_test_cond, NULL); + + appmask = spdk_app_get_core_mask(); + + if (spdk_cpuset_count(appmask) < 3) { + spdk_app_stop(-1); + return; + } + + init_cpu = spdk_env_get_current_core(); + g_thread_init = spdk_get_thread(); + + for (cpu = 0; cpu < SPDK_ENV_LCORE_ID_ANY; cpu++) { + if (cpu != init_cpu && spdk_cpuset_get_cpu(appmask, cpu)) { + spdk_cpuset_zero(&tmpmask); + spdk_cpuset_set_cpu(&tmpmask, cpu, true); + g_thread_ut = spdk_thread_create("ut_thread", &tmpmask); + break; + } + } + + if (cpu == SPDK_ENV_LCORE_ID_ANY) { + spdk_app_stop(-1); + return; + } + + for (cpu++; cpu < SPDK_ENV_LCORE_ID_ANY; cpu++) { + if (cpu != init_cpu && spdk_cpuset_get_cpu(appmask, cpu)) { + spdk_cpuset_zero(&tmpmask); + spdk_cpuset_set_cpu(&tmpmask, cpu, true); + g_thread_io = spdk_thread_create("io_thread", &tmpmask); + break; + } + } + + if (cpu == SPDK_ENV_LCORE_ID_ANY) { + spdk_app_stop(-1); + return; + } + + if (g_wait_for_tests) { + /* Do not perform any tests until RPC is received */ + 
return; + } + + spdk_thread_send_msg(g_thread_init, __construct_targets, NULL); +} + +static void +bdevio_usage(void) +{ + printf(" -w start bdevio app and wait for RPC to start the tests\n"); +} + +static int +bdevio_parse_arg(int ch, char *arg) +{ + switch (ch) { + case 'w': + g_wait_for_tests = true; + break; + default: + return -EINVAL; + } + return 0; +} + +struct rpc_perform_tests { + char *name; +}; + +static void +free_rpc_perform_tests(struct rpc_perform_tests *r) +{ + free(r->name); +} + +static const struct spdk_json_object_decoder rpc_perform_tests_decoders[] = { + {"name", offsetof(struct rpc_perform_tests, name), spdk_json_decode_string, true}, +}; + +static void +rpc_perform_tests_cb(unsigned num_failures, struct spdk_jsonrpc_request *request) +{ + struct spdk_json_write_ctx *w; + + if (num_failures == 0) { + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_uint32(w, num_failures); + spdk_jsonrpc_end_result(request, w); + } else { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "%d test cases failed", num_failures); + } +} + +static void +rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + struct rpc_perform_tests req = {NULL}; + struct spdk_bdev *bdev; + int rc; + + if (params && spdk_json_decode_object(params, rpc_perform_tests_decoders, + SPDK_COUNTOF(rpc_perform_tests_decoders), + &req)) { + SPDK_ERRLOG("spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters"); + goto invalid; + } + + if (req.name) { + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + SPDK_ERRLOG("Bdev '%s' does not exist\n", req.name); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Bdev '%s' does not exist: %s", + req.name, spdk_strerror(ENODEV)); + goto invalid; + } + rc = bdevio_construct_target(bdev); + if (rc < 0) { + SPDK_ERRLOG("Could not construct 
target for bdev '%s'\n", spdk_bdev_get_name(bdev)); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Could not construct target for bdev '%s': %s", + spdk_bdev_get_name(bdev), spdk_strerror(-rc)); + goto invalid; + } + } else { + rc = bdevio_construct_targets(); + if (rc < 0) { + SPDK_ERRLOG("Could not construct targets for all bdevs\n"); + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "Could not construct targets for all bdevs: %s", + spdk_strerror(-rc)); + goto invalid; + } + } + free_rpc_perform_tests(&req); + + spdk_thread_send_msg(g_thread_ut, __run_ut_thread, request); + + return; + +invalid: + free_rpc_perform_tests(&req); +} +SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME) + +int +main(int argc, char **argv) +{ + int rc; + struct spdk_app_opts opts = {}; + + spdk_app_opts_init(&opts); + opts.name = "bdevio"; + opts.reactor_mask = "0x7"; + + if ((rc = spdk_app_parse_args(argc, argv, &opts, "w", NULL, + bdevio_parse_arg, bdevio_usage)) != + SPDK_APP_PARSE_ARGS_SUCCESS) { + return rc; + } + + rc = spdk_app_start(&opts, test_main, NULL); + spdk_app_fini(); + + return rc; +} diff --git a/src/spdk/test/bdev/bdevio/tests.py b/src/spdk/test/bdev/bdevio/tests.py new file mode 100755 index 000000000..8b46061d0 --- /dev/null +++ b/src/spdk/test/bdev/bdevio/tests.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +import logging +import argparse +import sys +import shlex + +try: + from rpc.client import print_dict, JSONRPCException + import rpc +except ImportError: + print("SPDK RPC library missing. 
Please add spdk/scripts/ directory to PYTHONPATH:") + print("'export PYTHONPATH=$PYTHONPATH:./spdk/scripts/'") + exit(1) + +try: + from shlex import quote +except ImportError: + from pipes import quote + + +def print_array(a): + print(" ".join((quote(v) for v in a))) + + +def perform_tests_func(client, name=None): + """ + + Args: + name: bdev name to perform bdevio tests on (optional; if omitted, test all bdevs) + + Returns: + Number of failures in tests. 0 means no errors found. + """ + params = {} + if name: + params['name'] = name + return client.call('perform_tests', params) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='SPDK RPC command line interface. NOTE: spdk/scripts/ is expected in PYTHONPATH') + parser.add_argument('-s', dest='server_addr', + help='RPC domain socket path or IP address', default='/var/tmp/spdk.sock') + parser.add_argument('-p', dest='port', + help='RPC port number (if server_addr is IP address)', + default=5260, type=int) + parser.add_argument('-t', dest='timeout', + help='Timeout as a floating point number expressed in seconds waiting for response. Default: 60.0', + default=60.0, type=float) + parser.add_argument('-v', dest='verbose', action='store_const', const="INFO", + help='Set verbose mode to INFO', default="ERROR") + parser.add_argument('--verbose', dest='verbose', choices=['DEBUG', 'INFO', 'ERROR'], + help="""Set verbose level. """) + subparsers = parser.add_subparsers(help='RPC methods') + + def perform_tests(args): + print_dict(perform_tests_func(args.client, name=args.name)) + + p = subparsers.add_parser('perform_tests', help='Perform all bdevio tests on select bdev') + p.add_argument('-b', '--name', help="Name of the Blockdev. 
Example: Nvme0n1") + p.set_defaults(func=perform_tests) + + def call_rpc_func(args): + try: + args.func(args) + except JSONRPCException as ex: + print(ex.message) + exit(1) + + def execute_script(parser, client, fd): + for rpc_call in map(str.rstrip, fd): + if not rpc_call.strip(): + continue + args = parser.parse_args(shlex.split(rpc_call)) + args.client = client + call_rpc_func(args) + + args = parser.parse_args() + args.client = rpc.client.JSONRPCClient(args.server_addr, args.port, args.timeout, log_level=getattr(logging, args.verbose.upper())) + if hasattr(args, 'func'): + call_rpc_func(args) + elif sys.stdin.isatty(): + # No arguments and no data piped through stdin + parser.print_help() + exit(1) + else: + execute_script(parser, args.client, sys.stdin) diff --git a/src/spdk/test/bdev/bdevperf/.gitignore b/src/spdk/test/bdev/bdevperf/.gitignore new file mode 100644 index 000000000..e14ddd841 --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/.gitignore @@ -0,0 +1 @@ +bdevperf diff --git a/src/spdk/test/bdev/bdevperf/Makefile b/src/spdk/test/bdev/bdevperf/Makefile new file mode 100644 index 000000000..689d7fe10 --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/Makefile @@ -0,0 +1,55 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk + +APP = bdevperf + +C_SRCS := bdevperf.c + +SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) +SPDK_LIB_LIST += bdev accel event trace log conf thread util sock notify +SPDK_LIB_LIST += rpc jsonrpc json app_rpc log_rpc bdev_rpc + +ifeq ($(OS),Linux) +SPDK_LIB_LIST += event_nbd nbd +endif + +ifeq ($(SPDK_ROOT_DIR)/lib/env_dpdk,$(CONFIG_ENV)) +SPDK_LIB_LIST += env_dpdk_rpc +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/src/spdk/test/bdev/bdevperf/bdevperf.c b/src/spdk/test/bdev/bdevperf/bdevperf.c new file mode 100644 index 000000000..adcdf31cb --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/bdevperf.c @@ -0,0 +1,2137 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" +#include "spdk/accel_engine.h" +#include "spdk/endian.h" +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/util.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/rpc.h" +#include "spdk/bit_array.h" +#include "spdk/conf.h" + +#define BDEVPERF_CONFIG_MAX_FILENAME 1024 +#define BDEVPERF_CONFIG_UNDEFINED -1 +#define BDEVPERF_CONFIG_ERROR -2 + +struct bdevperf_task { + struct iovec iov; + struct bdevperf_job *job; + struct spdk_bdev_io *bdev_io; + void *buf; + void *md_buf; + uint64_t offset_blocks; + struct bdevperf_task *task_to_abort; + enum spdk_bdev_io_type io_type; + TAILQ_ENTRY(bdevperf_task) link; + struct spdk_bdev_io_wait_entry bdev_io_wait; +}; + +static const char *g_workload_type = NULL; +static int g_io_size = 0; +/* initialize to invalid value so we can detect if user overrides it. */ +static int g_rw_percentage = -1; +static bool g_verify = false; +static bool g_reset = false; +static bool g_continue_on_failure = false; +static bool g_abort = false; +static int g_queue_depth = 0; +static uint64_t g_time_in_usec; +static int g_show_performance_real_time = 0; +static uint64_t g_show_performance_period_in_usec = 1000000; +static uint64_t g_show_performance_period_num = 0; +static uint64_t g_show_performance_ema_period = 0; +static int g_run_rc = 0; +static bool g_shutdown = false; +static uint64_t g_shutdown_tsc; +static bool g_zcopy = true; +static struct spdk_thread *g_master_thread; +static int g_time_in_sec = 0; +static bool g_mix_specified = false; +static const char *g_job_bdev_name; +static bool g_wait_for_tests = false; +static struct spdk_jsonrpc_request *g_request = NULL; +static bool g_multithread_mode = false; +static int g_timeout_in_sec; +static struct spdk_conf *g_bdevperf_conf = NULL; +static const char *g_bdevperf_conf_file = NULL; + +static struct spdk_cpuset g_all_cpuset; +static struct spdk_poller *g_perf_timer = NULL; + 
+static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task); +static void rpc_perform_tests_cb(void); + +struct bdevperf_job { + char *name; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *ch; + TAILQ_ENTRY(bdevperf_job) link; + struct spdk_thread *thread; + + const char *workload_type; + int io_size; + int rw_percentage; + bool is_random; + bool verify; + bool reset; + bool continue_on_failure; + bool unmap; + bool write_zeroes; + bool flush; + bool abort; + int queue_depth; + + uint64_t io_completed; + uint64_t io_failed; + uint64_t io_timeout; + uint64_t prev_io_completed; + double ema_io_per_second; + int current_queue_depth; + uint64_t size_in_ios; + uint64_t ios_base; + uint64_t offset_in_ios; + uint64_t io_size_blocks; + uint64_t buf_size; + uint32_t dif_check_flags; + bool is_draining; + struct spdk_poller *run_timer; + struct spdk_poller *reset_timer; + struct spdk_bit_array *outstanding; + TAILQ_HEAD(, bdevperf_task) task_list; +}; + +struct spdk_bdevperf { + TAILQ_HEAD(, bdevperf_job) jobs; + uint32_t running_jobs; +}; + +static struct spdk_bdevperf g_bdevperf = { + .jobs = TAILQ_HEAD_INITIALIZER(g_bdevperf.jobs), + .running_jobs = 0, +}; + +enum job_config_rw { + JOB_CONFIG_RW_READ = 0, + JOB_CONFIG_RW_WRITE, + JOB_CONFIG_RW_RANDREAD, + JOB_CONFIG_RW_RANDWRITE, + JOB_CONFIG_RW_RW, + JOB_CONFIG_RW_RANDRW, + JOB_CONFIG_RW_VERIFY, + JOB_CONFIG_RW_RESET, + JOB_CONFIG_RW_UNMAP, + JOB_CONFIG_RW_FLUSH, + JOB_CONFIG_RW_WRITE_ZEROES, +}; + +/* Storing values from a section of job config file */ +struct job_config { + const char *name; + const char *filename; + struct spdk_cpuset cpumask; + int bs; + int iodepth; + int rwmixread; + int offset; + int length; + enum job_config_rw rw; + TAILQ_ENTRY(job_config) link; +}; + +TAILQ_HEAD(, job_config) job_config_list + = TAILQ_HEAD_INITIALIZER(job_config_list); + +static bool g_performance_dump_active = false; + +struct bdevperf_aggregate_stats { + 
struct bdevperf_job *current_job; + uint64_t io_time_in_usec; + uint64_t ema_period; + double total_io_per_second; + double total_mb_per_second; + double total_failed_per_second; + double total_timeout_per_second; +}; + +static struct bdevperf_aggregate_stats g_stats = {}; + +/* + * Cumulative Moving Average (CMA): average of all data up to current + * Exponential Moving Average (EMA): weighted mean of the previous n data and more weight is given to recent + * Simple Moving Average (SMA): unweighted mean of the previous n data + * + * Bdevperf supports CMA and EMA. + */ +static double +get_cma_io_per_second(struct bdevperf_job *job, uint64_t io_time_in_usec) +{ + return (double)job->io_completed * 1000000 / io_time_in_usec; +} + +static double +get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period) +{ + double io_completed, io_per_second; + + io_completed = job->io_completed; + io_per_second = (double)(io_completed - job->prev_io_completed) * 1000000 + / g_show_performance_period_in_usec; + job->prev_io_completed = io_completed; + + job->ema_io_per_second += (io_per_second - job->ema_io_per_second) * 2 + / (ema_period + 1); + return job->ema_io_per_second; +} + +static void +performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job) +{ + double io_per_second, mb_per_second, failed_per_second, timeout_per_second; + + printf("\r Thread name: %s\n", spdk_thread_get_name(job->thread)); + printf("\r Core Mask: 0x%s\n", spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread))); + + if (stats->ema_period == 0) { + io_per_second = get_cma_io_per_second(job, stats->io_time_in_usec); + } else { + io_per_second = get_ema_io_per_second(job, stats->ema_period); + } + mb_per_second = io_per_second * job->io_size / (1024 * 1024); + failed_per_second = (double)job->io_failed * 1000000 / stats->io_time_in_usec; + timeout_per_second = (double)job->io_timeout * 1000000 / stats->io_time_in_usec; + + printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n", 
+ job->name, io_per_second, mb_per_second); + if (failed_per_second != 0) { + printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n", + "", failed_per_second, timeout_per_second); + } + stats->total_io_per_second += io_per_second; + stats->total_mb_per_second += mb_per_second; + stats->total_failed_per_second += failed_per_second; + stats->total_timeout_per_second += timeout_per_second; +} + +static void +generate_data(void *buf, int buf_len, int block_size, void *md_buf, int md_size, + int num_blocks, int seed) +{ + int offset_blocks = 0, md_offset, data_block_size; + + if (buf_len < num_blocks * block_size) { + return; + } + + if (md_buf == NULL) { + data_block_size = block_size - md_size; + md_buf = (char *)buf + data_block_size; + md_offset = block_size; + } else { + data_block_size = block_size; + md_offset = md_size; + } + + while (offset_blocks < num_blocks) { + memset(buf, seed, data_block_size); + memset(md_buf, seed, md_size); + buf += block_size; + md_buf += md_offset; + offset_blocks++; + } +} + +static bool +copy_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size, + void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks) +{ + if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) { + return false; + } + + assert((wr_md_buf != NULL) == (rd_md_buf != NULL)); + + memcpy(wr_buf, rd_buf, block_size * num_blocks); + + if (wr_md_buf != NULL) { + memcpy(wr_md_buf, rd_md_buf, md_size * num_blocks); + } + + return true; +} + +static bool +verify_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size, + void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks, bool md_check) +{ + int offset_blocks = 0, md_offset, data_block_size; + + if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) { + return false; + } + + assert((wr_md_buf != NULL) == (rd_md_buf != NULL)); + + if (wr_md_buf == NULL) { + data_block_size = block_size - md_size; + wr_md_buf = (char 
*)wr_buf + data_block_size; + rd_md_buf = (char *)rd_buf + data_block_size; + md_offset = block_size; + } else { + data_block_size = block_size; + md_offset = md_size; + } + + while (offset_blocks < num_blocks) { + if (memcmp(wr_buf, rd_buf, data_block_size) != 0) { + return false; + } + + wr_buf += block_size; + rd_buf += block_size; + + if (md_check) { + if (memcmp(wr_md_buf, rd_md_buf, md_size) != 0) { + return false; + } + + wr_md_buf += md_offset; + rd_md_buf += md_offset; + } + + offset_blocks++; + } + + return true; +} + +static void +free_job_config(void) +{ + struct job_config *config, *tmp; + + spdk_conf_free(g_bdevperf_conf); + g_bdevperf_conf = NULL; + + TAILQ_FOREACH_SAFE(config, &job_config_list, link, tmp) { + TAILQ_REMOVE(&job_config_list, config, link); + free(config); + } +} + +static void +bdevperf_test_done(void *ctx) +{ + struct bdevperf_job *job, *jtmp; + struct bdevperf_task *task, *ttmp; + + if (g_time_in_usec && !g_run_rc) { + g_stats.io_time_in_usec = g_time_in_usec; + + if (g_performance_dump_active) { + spdk_thread_send_msg(spdk_get_thread(), bdevperf_test_done, NULL); + return; + } + } else { + printf("Job run time less than one microsecond, no performance data will be shown\n"); + } + + if (g_show_performance_real_time) { + spdk_poller_unregister(&g_perf_timer); + } + + if (g_shutdown) { + g_time_in_usec = g_shutdown_tsc * 1000000 / spdk_get_ticks_hz(); + printf("Received shutdown signal, test time was about %.6f seconds\n", + (double)g_time_in_usec / 1000000); + } + + TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) { + TAILQ_REMOVE(&g_bdevperf.jobs, job, link); + + performance_dump_job(&g_stats, job); + + TAILQ_FOREACH_SAFE(task, &job->task_list, link, ttmp) { + TAILQ_REMOVE(&job->task_list, task, link); + spdk_free(task->buf); + spdk_free(task->md_buf); + free(task); + } + + if (job->verify) { + spdk_bit_array_free(&job->outstanding); + } + + free(job->name); + free(job); + } + + printf("\r 
=====================================================\n"); + printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n", + "Total", g_stats.total_io_per_second, g_stats.total_mb_per_second); + if (g_stats.total_failed_per_second != 0 || g_stats.total_timeout_per_second != 0) { + printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n", + "", g_stats.total_failed_per_second, g_stats.total_timeout_per_second); + } + fflush(stdout); + + if (g_request && !g_shutdown) { + rpc_perform_tests_cb(); + } else { + spdk_app_stop(g_run_rc); + } +} + +static void +bdevperf_job_end(void *ctx) +{ + assert(g_master_thread == spdk_get_thread()); + + if (--g_bdevperf.running_jobs == 0) { + bdevperf_test_done(NULL); + } +} + +static void +bdevperf_queue_io_wait_with_cb(struct bdevperf_task *task, spdk_bdev_io_wait_cb cb_fn) +{ + struct bdevperf_job *job = task->job; + + task->bdev_io_wait.bdev = job->bdev; + task->bdev_io_wait.cb_fn = cb_fn; + task->bdev_io_wait.cb_arg = task; + spdk_bdev_queue_io_wait(job->bdev, job->ch, &task->bdev_io_wait); +} + +static int +bdevperf_job_drain(void *ctx) +{ + struct bdevperf_job *job = ctx; + + spdk_poller_unregister(&job->run_timer); + if (job->reset) { + spdk_poller_unregister(&job->reset_timer); + } + + job->is_draining = true; + + return -1; +} + +static void +bdevperf_abort_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bdevperf_task *task = cb_arg; + struct bdevperf_job *job = task->job; + + job->current_queue_depth--; + + if (success) { + job->io_completed++; + } else { + job->io_failed++; + if (!job->continue_on_failure) { + bdevperf_job_drain(job); + g_run_rc = -1; + } + } + + spdk_bdev_free_io(bdev_io); + + /* Return task to free list because abort is submitted on demand. 
*/ + TAILQ_INSERT_TAIL(&job->task_list, task, link); + + if (job->is_draining) { + if (job->current_queue_depth == 0) { + spdk_put_io_channel(job->ch); + spdk_bdev_close(job->bdev_desc); + spdk_thread_send_msg(g_master_thread, bdevperf_job_end, NULL); + } + } +} + +static void +bdevperf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bdevperf_job *job; + struct bdevperf_task *task = cb_arg; + struct iovec *iovs; + int iovcnt; + bool md_check; + uint64_t offset_in_ios; + + job = task->job; + md_check = spdk_bdev_get_dif_type(job->bdev) == SPDK_DIF_DISABLE; + + if (!success) { + if (!job->reset && !job->continue_on_failure) { + bdevperf_job_drain(job); + g_run_rc = -1; + printf("task offset: %lu on job bdev=%s fails\n", + task->offset_blocks, job->name); + } + } else if (job->verify || job->reset) { + spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt); + assert(iovcnt == 1); + assert(iovs != NULL); + if (!verify_data(task->buf, job->buf_size, iovs[0].iov_base, iovs[0].iov_len, + spdk_bdev_get_block_size(job->bdev), + task->md_buf, spdk_bdev_io_get_md_buf(bdev_io), + spdk_bdev_get_md_size(job->bdev), + job->io_size_blocks, md_check)) { + printf("Buffer mismatch! 
Target: %s Disk Offset: %lu\n", job->name, task->offset_blocks); + printf(" First dword expected 0x%x got 0x%x\n", *(int *)task->buf, *(int *)iovs[0].iov_base); + bdevperf_job_drain(job); + g_run_rc = -1; + } + } + + job->current_queue_depth--; + + if (success) { + job->io_completed++; + } else { + job->io_failed++; + } + + if (job->verify) { + assert(task->offset_blocks / job->io_size_blocks >= job->ios_base); + offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base; + + assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true); + spdk_bit_array_clear(job->outstanding, offset_in_ios); + } + + spdk_bdev_free_io(bdev_io); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. + */ + if (!job->is_draining) { + bdevperf_submit_single(job, task); + } else { + TAILQ_INSERT_TAIL(&job->task_list, task, link); + if (job->current_queue_depth == 0) { + spdk_put_io_channel(job->ch); + spdk_bdev_close(job->bdev_desc); + spdk_thread_send_msg(g_master_thread, bdevperf_job_end, NULL); + } + } +} + +static void +bdevperf_verify_submit_read(void *cb_arg) +{ + struct bdevperf_job *job; + struct bdevperf_task *task = cb_arg; + int rc; + + job = task->job; + + /* Read the data back in */ + if (spdk_bdev_is_md_separate(job->bdev)) { + rc = spdk_bdev_read_blocks_with_md(job->bdev_desc, job->ch, NULL, NULL, + task->offset_blocks, job->io_size_blocks, + bdevperf_complete, task); + } else { + rc = spdk_bdev_read_blocks(job->bdev_desc, job->ch, NULL, + task->offset_blocks, job->io_size_blocks, + bdevperf_complete, task); + } + + if (rc == -ENOMEM) { + bdevperf_queue_io_wait_with_cb(task, bdevperf_verify_submit_read); + } else if (rc != 0) { + printf("Failed to submit read: %d\n", rc); + bdevperf_job_drain(job); + g_run_rc = rc; + } +} + +static void +bdevperf_verify_write_complete(struct 
spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + if (success) { + spdk_bdev_free_io(bdev_io); + bdevperf_verify_submit_read(cb_arg); + } else { + bdevperf_complete(bdev_io, success, cb_arg); + } +} + +static void +bdevperf_zcopy_populate_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + if (!success) { + bdevperf_complete(bdev_io, success, cb_arg); + return; + } + + spdk_bdev_zcopy_end(bdev_io, false, bdevperf_complete, cb_arg); +} + +static int +bdevperf_generate_dif(struct bdevperf_task *task) +{ + struct bdevperf_job *job = task->job; + struct spdk_bdev *bdev = job->bdev; + struct spdk_dif_ctx dif_ctx; + int rc; + + rc = spdk_dif_ctx_init(&dif_ctx, + spdk_bdev_get_block_size(bdev), + spdk_bdev_get_md_size(bdev), + spdk_bdev_is_md_interleaved(bdev), + spdk_bdev_is_dif_head_of_md(bdev), + spdk_bdev_get_dif_type(bdev), + job->dif_check_flags, + task->offset_blocks, 0, 0, 0, 0); + if (rc != 0) { + fprintf(stderr, "Initialization of DIF context failed\n"); + return rc; + } + + if (spdk_bdev_is_md_interleaved(bdev)) { + rc = spdk_dif_generate(&task->iov, 1, job->io_size_blocks, &dif_ctx); + } else { + struct iovec md_iov = { + .iov_base = task->md_buf, + .iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks, + }; + + rc = spdk_dix_generate(&task->iov, 1, &md_iov, job->io_size_blocks, &dif_ctx); + } + + if (rc != 0) { + fprintf(stderr, "Generation of DIF/DIX failed\n"); + } + + return rc; +} + +static void +bdevperf_submit_task(void *arg) +{ + struct bdevperf_task *task = arg; + struct bdevperf_job *job = task->job; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + spdk_bdev_io_completion_cb cb_fn; + uint64_t offset_in_ios; + int rc = 0; + + desc = job->bdev_desc; + ch = job->ch; + + switch (task->io_type) { + case SPDK_BDEV_IO_TYPE_WRITE: + if (spdk_bdev_get_md_size(job->bdev) != 0 && job->dif_check_flags != 0) { + rc = bdevperf_generate_dif(task); + } + if (rc == 0) { + cb_fn = (job->verify || job->reset) ? 
bdevperf_verify_write_complete : bdevperf_complete; + + if (g_zcopy) { + spdk_bdev_zcopy_end(task->bdev_io, true, cb_fn, task); + return; + } else { + if (spdk_bdev_is_md_separate(job->bdev)) { + rc = spdk_bdev_writev_blocks_with_md(desc, ch, &task->iov, 1, + task->md_buf, + task->offset_blocks, + job->io_size_blocks, + cb_fn, task); + } else { + rc = spdk_bdev_writev_blocks(desc, ch, &task->iov, 1, + task->offset_blocks, + job->io_size_blocks, + cb_fn, task); + } + } + } + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(desc, ch, task->offset_blocks, + job->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(desc, ch, task->offset_blocks, + job->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + rc = spdk_bdev_write_zeroes_blocks(desc, ch, task->offset_blocks, + job->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_READ: + if (g_zcopy) { + rc = spdk_bdev_zcopy_start(desc, ch, task->offset_blocks, job->io_size_blocks, + true, bdevperf_zcopy_populate_complete, task); + } else { + if (spdk_bdev_is_md_separate(job->bdev)) { + rc = spdk_bdev_read_blocks_with_md(desc, ch, task->buf, task->md_buf, + task->offset_blocks, + job->io_size_blocks, + bdevperf_complete, task); + } else { + rc = spdk_bdev_read_blocks(desc, ch, task->buf, task->offset_blocks, + job->io_size_blocks, bdevperf_complete, task); + } + } + break; + case SPDK_BDEV_IO_TYPE_ABORT: + rc = spdk_bdev_abort(desc, ch, task->task_to_abort, bdevperf_abort_complete, task); + break; + default: + assert(false); + rc = -EINVAL; + break; + } + + if (rc == -ENOMEM) { + bdevperf_queue_io_wait_with_cb(task, bdevperf_submit_task); + return; + } else if (rc != 0) { + printf("Failed to submit bdev_io: %d\n", rc); + if (job->verify) { + assert(task->offset_blocks / job->io_size_blocks >= job->ios_base); + offset_in_ios = task->offset_blocks / job->io_size_blocks - 
job->ios_base; + + assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true); + spdk_bit_array_clear(job->outstanding, offset_in_ios); + } + bdevperf_job_drain(job); + g_run_rc = rc; + return; + } + + job->current_queue_depth++; +} + +static void +bdevperf_zcopy_get_buf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bdevperf_task *task = cb_arg; + struct bdevperf_job *job = task->job; + struct iovec *iovs; + int iovcnt; + + if (!success) { + bdevperf_job_drain(job); + g_run_rc = -1; + return; + } + + task->bdev_io = bdev_io; + task->io_type = SPDK_BDEV_IO_TYPE_WRITE; + + if (job->verify || job->reset) { + /* When job->verify or job->reset is enabled, task->buf is used for + * verification of read after write. For write I/O, when zcopy APIs + * are used, task->buf cannot be used, and data must be written to + * the data buffer allocated underneath bdev layer instead. + * Hence we copy task->buf to the allocated data buffer here. + */ + spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt); + assert(iovcnt == 1); + assert(iovs != NULL); + + copy_data(iovs[0].iov_base, iovs[0].iov_len, task->buf, job->buf_size, + spdk_bdev_get_block_size(job->bdev), + spdk_bdev_io_get_md_buf(bdev_io), task->md_buf, + spdk_bdev_get_md_size(job->bdev), job->io_size_blocks); + } + + bdevperf_submit_task(task); +} + +static void +bdevperf_prep_zcopy_write_task(void *arg) +{ + struct bdevperf_task *task = arg; + struct bdevperf_job *job = task->job; + int rc; + + rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch, + task->offset_blocks, job->io_size_blocks, + false, bdevperf_zcopy_get_buf_complete, task); + if (rc != 0) { + assert(rc == -ENOMEM); + bdevperf_queue_io_wait_with_cb(task, bdevperf_prep_zcopy_write_task); + return; + } + + job->current_queue_depth++; +} + +static struct bdevperf_task * +bdevperf_job_get_task(struct bdevperf_job *job) +{ + struct bdevperf_task *task; + + task = TAILQ_FIRST(&job->task_list); + if (!task) { + printf("Task 
allocation failed\n"); + abort(); + } + + TAILQ_REMOVE(&job->task_list, task, link); + return task; +} + +static __thread unsigned int seed = 0; + +static void +bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task) +{ + uint64_t offset_in_ios; + + if (job->is_random) { + offset_in_ios = rand_r(&seed) % job->size_in_ios; + } else { + offset_in_ios = job->offset_in_ios++; + if (job->offset_in_ios == job->size_in_ios) { + job->offset_in_ios = 0; + } + + /* Increment of offset_in_ios if there's already an outstanding IO + * to that location. We only need this with job->verify as random + * offsets are not supported with job->verify at this time. + */ + if (job->verify) { + assert(spdk_bit_array_find_first_clear(job->outstanding, 0) != UINT32_MAX); + + while (spdk_bit_array_get(job->outstanding, offset_in_ios)) { + offset_in_ios = job->offset_in_ios++; + if (job->offset_in_ios == job->size_in_ios) { + job->offset_in_ios = 0; + } + } + spdk_bit_array_set(job->outstanding, offset_in_ios); + } + } + + /* For multi-thread to same job, offset_in_ios is relative + * to the LBA range assigned for that job. job->offset_blocks + * is absolute (entire bdev LBA range). 
+ */ + task->offset_blocks = (offset_in_ios + job->ios_base) * job->io_size_blocks; + + if (job->verify || job->reset) { + generate_data(task->buf, job->buf_size, + spdk_bdev_get_block_size(job->bdev), + task->md_buf, spdk_bdev_get_md_size(job->bdev), + job->io_size_blocks, rand_r(&seed) % 256); + if (g_zcopy) { + bdevperf_prep_zcopy_write_task(task); + return; + } else { + task->iov.iov_base = task->buf; + task->iov.iov_len = job->buf_size; + task->io_type = SPDK_BDEV_IO_TYPE_WRITE; + } + } else if (job->flush) { + task->io_type = SPDK_BDEV_IO_TYPE_FLUSH; + } else if (job->unmap) { + task->io_type = SPDK_BDEV_IO_TYPE_UNMAP; + } else if (job->write_zeroes) { + task->io_type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; + } else if ((job->rw_percentage == 100) || + (job->rw_percentage != 0 && ((rand_r(&seed) % 100) < job->rw_percentage))) { + task->io_type = SPDK_BDEV_IO_TYPE_READ; + } else { + if (g_zcopy) { + bdevperf_prep_zcopy_write_task(task); + return; + } else { + task->iov.iov_base = task->buf; + task->iov.iov_len = job->buf_size; + task->io_type = SPDK_BDEV_IO_TYPE_WRITE; + } + } + + bdevperf_submit_task(task); +} + +static int reset_job(void *arg); + +static void +reset_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bdevperf_task *task = cb_arg; + struct bdevperf_job *job = task->job; + + if (!success) { + printf("Reset blockdev=%s failed\n", spdk_bdev_get_name(job->bdev)); + bdevperf_job_drain(job); + g_run_rc = -1; + } + + TAILQ_INSERT_TAIL(&job->task_list, task, link); + spdk_bdev_free_io(bdev_io); + + job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job, + 10 * 1000000); +} + +static int +reset_job(void *arg) +{ + struct bdevperf_job *job = arg; + struct bdevperf_task *task; + int rc; + + spdk_poller_unregister(&job->reset_timer); + + /* Do reset. 
*/ + task = bdevperf_job_get_task(job); + rc = spdk_bdev_reset(job->bdev_desc, job->ch, + reset_cb, task); + if (rc) { + printf("Reset failed: %d\n", rc); + bdevperf_job_drain(job); + g_run_rc = -1; + } + + return -1; +} + +static void +bdevperf_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io) +{ + struct bdevperf_job *job = cb_arg; + struct bdevperf_task *task; + + job->io_timeout++; + + if (job->is_draining || !job->abort || + !spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ABORT)) { + return; + } + + task = bdevperf_job_get_task(job); + if (task == NULL) { + return; + } + + task->task_to_abort = spdk_bdev_io_get_cb_arg(bdev_io); + task->io_type = SPDK_BDEV_IO_TYPE_ABORT; + + bdevperf_submit_task(task); +} + +static void +bdevperf_job_run(void *ctx) +{ + struct bdevperf_job *job = ctx; + struct bdevperf_task *task; + int i; + + /* Submit initial I/O for this job. Each time one + * completes, another will be submitted. */ + + /* Start a timer to stop this I/O chain when the run is over */ + job->run_timer = SPDK_POLLER_REGISTER(bdevperf_job_drain, job, g_time_in_usec); + if (job->reset) { + job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job, + 10 * 1000000); + } + + spdk_bdev_set_timeout(job->bdev_desc, g_timeout_in_sec, bdevperf_timeout_cb, job); + + for (i = 0; i < job->queue_depth; i++) { + task = bdevperf_job_get_task(job); + bdevperf_submit_single(job, task); + } +} + +static void +_performance_dump_done(void *ctx) +{ + struct bdevperf_aggregate_stats *stats = ctx; + + printf("\r =====================================================\n"); + printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n", + "Total", stats->total_io_per_second, stats->total_mb_per_second); + if (stats->total_failed_per_second != 0 || stats->total_timeout_per_second != 0) { + printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n", + "", stats->total_failed_per_second, stats->total_timeout_per_second); + } + fflush(stdout); + + g_performance_dump_active = false; + + free(stats); +} + 
+static void +_performance_dump(void *ctx) +{ + struct bdevperf_aggregate_stats *stats = ctx; + + performance_dump_job(stats, stats->current_job); + + /* This assumes the jobs list is static after start up time. + * That's true right now, but if that ever changed this would need a lock. */ + stats->current_job = TAILQ_NEXT(stats->current_job, link); + if (stats->current_job == NULL) { + spdk_thread_send_msg(g_master_thread, _performance_dump_done, stats); + } else { + spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats); + } +} + +static int +performance_statistics_thread(void *arg) +{ + struct bdevperf_aggregate_stats *stats; + + if (g_performance_dump_active) { + return -1; + } + + g_performance_dump_active = true; + + stats = calloc(1, sizeof(*stats)); + if (stats == NULL) { + return -1; + } + + g_show_performance_period_num++; + + stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec; + stats->ema_period = g_show_performance_ema_period; + + /* Iterate all of the jobs to gather stats + * These jobs will not get removed here until a final performance dump is run, + * so this should be safe without locking. 
+ */ + stats->current_job = TAILQ_FIRST(&g_bdevperf.jobs); + if (stats->current_job == NULL) { + spdk_thread_send_msg(g_master_thread, _performance_dump_done, stats); + } else { + spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats); + } + + return -1; +} + +static void +bdevperf_test(void) +{ + struct bdevperf_job *job; + + printf("Running I/O for %" PRIu64 " seconds...\n", g_time_in_usec / 1000000); + fflush(stdout); + + /* Start a timer to dump performance numbers */ + g_shutdown_tsc = spdk_get_ticks(); + if (g_show_performance_real_time) { + g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL, + g_show_performance_period_in_usec); + } + + /* Iterate jobs to start all I/O */ + TAILQ_FOREACH(job, &g_bdevperf.jobs, link) { + g_bdevperf.running_jobs++; + spdk_thread_send_msg(job->thread, bdevperf_job_run, job); + } +} + +static void +bdevperf_bdev_removed(void *arg) +{ + struct bdevperf_job *job = arg; + + bdevperf_job_drain(job); +} + +static uint32_t g_construct_job_count = 0; + +static void +_bdevperf_construct_job_done(void *ctx) +{ + if (--g_construct_job_count == 0) { + + if (g_run_rc != 0) { + /* Something failed. */ + bdevperf_test_done(NULL); + return; + } + + /* Ready to run the test */ + bdevperf_test(); + } +} + +/* Checkformat will not allow to use inlined type, + this is a workaround */ +typedef struct spdk_thread *spdk_thread_t; + +static spdk_thread_t +construct_job_thread(struct spdk_cpuset *cpumask, const char *tag) +{ + char thread_name[32]; + struct spdk_cpuset tmp; + + /* This function runs on the master thread. 
*/ + assert(g_master_thread == spdk_get_thread()); + + /* Handle default mask */ + if (spdk_cpuset_count(cpumask) == 0) { + cpumask = &g_all_cpuset; + } + + /* Warn user that mask might need to be changed */ + spdk_cpuset_copy(&tmp, cpumask); + spdk_cpuset_or(&tmp, &g_all_cpuset); + if (!spdk_cpuset_equal(&tmp, &g_all_cpuset)) { + fprintf(stderr, "cpumask for '%s' is too big\n", tag); + } + + snprintf(thread_name, sizeof(thread_name), "%s_%s", + tag, + spdk_cpuset_fmt(cpumask)); + + return spdk_thread_create(thread_name, cpumask); +} + +static uint32_t +_get_next_core(void) +{ + static uint32_t current_core = SPDK_ENV_LCORE_ID_ANY; + + if (current_core == SPDK_ENV_LCORE_ID_ANY) { + current_core = spdk_env_get_first_core(); + return current_core; + } + + current_core = spdk_env_get_next_core(current_core); + if (current_core == SPDK_ENV_LCORE_ID_ANY) { + current_core = spdk_env_get_first_core(); + } + + return current_core; +} + +static void +_bdevperf_construct_job(void *ctx) +{ + struct bdevperf_job *job = ctx; + int rc; + + rc = spdk_bdev_open(job->bdev, true, bdevperf_bdev_removed, job, &job->bdev_desc); + if (rc != 0) { + SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(job->bdev), rc); + g_run_rc = -EINVAL; + goto end; + } + + job->ch = spdk_bdev_get_io_channel(job->bdev_desc); + if (!job->ch) { + SPDK_ERRLOG("Could not get io_channel for device %s, error=%d\n", spdk_bdev_get_name(job->bdev), + rc); + g_run_rc = -ENOMEM; + goto end; + } + +end: + spdk_thread_send_msg(g_master_thread, _bdevperf_construct_job_done, NULL); +} + +static void +job_init_rw(struct bdevperf_job *job, enum job_config_rw rw) +{ + switch (rw) { + case JOB_CONFIG_RW_READ: + job->rw_percentage = 100; + break; + case JOB_CONFIG_RW_WRITE: + job->rw_percentage = 0; + break; + case JOB_CONFIG_RW_RANDREAD: + job->is_random = true; + job->rw_percentage = 100; + break; + case JOB_CONFIG_RW_RANDWRITE: + job->is_random = true; + job->rw_percentage = 0; + break; + case 
JOB_CONFIG_RW_RW: + job->is_random = false; + break; + case JOB_CONFIG_RW_RANDRW: + job->is_random = true; + break; + case JOB_CONFIG_RW_VERIFY: + job->verify = true; + job->rw_percentage = 50; + break; + case JOB_CONFIG_RW_RESET: + job->reset = true; + job->verify = true; + job->rw_percentage = 50; + break; + case JOB_CONFIG_RW_UNMAP: + job->unmap = true; + break; + case JOB_CONFIG_RW_FLUSH: + job->flush = true; + break; + case JOB_CONFIG_RW_WRITE_ZEROES: + job->write_zeroes = true; + break; + } +} + +static int +bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config, + struct spdk_thread *thread) +{ + struct bdevperf_job *job; + struct bdevperf_task *task; + int block_size, data_block_size; + int rc; + int task_num, n; + + block_size = spdk_bdev_get_block_size(bdev); + data_block_size = spdk_bdev_get_data_block_size(bdev); + + job = calloc(1, sizeof(struct bdevperf_job)); + if (!job) { + fprintf(stderr, "Unable to allocate memory for new job.\n"); + return -ENOMEM; + } + + job->name = strdup(spdk_bdev_get_name(bdev)); + if (!job->name) { + fprintf(stderr, "Unable to allocate memory for job name.\n"); + free(job); + return -ENOMEM; + } + + job->workload_type = g_workload_type; + job->io_size = config->bs; + job->rw_percentage = config->rwmixread; + job->continue_on_failure = g_continue_on_failure; + job->queue_depth = config->iodepth; + job->bdev = bdev; + job->io_size_blocks = job->io_size / data_block_size; + job->buf_size = job->io_size_blocks * block_size; + job_init_rw(job, config->rw); + + if ((job->io_size % data_block_size) != 0) { + SPDK_ERRLOG("IO size (%d) is not multiples of data block size of bdev %s (%"PRIu32")\n", + job->io_size, spdk_bdev_get_name(bdev), data_block_size); + free(job->name); + free(job); + return -ENOTSUP; + } + + if (job->unmap && !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + printf("Skipping %s because it does not support unmap\n", spdk_bdev_get_name(bdev)); + free(job->name); + free(job); + 
return -ENOTSUP; + } + + if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) { + job->dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK; + } + if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) { + job->dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK; + } + + job->offset_in_ios = 0; + + if (config->length != 0) { + /* Use subset of disk */ + job->size_in_ios = config->length / job->io_size_blocks; + job->ios_base = config->offset / job->io_size_blocks; + } else { + /* Use whole disk */ + job->size_in_ios = spdk_bdev_get_num_blocks(bdev) / job->io_size_blocks; + job->ios_base = 0; + } + + if (job->verify) { + job->outstanding = spdk_bit_array_create(job->size_in_ios); + if (job->outstanding == NULL) { + SPDK_ERRLOG("Could not create outstanding array bitmap for bdev %s\n", + spdk_bdev_get_name(bdev)); + free(job->name); + free(job); + return -ENOMEM; + } + } + + TAILQ_INIT(&job->task_list); + + task_num = job->queue_depth; + if (job->reset) { + task_num += 1; + } + if (job->abort) { + task_num += job->queue_depth; + } + + TAILQ_INSERT_TAIL(&g_bdevperf.jobs, job, link); + + for (n = 0; n < task_num; n++) { + task = calloc(1, sizeof(struct bdevperf_task)); + if (!task) { + fprintf(stderr, "Failed to allocate task from memory\n"); + return -ENOMEM; + } + + task->buf = spdk_zmalloc(job->buf_size, spdk_bdev_get_buf_align(job->bdev), NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!task->buf) { + fprintf(stderr, "Cannot allocate buf for task=%p\n", task); + free(task); + return -ENOMEM; + } + + if (spdk_bdev_is_md_separate(job->bdev)) { + task->md_buf = spdk_zmalloc(job->io_size_blocks * + spdk_bdev_get_md_size(job->bdev), 0, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + if (!task->md_buf) { + fprintf(stderr, "Cannot allocate md buf for task=%p\n", task); + spdk_free(task->buf); + free(task); + return -ENOMEM; + } + } + + task->job = job; + TAILQ_INSERT_TAIL(&job->task_list, task, link); + } + + job->thread = thread; + + 
g_construct_job_count++; + + rc = spdk_thread_send_msg(thread, _bdevperf_construct_job, job); + assert(rc == 0); + + return rc; +} + +static int +parse_rw(const char *str, enum job_config_rw ret) +{ + if (str == NULL) { + return ret; + } + + if (!strcmp(str, "read")) { + ret = JOB_CONFIG_RW_READ; + } else if (!strcmp(str, "randread")) { + ret = JOB_CONFIG_RW_RANDREAD; + } else if (!strcmp(str, "write")) { + ret = JOB_CONFIG_RW_WRITE; + } else if (!strcmp(str, "randwrite")) { + ret = JOB_CONFIG_RW_RANDWRITE; + } else if (!strcmp(str, "verify")) { + ret = JOB_CONFIG_RW_VERIFY; + } else if (!strcmp(str, "reset")) { + ret = JOB_CONFIG_RW_RESET; + } else if (!strcmp(str, "unmap")) { + ret = JOB_CONFIG_RW_UNMAP; + } else if (!strcmp(str, "write_zeroes")) { + ret = JOB_CONFIG_RW_WRITE_ZEROES; + } else if (!strcmp(str, "flush")) { + ret = JOB_CONFIG_RW_FLUSH; + } else if (!strcmp(str, "rw")) { + ret = JOB_CONFIG_RW_RW; + } else if (!strcmp(str, "randrw")) { + ret = JOB_CONFIG_RW_RANDRW; + } else { + fprintf(stderr, "rw must be one of\n" + "(read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush)\n"); + ret = BDEVPERF_CONFIG_ERROR; + } + + return ret; +} + +static const char * +config_filename_next(const char *filename, char *out) +{ + int i, k; + + if (filename == NULL) { + out[0] = '\0'; + return NULL; + } + + if (filename[0] == ':') { + filename++; + } + + for (i = 0, k = 0; + filename[i] != '\0' && + filename[i] != ':' && + i < BDEVPERF_CONFIG_MAX_FILENAME; + i++) { + if (filename[i] == ' ' || filename[i] == '\t') { + continue; + } + + out[k++] = filename[i]; + } + out[k] = 0; + + return filename + i; +} + +static void +bdevperf_construct_config_jobs(void) +{ + char filename[BDEVPERF_CONFIG_MAX_FILENAME]; + struct spdk_thread *thread; + struct job_config *config; + struct spdk_bdev *bdev; + const char *filenames; + int rc; + + TAILQ_FOREACH(config, &job_config_list, link) { + filenames = config->filename; + + thread = 
construct_job_thread(&config->cpumask, config->name); + assert(thread); + + while (filenames) { + filenames = config_filename_next(filenames, filename); + if (strlen(filename) == 0) { + break; + } + + bdev = spdk_bdev_get_by_name(filename); + if (!bdev) { + fprintf(stderr, "Unable to find bdev '%s'\n", filename); + g_run_rc = -EINVAL; + return; + } + + rc = bdevperf_construct_job(bdev, config, thread); + if (rc < 0) { + g_run_rc = rc; + return; + } + } + } +} + +static int +make_cli_job_config(const char *filename, int offset, int range) +{ + struct job_config *config = calloc(1, sizeof(*config)); + + if (config == NULL) { + fprintf(stderr, "Unable to allocate memory for job config\n"); + return -ENOMEM; + } + + config->name = filename; + config->filename = filename; + spdk_cpuset_zero(&config->cpumask); + spdk_cpuset_set_cpu(&config->cpumask, _get_next_core(), true); + config->bs = g_io_size; + config->iodepth = g_queue_depth; + config->rwmixread = g_rw_percentage; + config->offset = offset; + config->length = range; + config->rw = parse_rw(g_workload_type, BDEVPERF_CONFIG_ERROR); + if ((int)config->rw == BDEVPERF_CONFIG_ERROR) { + return -EINVAL; + } + + TAILQ_INSERT_TAIL(&job_config_list, config, link); + return 0; +} + +static void +bdevperf_construct_multithread_jobs(void) +{ + struct spdk_bdev *bdev; + uint32_t i; + uint32_t num_cores; + uint32_t blocks_per_job; + uint32_t offset; + + num_cores = 0; + SPDK_ENV_FOREACH_CORE(i) { + num_cores++; + } + + if (num_cores == 0) { + g_run_rc = -EINVAL; + return; + } + + if (g_job_bdev_name != NULL) { + bdev = spdk_bdev_get_by_name(g_job_bdev_name); + if (!bdev) { + fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name); + return; + } + + blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores; + offset = 0; + + SPDK_ENV_FOREACH_CORE(i) { + g_run_rc = make_cli_job_config(g_job_bdev_name, offset, blocks_per_job); + if (g_run_rc) { + return; + } + + offset += blocks_per_job; + } + } else { + bdev = 
spdk_bdev_first_leaf(); + while (bdev != NULL) { + blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores; + offset = 0; + + SPDK_ENV_FOREACH_CORE(i) { + g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev), + offset, blocks_per_job); + if (g_run_rc) { + return; + } + + offset += blocks_per_job; + } + + bdev = spdk_bdev_next_leaf(bdev); + } + } +} + +static void +bdevperf_construct_jobs(void) +{ + struct spdk_bdev *bdev; + + /* There are three different modes for allocating jobs. Standard mode + * (the default) creates one spdk_thread per bdev and runs the I/O job there. + * + * The -C flag places bdevperf into "multithread" mode, meaning it creates + * one spdk_thread per bdev PER CORE, and runs a copy of the job on each. + * This runs multiple threads per bdev, effectively. + * + * The -j flag implies "FIO" mode which tries to mimic semantic of FIO jobs. + * In "FIO" mode, threads are spawned per-job instead of per-bdev. + * Each FIO job can be individually parameterized by filename, cpu mask, etc, + * which is different from other modes in that they only support global options. + */ + + /* Increment initial construct_jobs count so that it will never reach 0 in the middle + * of iteration. 
+ */ + g_construct_job_count = 1; + + if (g_bdevperf_conf) { + goto end; + } else if (g_multithread_mode) { + bdevperf_construct_multithread_jobs(); + goto end; + } + + if (g_job_bdev_name != NULL) { + bdev = spdk_bdev_get_by_name(g_job_bdev_name); + if (bdev) { + /* Construct the job */ + g_run_rc = make_cli_job_config(g_job_bdev_name, 0, 0); + } else { + fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name); + } + } else { + bdev = spdk_bdev_first_leaf(); + + while (bdev != NULL) { + /* Construct the job */ + g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev), 0, 0); + if (g_run_rc) { + break; + } + + bdev = spdk_bdev_next_leaf(bdev); + } + } + +end: + if (g_run_rc == 0) { + bdevperf_construct_config_jobs(); + } + + if (--g_construct_job_count == 0) { + if (g_run_rc != 0) { + /* Something failed. */ + bdevperf_test_done(NULL); + return; + } + + bdevperf_test(); + } +} + +static int +parse_uint_option(struct spdk_conf_section *s, const char *name, int def) +{ + const char *job_name; + int tmp; + + tmp = spdk_conf_section_get_intval(s, name); + if (tmp == -1) { + /* Field was not found. 
Check default value + * In [global] section it is ok to have undefined values + * but for other sections it is not ok */ + if (def == BDEVPERF_CONFIG_UNDEFINED) { + job_name = spdk_conf_section_get_name(s); + if (strcmp(job_name, "global") == 0) { + return def; + } + + fprintf(stderr, + "Job '%s' has no '%s' assigned\n", + job_name, name); + return BDEVPERF_CONFIG_ERROR; + } + return def; + } + + /* NOTE: get_intval returns nonnegative on success */ + if (tmp < 0) { + fprintf(stderr, "Job '%s' has bad '%s' value.\n", + spdk_conf_section_get_name(s), name); + return BDEVPERF_CONFIG_ERROR; + } + + return tmp; +} + +/* CLI arguments override parameters for global sections */ +static void +config_set_cli_args(struct job_config *config) +{ + if (g_job_bdev_name) { + config->filename = g_job_bdev_name; + } + if (g_io_size > 0) { + config->bs = g_io_size; + } + if (g_queue_depth > 0) { + config->iodepth = g_queue_depth; + } + if (g_rw_percentage > 0) { + config->rwmixread = g_rw_percentage; + } + if (g_workload_type) { + config->rw = parse_rw(g_workload_type, config->rw); + } +} + +static int +read_job_config(void) +{ + struct job_config global_default_config; + struct job_config global_config; + struct spdk_conf_section *s; + struct job_config *config; + const char *cpumask; + const char *rw; + bool is_global; + int n = 0; + + if (g_bdevperf_conf_file == NULL) { + return 0; + } + + g_bdevperf_conf = spdk_conf_allocate(); + if (g_bdevperf_conf == NULL) { + fprintf(stderr, "Could not allocate job config structure\n"); + return 1; + } + + spdk_conf_disable_sections_merge(g_bdevperf_conf); + if (spdk_conf_read(g_bdevperf_conf, g_bdevperf_conf_file)) { + fprintf(stderr, "Invalid job config"); + return 1; + } + + /* Initialize global defaults */ + global_default_config.filename = NULL; + /* Zero mask is the same as g_all_cpuset + * The g_all_cpuset is not initialized yet, + * so use zero mask as the default instead */ + spdk_cpuset_zero(&global_default_config.cpumask); + 
global_default_config.bs = BDEVPERF_CONFIG_UNDEFINED; + global_default_config.iodepth = BDEVPERF_CONFIG_UNDEFINED; + /* bdevperf has no default for -M option but in FIO the default is 50 */ + global_default_config.rwmixread = 50; + global_default_config.offset = 0; + /* length 0 means 100% */ + global_default_config.length = 0; + global_default_config.rw = BDEVPERF_CONFIG_UNDEFINED; + config_set_cli_args(&global_default_config); + + if ((int)global_default_config.rw == BDEVPERF_CONFIG_ERROR) { + return 1; + } + + /* There is only a single instance of global job_config + * We just reset its value when we encounter new [global] section */ + global_config = global_default_config; + + for (s = spdk_conf_first_section(g_bdevperf_conf); + s != NULL; + s = spdk_conf_next_section(s)) { + config = calloc(1, sizeof(*config)); + if (config == NULL) { + fprintf(stderr, "Unable to allocate memory for job config\n"); + return 1; + } + + config->name = spdk_conf_section_get_name(s); + is_global = strcmp(config->name, "global") == 0; + + if (is_global) { + global_config = global_default_config; + } + + config->filename = spdk_conf_section_get_val(s, "filename"); + if (config->filename == NULL) { + config->filename = global_config.filename; + } + if (!is_global) { + if (config->filename == NULL) { + fprintf(stderr, "Job '%s' expects 'filename' parameter\n", config->name); + goto error; + } else if (strnlen(config->filename, BDEVPERF_CONFIG_MAX_FILENAME) + >= BDEVPERF_CONFIG_MAX_FILENAME) { + fprintf(stderr, + "filename for '%s' job is too long. 
Max length is %d\n", + config->name, BDEVPERF_CONFIG_MAX_FILENAME); + goto error; + } + } + + cpumask = spdk_conf_section_get_val(s, "cpumask"); + if (cpumask == NULL) { + config->cpumask = global_config.cpumask; + } else if (spdk_cpuset_parse(&config->cpumask, cpumask)) { + fprintf(stderr, "Job '%s' has bad 'cpumask' value\n", config->name); + goto error; + } + + config->bs = parse_uint_option(s, "bs", global_config.bs); + if (config->bs == BDEVPERF_CONFIG_ERROR) { + goto error; + } else if (config->bs == 0) { + fprintf(stderr, "'bs' of job '%s' must be greater than 0\n", config->name); + goto error; + } + + config->iodepth = parse_uint_option(s, "iodepth", global_config.iodepth); + if (config->iodepth == BDEVPERF_CONFIG_ERROR) { + goto error; + } else if (config->iodepth == 0) { + fprintf(stderr, + "'iodepth' of job '%s' must be greater than 0\n", + config->name); + goto error; + } + + config->rwmixread = parse_uint_option(s, "rwmixread", global_config.rwmixread); + if (config->rwmixread == BDEVPERF_CONFIG_ERROR) { + goto error; + } else if (config->rwmixread > 100) { + fprintf(stderr, + "'rwmixread' value of '%s' job is not in 0-100 range\n", + config->name); + goto error; + } + + config->offset = parse_uint_option(s, "offset", global_config.offset); + if (config->offset == BDEVPERF_CONFIG_ERROR) { + goto error; + } + + config->length = parse_uint_option(s, "length", global_config.length); + if (config->length == BDEVPERF_CONFIG_ERROR) { + goto error; + } + + rw = spdk_conf_section_get_val(s, "rw"); + config->rw = parse_rw(rw, global_config.rw); + if ((int)config->rw == BDEVPERF_CONFIG_ERROR) { + fprintf(stderr, "Job '%s' has bad 'rw' value\n", config->name); + goto error; + } else if (!is_global && (int)config->rw == BDEVPERF_CONFIG_UNDEFINED) { + fprintf(stderr, "Job '%s' has no 'rw' assigned\n", config->name); + goto error; + } + + if (is_global) { + config_set_cli_args(config); + global_config = *config; + free(config); + } else { + 
TAILQ_INSERT_TAIL(&job_config_list, config, link); + n++; + } + } + + printf("Using job config with %d jobs\n", n); + return 0; +error: + free(config); + return 1; +} + +static void +bdevperf_run(void *arg1) +{ + uint32_t i; + + g_master_thread = spdk_get_thread(); + + spdk_cpuset_zero(&g_all_cpuset); + SPDK_ENV_FOREACH_CORE(i) { + spdk_cpuset_set_cpu(&g_all_cpuset, i, true); + } + + if (g_wait_for_tests) { + /* Do not perform any tests until RPC is received */ + return; + } + + bdevperf_construct_jobs(); +} + +static void +rpc_perform_tests_cb(void) +{ + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request = g_request; + + g_request = NULL; + + if (g_run_rc == 0) { + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_uint32(w, g_run_rc); + spdk_jsonrpc_end_result(request, w); + } else { + spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "bdevperf failed with error %s", spdk_strerror(-g_run_rc)); + } + + /* Reset g_run_rc to 0 for the next test run. 
*/ + g_run_rc = 0; +} + +static void +rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + if (params != NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, + "perform_tests method requires no parameters"); + return; + } + if (g_request != NULL) { + fprintf(stderr, "Another test is already in progress.\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + spdk_strerror(-EINPROGRESS)); + return; + } + g_request = request; + + bdevperf_construct_jobs(); +} +SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME) + +static void +_bdevperf_job_drain(void *ctx) +{ + bdevperf_job_drain(ctx); +} + +static void +spdk_bdevperf_shutdown_cb(void) +{ + g_shutdown = true; + struct bdevperf_job *job, *tmp; + + if (g_bdevperf.running_jobs == 0) { + bdevperf_test_done(NULL); + return; + } + + g_shutdown_tsc = spdk_get_ticks() - g_shutdown_tsc; + + /* Iterate jobs to stop all I/O */ + TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, tmp) { + spdk_thread_send_msg(job->thread, _bdevperf_job_drain, job); + } +} + +static int +bdevperf_parse_arg(int ch, char *arg) +{ + long long tmp; + + if (ch == 'w') { + g_workload_type = optarg; + } else if (ch == 'T') { + g_job_bdev_name = optarg; + } else if (ch == 'z') { + g_wait_for_tests = true; + } else if (ch == 'x') { + g_zcopy = false; + } else if (ch == 'A') { + g_abort = true; + } else if (ch == 'C') { + g_multithread_mode = true; + } else if (ch == 'f') { + g_continue_on_failure = true; + } else if (ch == 'j') { + g_bdevperf_conf_file = optarg; + } else { + tmp = spdk_strtoll(optarg, 10); + if (tmp < 0) { + fprintf(stderr, "Parse failed for the option %c.\n", ch); + return tmp; + } else if (tmp >= INT_MAX) { + fprintf(stderr, "Parsed option was too large %c.\n", ch); + return -ERANGE; + } + + switch (ch) { + case 'q': + g_queue_depth = tmp; + break; + case 'o': + g_io_size = tmp; + break; + case 't': + 
g_time_in_sec = tmp; + break; + case 'k': + g_timeout_in_sec = tmp; + break; + case 'M': + g_rw_percentage = tmp; + g_mix_specified = true; + break; + case 'P': + g_show_performance_ema_period = tmp; + break; + case 'S': + g_show_performance_real_time = 1; + g_show_performance_period_in_usec = tmp * 1000000; + break; + default: + return -EINVAL; + } + } + return 0; +} + +static void +bdevperf_usage(void) +{ + printf(" -q <depth> io depth\n"); + printf(" -o <size> io size in bytes\n"); + printf(" -w <type> io pattern type, must be one of (read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush)\n"); + printf(" -t <time> time in seconds\n"); + printf(" -k <timeout> timeout in seconds to detect starved I/O (default is 0 and disabled)\n"); + printf(" -M <percent> rwmixread (100 for reads, 0 for writes)\n"); + printf(" -P <num> number of moving average period\n"); + printf("\t\t(If set to n, show weighted mean of the previous n IO/s in real time)\n"); + printf("\t\t(Formula: M = 2 / (n + 1), EMA[i+1] = IO/s * M + (1 - M) * EMA[i])\n"); + printf("\t\t(only valid with -S)\n"); + printf(" -S <period> show performance result in real time every <period> seconds\n"); + printf(" -T <bdev> bdev to run against. Default: all available bdevs.\n"); + printf(" -f continue processing I/O even after failures\n"); + printf(" -x disable using zcopy bdev API for read or write I/O\n"); + printf(" -z start bdevperf, but wait for RPC to start tests\n"); + printf(" -A abort the timeout I/O\n"); + printf(" -C enable every core to send I/Os to each bdev\n"); + printf(" -j use job config file"); +} + +static int +verify_test_params(struct spdk_app_opts *opts) +{ + /* When RPC is used for starting tests and + * no rpc_addr was configured for the app, + * use the default address. 
*/ + if (g_wait_for_tests && opts->rpc_addr == NULL) { + opts->rpc_addr = SPDK_DEFAULT_RPC_ADDR; + } + + if (!g_bdevperf_conf_file && g_queue_depth <= 0) { + spdk_app_usage(); + bdevperf_usage(); + return 1; + } + if (!g_bdevperf_conf_file && g_io_size <= 0) { + spdk_app_usage(); + bdevperf_usage(); + return 1; + } + if (!g_bdevperf_conf_file && !g_workload_type) { + spdk_app_usage(); + bdevperf_usage(); + return 1; + } + if (g_time_in_sec <= 0) { + spdk_app_usage(); + bdevperf_usage(); + return 1; + } + g_time_in_usec = g_time_in_sec * 1000000LL; + + if (g_timeout_in_sec < 0) { + spdk_app_usage(); + bdevperf_usage(); + return 1; + } + + if (g_show_performance_ema_period > 0 && + g_show_performance_real_time == 0) { + fprintf(stderr, "-P option must be specified with -S option\n"); + return 1; + } + + if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) { + printf("I/O size of %d is greater than zero copy threshold (%d).\n", + g_io_size, SPDK_BDEV_LARGE_BUF_MAX_SIZE); + printf("Zero copy mechanism will not be used.\n"); + g_zcopy = false; + } + + if (g_bdevperf_conf_file) { + /* workload_type verification happens during config file parsing */ + return 0; + } + + if (!strcmp(g_workload_type, "verify") || + !strcmp(g_workload_type, "reset")) { + g_rw_percentage = 50; + if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) { + fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n", + SPDK_BDEV_LARGE_BUF_MAX_SIZE, g_io_size); + return 1; + } + g_verify = true; + if (!strcmp(g_workload_type, "reset")) { + g_reset = true; + } + } + + if (!strcmp(g_workload_type, "read") || + !strcmp(g_workload_type, "randread") || + !strcmp(g_workload_type, "write") || + !strcmp(g_workload_type, "randwrite") || + !strcmp(g_workload_type, "verify") || + !strcmp(g_workload_type, "reset") || + !strcmp(g_workload_type, "unmap") || + !strcmp(g_workload_type, "write_zeroes") || + !strcmp(g_workload_type, "flush")) { + if (g_mix_specified) { + fprintf(stderr, "Ignoring -M option... 
Please use -M option" + " only when using rw or randrw.\n"); + } + } + + if (!strcmp(g_workload_type, "rw") || + !strcmp(g_workload_type, "randrw")) { + if (g_rw_percentage < 0 || g_rw_percentage > 100) { + fprintf(stderr, + "-M must be specified to value from 0 to 100 " + "for rw or randrw.\n"); + return 1; + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + struct spdk_app_opts opts = {}; + int rc; + + spdk_app_opts_init(&opts); + opts.name = "bdevperf"; + opts.rpc_addr = NULL; + opts.reactor_mask = NULL; + opts.shutdown_cb = spdk_bdevperf_shutdown_cb; + + if ((rc = spdk_app_parse_args(argc, argv, &opts, "xzfq:o:t:w:k:ACM:P:S:T:j:", NULL, + bdevperf_parse_arg, bdevperf_usage)) != + SPDK_APP_PARSE_ARGS_SUCCESS) { + return rc; + } + + if (read_job_config()) { + free_job_config(); + return 1; + } + + if (verify_test_params(&opts) != 0) { + free_job_config(); + exit(1); + } + + rc = spdk_app_start(&opts, bdevperf_run, NULL); + + spdk_app_fini(); + free_job_config(); + return rc; +} diff --git a/src/spdk/test/bdev/bdevperf/bdevperf.py b/src/spdk/test/bdev/bdevperf/bdevperf.py new file mode 100755 index 000000000..178d90c34 --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/bdevperf.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 + +import logging +import argparse +import sys +import shlex + +try: + from rpc.client import print_dict, JSONRPCException + import rpc +except ImportError: + print("SPDK RPC library missing. Please add spdk/scripts/ directory to PYTHONPATH:") + print("'export PYTHONPATH=$PYTHONPATH:./spdk/scripts/'") + exit(1) + +try: + from shlex import quote +except ImportError: + from pipes import quote + + +def print_array(a): + print(" ".join((quote(v) for v in a))) + + +def perform_tests_func(client): + """Perform bdevperf tests according to command line arguments when application was started. + + Args: + none + + Returns: + On success, 0 is returned. On error, -1 is returned. 
+ """ + params = {} + return client.call('perform_tests', params) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='SPDK RPC command line interface. NOTE: spdk/scripts/ is expected in PYTHONPATH') + parser.add_argument('-s', dest='server_addr', + help='RPC domain socket path or IP address', default='/var/tmp/spdk.sock') + parser.add_argument('-p', dest='port', + help='RPC port number (if server_addr is IP address)', + default=5260, type=int) + parser.add_argument('-t', dest='timeout', + help='Timeout as a floating point number expressed in seconds waiting for response. Default: 60.0', + default=60.0, type=float) + parser.add_argument('-v', dest='verbose', action='store_const', const="INFO", + help='Set verbose mode to INFO', default="ERROR") + parser.add_argument('--verbose', dest='verbose', choices=['DEBUG', 'INFO', 'ERROR'], + help="""Set verbose level. """) + subparsers = parser.add_subparsers(help='RPC methods') + + def perform_tests(args): + print_dict(perform_tests_func(args.client)) + + p = subparsers.add_parser('perform_tests', help='Perform bdevperf tests') + p.set_defaults(func=perform_tests) + + def call_rpc_func(args): + try: + args.func(args) + except JSONRPCException as ex: + print(ex.message) + exit(1) + + def execute_script(parser, client, fd): + for rpc_call in map(str.rstrip, fd): + if not rpc_call.strip(): + continue + args = parser.parse_args(shlex.split(rpc_call)) + args.client = client + call_rpc_func(args) + + args = parser.parse_args() + args.client = rpc.client.JSONRPCClient(args.server_addr, args.port, args.timeout, log_level=getattr(logging, args.verbose.upper())) + if hasattr(args, 'func'): + call_rpc_func(args) + elif sys.stdin.isatty(): + # No arguments and no data piped through stdin + parser.print_help() + exit(1) + else: + execute_script(parser, args.client, sys.stdin) diff --git a/src/spdk/test/bdev/bdevperf/common.sh b/src/spdk/test/bdev/bdevperf/common.sh new file mode 100644 index 
000000000..eade380a3 --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/common.sh @@ -0,0 +1,33 @@ +bdevperf=$rootdir/test/bdev/bdevperf/bdevperf + +function create_job() { + local job_section=$1 + local rw=$2 + local filename=$3 + + if [[ $job_section == "global" ]]; then + cat <<- EOF >> "$testdir"/test.conf + [global] + filename=${filename} + EOF + fi + job="[${job_section}]" + echo $global + cat <<- EOF >> "$testdir"/test.conf + ${job} + filename=${filename} + bs=1024 + rwmixread=70 + rw=${rw} + iodepth=256 + cpumask=0xff + EOF +} + +function get_num_jobs() { + echo "$1" | grep -oE "Using job config with [0-9]+ jobs" | grep -oE "[0-9]+" +} + +function cleanup() { + rm -f $testdir/test.conf +} diff --git a/src/spdk/test/bdev/bdevperf/conf.json b/src/spdk/test/bdev/bdevperf/conf.json new file mode 100644 index 000000000..c58407f38 --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/conf.json @@ -0,0 +1,25 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_malloc_create", + "params": { + "name": "Malloc0", + "num_blocks": 102400, + "block_size": 512 + } + }, + { + "method": "bdev_malloc_create", + "params": { + "name": "Malloc1", + "num_blocks": 102400, + "block_size": 512 + } + } + ] + } + ] +} diff --git a/src/spdk/test/bdev/bdevperf/test_config.sh b/src/spdk/test/bdev/bdevperf/test_config.sh new file mode 100755 index 000000000..911d4e27d --- /dev/null +++ b/src/spdk/test/bdev/bdevperf/test_config.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) +source $rootdir/test/common/autotest_common.sh +source $testdir/common.sh + +jsonconf=$testdir/conf.json +testconf=$testdir/test.conf + +trap 'cleanup; exit 1' SIGINT SIGTERM EXIT +#Test inheriting filename and rw_mode parameters from global section. 
+create_job "global" "read" "Malloc0" +create_job "job0" +create_job "job1" +create_job "job2" +create_job "job3" +bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1) +[[ $(get_num_jobs "$bdevperf_output") == "4" ]] + +bdevperf_output=$($bdevperf -C -t 2 --json $jsonconf -j $testconf) + +cleanup +#Test missing global section. +create_job "job0" "write" "Malloc0" +create_job "job1" "write" "Malloc0" +create_job "job2" "write" "Malloc0" +bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1) +[[ $(get_num_jobs "$bdevperf_output") == "3" ]] + +cleanup +#Test inheriting multiple filenames and rw_mode parameters from global section. +create_job "global" "rw" "Malloc0:Malloc1" +create_job "job0" +create_job "job1" +create_job "job2" +create_job "job3" +bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1) +[[ $(get_num_jobs "$bdevperf_output") == "4" ]] +cleanup +trap - SIGINT SIGTERM EXIT diff --git a/src/spdk/test/bdev/blockdev.sh b/src/spdk/test/bdev/blockdev.sh new file mode 100755 index 000000000..12d9c6f52 --- /dev/null +++ b/src/spdk/test/bdev/blockdev.sh @@ -0,0 +1,408 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../..) +source $rootdir/test/common/autotest_common.sh +source $testdir/nbd_common.sh + +rpc_py="$rootdir/scripts/rpc.py" +conf_file="$testdir/bdev.json" +# Make sure the configuration is clean +: > "$conf_file" + +function cleanup() { + rm -f "$SPDK_TEST_STORAGE/aiofile" + rm -f "$SPDK_TEST_STORAGE/spdk-pmem-pool" + rm -f "$conf_file" + + if [[ $test_type == rbd ]]; then + rbd_cleanup + fi +} + +function start_spdk_tgt() { + "$SPDK_BIN_DIR/spdk_tgt" & + spdk_tgt_pid=$! 
+ trap 'killprocess "$spdk_tgt_pid"; exit 1' SIGINT SIGTERM EXIT + waitforlisten "$spdk_tgt_pid" +} + +function setup_bdev_conf() { + "$rpc_py" <<- RPC + bdev_split_create Malloc1 2 + bdev_split_create -s 4 Malloc2 8 + bdev_malloc_create -b Malloc0 32 512 + bdev_malloc_create -b Malloc1 32 512 + bdev_malloc_create -b Malloc2 32 512 + bdev_malloc_create -b Malloc3 32 512 + bdev_malloc_create -b Malloc4 32 512 + bdev_malloc_create -b Malloc5 32 512 + bdev_passthru_create -p TestPT -b Malloc3 + bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5" + RPC + # FIXME: QoS doesn't work properly with json_config, see issue 1146 + #$rpc_py bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3 + #$rpc_py bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0 + if [[ $(uname -s) != "FreeBSD" ]]; then + dd if=/dev/zero of="$SPDK_TEST_STORAGE/aiofile" bs=2048 count=5000 + "$rpc_py" bdev_aio_create "$SPDK_TEST_STORAGE/aiofile" AIO0 2048 + fi +} + +function setup_nvme_conf() { + "$rootdir/scripts/gen_nvme.sh" --json | "$rpc_py" load_subsystem_config +} + +function setup_gpt_conf() { + if [[ $(uname -s) = Linux ]] && hash sgdisk; then + $rootdir/scripts/setup.sh reset + # FIXME: Note that we are racing with the kernel here. There's no guarantee that + # proper object will be already in place under sysfs nor that any udev-like + # helper created proper block devices for us. Replace the below sleep with proper + # udev settle routine. + sleep 1s + # Get nvme devices by following drivers' links towards nvme class + local nvme_devs=(/sys/bus/pci/drivers/nvme/*/nvme/nvme*/nvme*n*) nvme_dev + gpt_nvme="" + # Pick first device which doesn't have any valid partition table + for nvme_dev in "${nvme_devs[@]}"; do + dev=/dev/${nvme_dev##*/} + if ! 
pt=$(parted "$dev" -ms print 2>&1); then + [[ $pt == *"$dev: unrecognised disk label"* ]] || continue + gpt_nvme=$dev + break + fi + done + if [[ -n $gpt_nvme ]]; then + # Create gpt partition table + parted -s "$gpt_nvme" mklabel gpt mkpart first '0%' '50%' mkpart second '50%' '100%' + # change the GUID to SPDK GUID value + # FIXME: Hardcode this in some common place, this value should not be changed much + IFS="()" read -r _ SPDK_GPT_GUID _ < <(grep SPDK_GPT_PART_TYPE_GUID module/bdev/gpt/gpt.h) + SPDK_GPT_GUID=${SPDK_GPT_GUID//, /-} SPDK_GPT_GUID=${SPDK_GPT_GUID//0x/} + sgdisk -t "1:$SPDK_GPT_GUID" "$gpt_nvme" + sgdisk -t "2:$SPDK_GPT_GUID" "$gpt_nvme" + "$rootdir/scripts/setup.sh" + "$rpc_py" bdev_get_bdevs + setup_nvme_conf + else + printf 'Did not find any nvme block devices to work with, aborting the test\n' >&2 + "$rootdir/scripts/setup.sh" + return 1 + fi + else + # Not supported platform or missing tooling, nothing to be done, simply exit the test + # in a graceful manner. + trap - SIGINT SIGTERM EXIT + killprocess "$spdk_tgt_pid" + cleanup + exit 0 + fi +} + +function setup_crypto_aesni_conf() { + # Malloc0 and Malloc1 use AESNI + "$rpc_py" <<- RPC + bdev_malloc_create -b Malloc0 16 512 + bdev_malloc_create -b Malloc1 16 512 + bdev_crypto_create Malloc0 crypto_ram crypto_aesni_mb 0123456789123456 + bdev_crypto_create Malloc1 crypto_ram2 crypto_aesni_mb 9012345678912345 + RPC +} + +function setup_crypto_qat_conf() { + # Malloc0 will use QAT AES_CBC + # Malloc1 will use QAT AES_XTS + "$rpc_py" <<- RPC + bdev_malloc_create -b Malloc0 16 512 + bdev_malloc_create -b Malloc1 16 512 + bdev_crypto_create Malloc0 crypto_ram crypto_qat 0123456789123456 + bdev_crypto_create -c AES_XTS -k2 0123456789123456 Malloc1 crypto_ram3 crypto_qat 0123456789123456 + RPC + "$rpc_py" bdev_get_bdevs -b Malloc1 +} + +function setup_pmem_conf() { + if hash pmempool; then + rm -f "$SPDK_TEST_STORAGE/spdk-pmem-pool" + pmempool create blk --size=32M 512 
"$SPDK_TEST_STORAGE/spdk-pmem-pool" + "$rpc_py" bdev_pmem_create -n Pmem0 "$SPDK_TEST_STORAGE/spdk-pmem-pool" + else + return 1 + fi +} + +function setup_rbd_conf() { + timing_enter rbd_setup + rbd_setup 127.0.0.1 + timing_exit rbd_setup + + "$rpc_py" bdev_rbd_create -b Ceph0 rbd foo 512 +} + +function bdev_bounds() { + $testdir/bdevio/bdevio -w -s $PRE_RESERVED_MEM --json "$conf_file" & + bdevio_pid=$! + trap 'killprocess $bdevio_pid; exit 1' SIGINT SIGTERM EXIT + echo "Process bdevio pid: $bdevio_pid" + waitforlisten $bdevio_pid + $testdir/bdevio/tests.py perform_tests + killprocess $bdevio_pid + trap - SIGINT SIGTERM EXIT +} + +function nbd_function_test() { + if [ $(uname -s) = Linux ] && modprobe -n nbd; then + local rpc_server=/var/tmp/spdk-nbd.sock + local conf=$1 + local nbd_all=($(ls /dev/nbd* | grep -v p)) + local bdev_all=($bdevs_name) + local nbd_num=${#bdevs_all[@]} + if [ ${#nbd_all[@]} -le $nbd_num ]; then + nbd_num=${#nbd_all[@]} + fi + local nbd_list=(${nbd_all[@]:0:$nbd_num}) + local bdev_list=(${bdev_all[@]:0:$nbd_num}) + + if [ ! -e $conf ]; then + return 1 + fi + + modprobe nbd + $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 --json "$conf" & + nbd_pid=$! 
+ trap 'killprocess $nbd_pid; exit 1' SIGINT SIGTERM EXIT + echo "Process nbd pid: $nbd_pid" + waitforlisten $nbd_pid $rpc_server + + nbd_rpc_start_stop_verify $rpc_server "${bdev_list[*]}" + nbd_rpc_data_verify $rpc_server "${bdev_list[*]}" "${nbd_list[*]}" + + killprocess $nbd_pid + trap - SIGINT SIGTERM EXIT + fi + + return 0 +} + +function fio_test_suite() { + # Generate the fio config file given the list of all unclaimed bdevs + fio_config_gen $testdir/bdev.fio verify AIO + for b in $(echo $bdevs | jq -r '.name'); do + echo "[job_$b]" >> $testdir/bdev.fio + echo "filename=$b" >> $testdir/bdev.fio + done + + local fio_params="--ioengine=spdk_bdev --iodepth=8 --bs=4k --runtime=10 $testdir/bdev.fio --spdk_json_conf=$conf_file" + + run_test "bdev_fio_rw_verify" fio_bdev $fio_params --spdk_mem=$PRE_RESERVED_MEM \ + --output=$output_dir/blockdev_fio_verify.txt + rm -f ./*.state + rm -f $testdir/bdev.fio + + # Generate the fio config file given the list of all unclaimed bdevs that support unmap + fio_config_gen $testdir/bdev.fio trim + if [ "$(echo $bdevs | jq -r 'select(.supported_io_types.unmap == true) | .name')" != "" ]; then + for b in $(echo $bdevs | jq -r 'select(.supported_io_types.unmap == true) | .name'); do + echo "[job_$b]" >> $testdir/bdev.fio + echo "filename=$b" >> $testdir/bdev.fio + done + else + rm -f $testdir/bdev.fio + return 0 + fi + + run_test "bdev_fio_trim" fio_bdev $fio_params --output=$output_dir/blockdev_trim.txt + rm -f ./*.state + rm -f $testdir/bdev.fio +} + +function get_io_result() { + local limit_type=$1 + local qos_dev=$2 + local iostat_result + iostat_result=$($rootdir/scripts/iostat.py -d -i 1 -t $QOS_RUN_TIME | grep $qos_dev | tail -1) + if [ $limit_type = IOPS ]; then + iostat_result=$(awk '{print $2}' <<< $iostat_result) + elif [ $limit_type = BANDWIDTH ]; then + iostat_result=$(awk '{print $6}' <<< $iostat_result) + fi + + echo ${iostat_result/.*/} +} + +function run_qos_test() { + local qos_limit=$1 + local qos_result=0 + + 
qos_result=$(get_io_result $2 $3) + if [ $2 = BANDWIDTH ]; then + qos_limit=$((qos_limit * 1024)) + fi + lower_limit=$((qos_limit * 9 / 10)) + upper_limit=$((qos_limit * 11 / 10)) + + # QoS realization is related with bytes transfered. It currently has some variation. + if [ $qos_result -lt $lower_limit ] || [ $qos_result -gt $upper_limit ]; then + echo "Failed to limit the io read rate of NULL bdev by qos" + $rpc_py bdev_malloc_delete $QOS_DEV_1 + $rpc_py bdev_null_delete $QOS_DEV_2 + killprocess $QOS_PID + exit 1 + fi +} + +function qos_function_test() { + local qos_lower_iops_limit=1000 + local qos_lower_bw_limit=2 + local io_result=0 + local iops_limit=0 + local bw_limit=0 + + io_result=$(get_io_result IOPS $QOS_DEV_1) + # Set the IOPS limit as one quarter of the measured performance without QoS + iops_limit=$(((io_result / 4) / qos_lower_iops_limit * qos_lower_iops_limit)) + if [ $iops_limit -gt $qos_lower_iops_limit ]; then + + # Run bdevperf with IOPS rate limit on bdev 1 + $rpc_py bdev_set_qos_limit --rw_ios_per_sec $iops_limit $QOS_DEV_1 + run_test "bdev_qos_iops" run_qos_test $iops_limit IOPS $QOS_DEV_1 + + # Run bdevperf with bandwidth rate limit on bdev 2 + # Set the bandwidth limit as 1/10 of the measure performance without QoS + bw_limit=$(get_io_result BANDWIDTH $QOS_DEV_2) + bw_limit=$((bw_limit / 1024 / 10)) + if [ $bw_limit -lt $qos_lower_bw_limit ]; then + bw_limit=$qos_lower_bw_limit + fi + $rpc_py bdev_set_qos_limit --rw_mbytes_per_sec $bw_limit $QOS_DEV_2 + run_test "bdev_qos_bw" run_qos_test $bw_limit BANDWIDTH $QOS_DEV_2 + + # Run bdevperf with additional read only bandwidth rate limit on bdev 1 + $rpc_py bdev_set_qos_limit --r_mbytes_per_sec $qos_lower_bw_limit $QOS_DEV_1 + run_test "bdev_qos_ro_bw" run_qos_test $qos_lower_bw_limit BANDWIDTH $QOS_DEV_1 + else + echo "Actual IOPS without limiting is too low - exit testing" + fi +} + +function qos_test_suite() { + # Run bdevperf with QoS disabled first + "$testdir/bdevperf/bdevperf" -z -m 0x2 
-q 256 -o 4096 -w randread -t 60 & + QOS_PID=$! + echo "Process qos testing pid: $QOS_PID" + trap 'killprocess $QOS_PID; exit 1' SIGINT SIGTERM EXIT + waitforlisten $QOS_PID + + $rpc_py bdev_malloc_create -b $QOS_DEV_1 128 512 + waitforbdev $QOS_DEV_1 + $rpc_py bdev_null_create $QOS_DEV_2 128 512 + waitforbdev $QOS_DEV_2 + + $rootdir/test/bdev/bdevperf/bdevperf.py perform_tests & + qos_function_test + + $rpc_py bdev_malloc_delete $QOS_DEV_1 + $rpc_py bdev_null_delete $QOS_DEV_2 + killprocess $QOS_PID + trap - SIGINT SIGTERM EXIT +} + +# Initial bdev creation and configuration +#----------------------------------------------------- +QOS_DEV_1="Malloc_0" +QOS_DEV_2="Null_1" +QOS_RUN_TIME=5 + +if [ $(uname -s) = Linux ]; then + # Test dynamic memory management. All hugepages will be reserved at runtime + PRE_RESERVED_MEM=0 +else + # Dynamic memory management is not supported on BSD + PRE_RESERVED_MEM=2048 +fi + +test_type=${1:-bdev} +start_spdk_tgt +case "$test_type" in + bdev) + setup_bdev_conf + ;; + nvme) + setup_nvme_conf + ;; + gpt) + setup_gpt_conf + ;; + crypto_aesni) + setup_crypto_aesni_conf + ;; + crypto_qat) + setup_crypto_qat_conf + ;; + pmem) + setup_pmem_conf + ;; + rbd) + setup_rbd_conf + ;; + *) + echo "invalid test name" + exit 1 + ;; +esac + +# Generate json config and use it throughout all the tests +cat <<- CONF > "$conf_file" + {"subsystems":[ + $("$rpc_py" save_subsystem_config -n bdev) + ]} +CONF + +bdevs=$("$rpc_py" bdev_get_bdevs | jq -r '.[] | select(.claimed == false)') +bdevs_name=$(echo $bdevs | jq -r '.name') +bdev_list=($bdevs_name) +hello_world_bdev=${bdev_list[0]} +trap - SIGINT SIGTERM EXIT +killprocess "$spdk_tgt_pid" +# End bdev configuration +#----------------------------------------------------- + +run_test "bdev_hello_world" $SPDK_EXAMPLE_DIR/hello_bdev --json "$conf_file" -b "$hello_world_bdev" +run_test "bdev_bounds" bdev_bounds +run_test "bdev_nbd" nbd_function_test $conf_file "$bdevs_name" +if [[ $CONFIG_FIO_PLUGIN == y ]]; 
then + if [ "$test_type" = "nvme" ] || [ "$test_type" = "gpt" ]; then + # TODO: once we get real multi-ns drives, re-enable this test for NVMe. + echo "skipping fio tests on NVMe due to multi-ns failures." + else + run_test "bdev_fio" fio_test_suite + fi +else + echo "FIO not available" + exit 1 +fi + +run_test "bdev_verify" $testdir/bdevperf/bdevperf --json "$conf_file" -q 128 -o 4096 -w verify -t 5 -C -m 0x3 +run_test "bdev_write_zeroes" $testdir/bdevperf/bdevperf --json "$conf_file" -q 128 -o 4096 -w write_zeroes -t 1 + +if [[ $test_type == bdev ]]; then + run_test "bdev_qos" qos_test_suite +fi + +# Temporarily disabled - infinite loop +# if [ $RUN_NIGHTLY -eq 1 ]; then +# run_test "bdev_reset" $testdir/bdevperf/bdevperf --json "$conf_file" -q 16 -w reset -o 4096 -t 60 +# fi + +# Bdev and configuration cleanup below this line +#----------------------------------------------------- +if [ "$test_type" = "gpt" ]; then + "$rootdir/scripts/setup.sh" reset + sleep 1s + if [[ -b $gpt_nvme ]]; then + dd if=/dev/zero of="$gpt_nvme" bs=4096 count=8 oflag=direct + fi +fi + +cleanup diff --git a/src/spdk/test/bdev/nbd_common.sh b/src/spdk/test/bdev/nbd_common.sh new file mode 100644 index 000000000..2ea765649 --- /dev/null +++ b/src/spdk/test/bdev/nbd_common.sh @@ -0,0 +1,123 @@ +set -e + +function nbd_start_disks() { + local rpc_server=$1 + local bdev_list=($2) + local nbd_list=($3) + local i + + for ((i = 0; i < ${#nbd_list[@]}; i++)); do + $rootdir/scripts/rpc.py -s $rpc_server nbd_start_disk ${bdev_list[$i]} ${nbd_list[$i]} + # Wait for nbd device ready + waitfornbd $(basename ${nbd_list[$i]}) + done +} + +function nbd_start_disks_without_nbd_idx() { + local rpc_server=$1 + local bdev_list=($2) + local i + local nbd_device + + for ((i = 0; i < ${#bdev_list[@]}; i++)); do + nbd_device=$($rootdir/scripts/rpc.py -s $rpc_server nbd_start_disk ${bdev_list[$i]}) + # Wait for nbd device ready + waitfornbd $(basename ${nbd_device}) + done +} + +function waitfornbd_exit() { + 
local nbd_name=$1 + + for ((i = 1; i <= 20; i++)); do + if grep -q -w $nbd_name /proc/partitions; then + sleep 0.1 + else + break + fi + done + + return 0 +} + +function nbd_stop_disks() { + local rpc_server=$1 + local nbd_list=($2) + local i + + for i in "${nbd_list[@]}"; do + $rootdir/scripts/rpc.py -s $rpc_server nbd_stop_disk $i + waitfornbd_exit $(basename $i) + done +} + +function nbd_get_count() { + # return = count of spdk nbd devices + local rpc_server=$1 + + nbd_disks_json=$($rootdir/scripts/rpc.py -s $rpc_server nbd_get_disks) + nbd_disks_name=$(echo "${nbd_disks_json}" | jq -r '.[] | .nbd_device') + count=$(echo "${nbd_disks_name}" | grep -c /dev/nbd || true) + echo $count +} + +function nbd_dd_data_verify() { + local nbd_list=($1) + local operation=$2 + local tmp_file=$SPDK_TEST_STORAGE/nbdrandtest + + if [ "$operation" = "write" ]; then + # data write + dd if=/dev/urandom of=$tmp_file bs=4096 count=256 + for i in "${nbd_list[@]}"; do + dd if=$tmp_file of=$i bs=4096 count=256 oflag=direct + done + elif [ "$operation" = "verify" ]; then + # data read and verify + for i in "${nbd_list[@]}"; do + cmp -b -n 1M $tmp_file $i + done + rm $tmp_file + fi +} + +function nbd_rpc_data_verify() { + local rpc_server=$1 + local bdev_list=($2) + local nbd_list=($3) + + nbd_start_disks $rpc_server "${bdev_list[*]}" "${nbd_list[*]}" + count=$(nbd_get_count $rpc_server) + if [ $count -ne ${#nbd_list[@]} ]; then + return 1 + fi + + nbd_dd_data_verify "${nbd_list[*]}" "write" + nbd_dd_data_verify "${nbd_list[*]}" "verify" + + nbd_stop_disks $rpc_server "${nbd_list[*]}" + count=$(nbd_get_count $rpc_server) + if [ $count -ne 0 ]; then + return 1 + fi + + return 0 +} + +function nbd_rpc_start_stop_verify() { + local rpc_server=$1 + local bdev_list=($2) + + nbd_start_disks_without_nbd_idx $rpc_server "${bdev_list[*]}" + + nbd_disks_json=$($rootdir/scripts/rpc.py -s $rpc_server nbd_get_disks) + nbd_disks_name=($(echo "${nbd_disks_json}" | jq -r '.[] | .nbd_device')) + 
nbd_stop_disks $rpc_server "${nbd_disks_name[*]}" + + count=$(nbd_get_count $rpc_server) + if [ $count -ne 0 ]; then + return 1 + fi + + return 0 +} |