author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit    e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/test/bdev
parent    Initial commit. (diff)
Adding upstream version 18.2.2. (upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/test/bdev')
-rw-r--r--  src/spdk/test/bdev/Makefile                    44
-rwxr-xr-x  src/spdk/test/bdev/bdev_raid.sh               119
-rw-r--r--  src/spdk/test/bdev/bdevio/.gitignore            1
-rw-r--r--  src/spdk/test/bdev/bdevio/Makefile             48
-rw-r--r--  src/spdk/test/bdev/bdevio/bdevio.c           1433
-rwxr-xr-x  src/spdk/test/bdev/bdevio/tests.py             88
-rw-r--r--  src/spdk/test/bdev/bdevperf/.gitignore          1
-rw-r--r--  src/spdk/test/bdev/bdevperf/Makefile           55
-rw-r--r--  src/spdk/test/bdev/bdevperf/bdevperf.c       2137
-rwxr-xr-x  src/spdk/test/bdev/bdevperf/bdevperf.py        86
-rw-r--r--  src/spdk/test/bdev/bdevperf/common.sh          33
-rw-r--r--  src/spdk/test/bdev/bdevperf/conf.json          25
-rwxr-xr-x  src/spdk/test/bdev/bdevperf/test_config.sh     41
-rwxr-xr-x  src/spdk/test/bdev/blockdev.sh                408
-rw-r--r--  src/spdk/test/bdev/nbd_common.sh              123
15 files changed, 4642 insertions, 0 deletions
diff --git a/src/spdk/test/bdev/Makefile b/src/spdk/test/bdev/Makefile
new file mode 100644
index 000000000..cb15bd49a
--- /dev/null
+++ b/src/spdk/test/bdev/Makefile
@@ -0,0 +1,44 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+DIRS-y = bdevio bdevperf
+
+.PHONY: all clean $(DIRS-y)
+
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk
diff --git a/src/spdk/test/bdev/bdev_raid.sh b/src/spdk/test/bdev/bdev_raid.sh
new file mode 100755
index 000000000..c85d33f6e
--- /dev/null
+++ b/src/spdk/test/bdev/bdev_raid.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../..)
+rpc_server=/var/tmp/spdk-raid.sock
+rpc_py="$rootdir/scripts/rpc.py -s $rpc_server"
+tmp_file=$SPDK_TEST_STORAGE/raidrandtest
+
+source $rootdir/test/common/autotest_common.sh
+source $testdir/nbd_common.sh
+
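+# Write random data to the raid bdev (via NBD) and to a reference file, then
+# discard ranges on the device while zeroing the same ranges in the file and
+# verify the two still match, i.e. unmapped blocks read back as zeroes.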
+function raid_unmap_data_verify() {
+ if hash blkdiscard; then
+ local nbd=$1
+ local rpc_server=$2
+ local blksize
+ blksize=$(lsblk -o LOG-SEC $nbd | grep -v LOG-SEC | cut -d ' ' -f 5)
+ local rw_blk_num=4096
+ local rw_len=$((blksize * rw_blk_num))
+ local unmap_blk_offs=(0 1028 321)
+ local unmap_blk_nums=(128 2035 456)
+ local unmap_off
+ local unmap_len
+
+ # data write
+ dd if=/dev/urandom of=$tmp_file bs=$blksize count=$rw_blk_num
+ dd if=$tmp_file of=$nbd bs=$blksize count=$rw_blk_num oflag=direct
+ blockdev --flushbufs $nbd
+
+ # confirm random data is written correctly in raid0 device
+ cmp -b -n $rw_len $tmp_file $nbd
+
+ for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
+ unmap_off=$((blksize * ${unmap_blk_offs[$i]}))
+ unmap_len=$((blksize * ${unmap_blk_nums[$i]}))
+
+ # data unmap on tmp_file
+ dd if=/dev/zero of=$tmp_file bs=$blksize seek=${unmap_blk_offs[$i]} count=${unmap_blk_nums[$i]} conv=notrunc
+
+ # data unmap on raid bdev
+ blkdiscard -o $unmap_off -l $unmap_len $nbd
+ blockdev --flushbufs $nbd
+
+ # data verify after unmap
+ cmp -b -n $rw_len $tmp_file $nbd
+ done
+ fi
+
+ return 0
+}
+
+function on_error_exit() {
+ if [ -n "$raid_pid" ]; then
+ killprocess $raid_pid
+ fi
+
+ rm -f $tmp_file
+ print_backtrace
+ exit 1
+}
+
+function configure_raid_bdev() {
+ rm -rf $testdir/rpcs.txt
+
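+ # Two 32 MiB malloc bdevs with 512-byte blocks, assembled into a RAID0
+ # bdev named "raid0" with a 64 KiB strip size.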
+ cat <<- EOL >> $testdir/rpcs.txt
+ bdev_malloc_create 32 512 -b Base_1
+ bdev_malloc_create 32 512 -b Base_2
+ bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n raid0
+ EOL
+ $rpc_py < $testdir/rpcs.txt
+
+ rm -rf $testdir/rpcs.txt
+}
+
+function raid_function_test() {
+ if [ $(uname -s) = Linux ] && modprobe -n nbd; then
+ local nbd=/dev/nbd0
+ local raid_bdev
+
+ modprobe nbd
+ $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
+ raid_pid=$!
+ echo "Process raid pid: $raid_pid"
+ waitforlisten $raid_pid $rpc_server
+
+ configure_raid_bdev
+ raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1)
+ if [ -z "$raid_bdev" ]; then
+ echo "No raid0 device in SPDK app"
+ return 1
+ fi
+
+ nbd_start_disks $rpc_server $raid_bdev $nbd
+ count=$(nbd_get_count $rpc_server)
+ if [ $count -ne 1 ]; then
+ return 1
+ fi
+
+ raid_unmap_data_verify $nbd $rpc_server
+
+ nbd_stop_disks $rpc_server $nbd
+ count=$(nbd_get_count $rpc_server)
+ if [ $count -ne 0 ]; then
+ return 1
+ fi
+
+ killprocess $raid_pid
+ else
+ echo "skipping bdev raid tests."
+ fi
+
+ return 0
+}
+
+trap 'on_error_exit;' ERR
+
+raid_function_test
+
+rm -f $tmp_file
diff --git a/src/spdk/test/bdev/bdevio/.gitignore b/src/spdk/test/bdev/bdevio/.gitignore
new file mode 100644
index 000000000..1bb55429d
--- /dev/null
+++ b/src/spdk/test/bdev/bdevio/.gitignore
@@ -0,0 +1 @@
+bdevio
diff --git a/src/spdk/test/bdev/bdevio/Makefile b/src/spdk/test/bdev/bdevio/Makefile
new file mode 100644
index 000000000..83aca58ca
--- /dev/null
+++ b/src/spdk/test/bdev/bdevio/Makefile
@@ -0,0 +1,48 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
+
+APP = bdevio
+
+C_SRCS := bdevio.c
+
+SPDK_LIB_LIST = $(ALL_MODULES_LIST)
+SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM)
+SPDK_LIB_LIST += app_rpc bdev bdev_rpc accel event trace log conf thread util rpc jsonrpc json sock notify
+
+LIBS += -lcunit
+
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
diff --git a/src/spdk/test/bdev/bdevio/bdevio.c b/src/spdk/test/bdev/bdevio/bdevio.c
new file mode 100644
index 000000000..54d1712e3
--- /dev/null
+++ b/src/spdk/test/bdev/bdevio/bdevio.c
@@ -0,0 +1,1433 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/accel_engine.h"
+#include "spdk/env.h"
+#include "spdk/log.h"
+#include "spdk/thread.h"
+#include "spdk/event.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+
+#include "CUnit/Basic.h"
+
+#define BUFFER_IOVS 1024
+#define BUFFER_SIZE (260 * 1024)
+#define BDEV_TASK_ARRAY_SIZE 2048
+
+pthread_mutex_t g_test_mutex;
+pthread_cond_t g_test_cond;
+
+static struct spdk_thread *g_thread_init;
+static struct spdk_thread *g_thread_ut;
+static struct spdk_thread *g_thread_io;
+static bool g_wait_for_tests = false;
+static int g_num_failures = 0;
+
+struct io_target {
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *bdev_desc;
+ struct spdk_io_channel *ch;
+ struct io_target *next;
+};
+
+struct bdevio_request {
+ char *buf;
+ char *fused_buf;
+ int data_len;
+ uint64_t offset;
+ struct iovec iov[BUFFER_IOVS];
+ int iovcnt;
+ struct iovec fused_iov[BUFFER_IOVS];
+ int fused_iovcnt;
+ struct io_target *target;
+};
+
+struct io_target *g_io_targets = NULL;
+struct io_target *g_current_io_target = NULL;
+static void rpc_perform_tests_cb(unsigned num_failures, struct spdk_jsonrpc_request *request);
+
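+/*
+ * Run fn on the dedicated I/O thread and block the calling (UT) thread on
+ * g_test_cond until the operation's completion callback calls wake_ut_thread().
+ */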
+static void
+execute_spdk_function(spdk_msg_fn fn, void *arg)
+{
+ pthread_mutex_lock(&g_test_mutex);
+ spdk_thread_send_msg(g_thread_io, fn, arg);
+ pthread_cond_wait(&g_test_cond, &g_test_mutex);
+ pthread_mutex_unlock(&g_test_mutex);
+}
+
+static void
+wake_ut_thread(void)
+{
+ pthread_mutex_lock(&g_test_mutex);
+ pthread_cond_signal(&g_test_cond);
+ pthread_mutex_unlock(&g_test_mutex);
+}
+
+static void
+__get_io_channel(void *arg)
+{
+ struct io_target *target = arg;
+
+ target->ch = spdk_bdev_get_io_channel(target->bdev_desc);
+ assert(target->ch);
+ wake_ut_thread();
+}
+
+static int
+bdevio_construct_target(struct spdk_bdev *bdev)
+{
+ struct io_target *target;
+ int rc;
+ uint64_t num_blocks = spdk_bdev_get_num_blocks(bdev);
+ uint32_t block_size = spdk_bdev_get_block_size(bdev);
+
+ target = malloc(sizeof(struct io_target));
+ if (target == NULL) {
+ return -ENOMEM;
+ }
+
+ rc = spdk_bdev_open(bdev, true, NULL, NULL, &target->bdev_desc);
+ if (rc != 0) {
+ free(target);
+ SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc);
+ return rc;
+ }
+
+ printf(" %s: %" PRIu64 " blocks of %" PRIu32 " bytes (%" PRIu64 " MiB)\n",
+ spdk_bdev_get_name(bdev),
+ num_blocks, block_size,
+ (num_blocks * block_size + 1024 * 1024 - 1) / (1024 * 1024));
+
+ target->bdev = bdev;
+ target->next = g_io_targets;
+ execute_spdk_function(__get_io_channel, target);
+ g_io_targets = target;
+
+ return 0;
+}
+
+static int
+bdevio_construct_targets(void)
+{
+ struct spdk_bdev *bdev;
+ int rc;
+
+ printf("I/O targets:\n");
+
+ bdev = spdk_bdev_first_leaf();
+ while (bdev != NULL) {
+ rc = bdevio_construct_target(bdev);
+ if (rc < 0) {
+ SPDK_ERRLOG("Could not construct bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc);
+ return rc;
+ }
+ bdev = spdk_bdev_next_leaf(bdev);
+ }
+
+ if (g_io_targets == NULL) {
+ SPDK_ERRLOG("No bdevs to perform tests on\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+__put_io_channel(void *arg)
+{
+ struct io_target *target = arg;
+
+ spdk_put_io_channel(target->ch);
+ wake_ut_thread();
+}
+
+static void
+bdevio_cleanup_targets(void)
+{
+ struct io_target *target;
+
+ target = g_io_targets;
+ while (target != NULL) {
+ execute_spdk_function(__put_io_channel, target);
+ spdk_bdev_close(target->bdev_desc);
+ g_io_targets = target->next;
+ free(target);
+ target = g_io_targets;
+ }
+}
+
+static bool g_completion_success;
+
+static void
+initialize_buffer(char **buf, int pattern, int size)
+{
+ *buf = spdk_zmalloc(size, 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ memset(*buf, pattern, size);
+}
+
+static void
+quick_test_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg)
+{
+ g_completion_success = success;
+ spdk_bdev_free_io(bdev_io);
+ wake_ut_thread();
+}
+
+static void
+__blockdev_write(void *arg)
+{
+ struct bdevio_request *req = arg;
+ struct io_target *target = req->target;
+ int rc;
+
+ if (req->iovcnt) {
+ rc = spdk_bdev_writev(target->bdev_desc, target->ch, req->iov, req->iovcnt, req->offset,
+ req->data_len, quick_test_complete, NULL);
+ } else {
+ rc = spdk_bdev_write(target->bdev_desc, target->ch, req->buf, req->offset,
+ req->data_len, quick_test_complete, NULL);
+ }
+
+ if (rc) {
+ g_completion_success = false;
+ wake_ut_thread();
+ }
+}
+
+static void
+__blockdev_write_zeroes(void *arg)
+{
+ struct bdevio_request *req = arg;
+ struct io_target *target = req->target;
+ int rc;
+
+ rc = spdk_bdev_write_zeroes(target->bdev_desc, target->ch, req->offset,
+ req->data_len, quick_test_complete, NULL);
+ if (rc) {
+ g_completion_success = false;
+ wake_ut_thread();
+ }
+}
+
+static void
+__blockdev_compare_and_write(void *arg)
+{
+ struct bdevio_request *req = arg;
+ struct io_target *target = req->target;
+ int rc;
+
+ rc = spdk_bdev_comparev_and_writev_blocks(target->bdev_desc, target->ch, req->iov, req->iovcnt,
+ req->fused_iov, req->fused_iovcnt, req->offset, req->data_len, quick_test_complete, NULL);
+
+ if (rc) {
+ g_completion_success = false;
+ wake_ut_thread();
+ }
+}
+
+static void
+sgl_chop_buffer(struct bdevio_request *req, int iov_len)
+{
+ int data_len = req->data_len;
+ char *buf = req->buf;
+
+ req->iovcnt = 0;
+ if (!iov_len) {
+ return;
+ }
+
+ for (; data_len > 0 && req->iovcnt < BUFFER_IOVS; req->iovcnt++) {
+ if (data_len < iov_len) {
+ iov_len = data_len;
+ }
+
+ req->iov[req->iovcnt].iov_base = buf;
+ req->iov[req->iovcnt].iov_len = iov_len;
+
+ buf += iov_len;
+ data_len -= iov_len;
+ }
+
+ CU_ASSERT_EQUAL_FATAL(data_len, 0);
+}
+
+static void
+sgl_chop_fused_buffer(struct bdevio_request *req, int iov_len)
+{
+ int data_len = req->data_len;
+ char *buf = req->fused_buf;
+
+ req->fused_iovcnt = 0;
+ if (!iov_len) {
+ return;
+ }
+
+ for (; data_len > 0 && req->fused_iovcnt < BUFFER_IOVS; req->fused_iovcnt++) {
+ if (data_len < iov_len) {
+ iov_len = data_len;
+ }
+
+ req->fused_iov[req->fused_iovcnt].iov_base = buf;
+ req->fused_iov[req->fused_iovcnt].iov_len = iov_len;
+
+ buf += iov_len;
+ data_len -= iov_len;
+ }
+
+ CU_ASSERT_EQUAL_FATAL(data_len, 0);
+}
+
+static void
+blockdev_write(struct io_target *target, char *tx_buf,
+ uint64_t offset, int data_len, int iov_len)
+{
+ struct bdevio_request req;
+
+ req.target = target;
+ req.buf = tx_buf;
+ req.data_len = data_len;
+ req.offset = offset;
+ sgl_chop_buffer(&req, iov_len);
+
+ g_completion_success = false;
+
+ execute_spdk_function(__blockdev_write, &req);
+}
+
+static void
+_blockdev_compare_and_write(struct io_target *target, char *cmp_buf, char *write_buf,
+ uint64_t offset, int data_len, int iov_len)
+{
+ struct bdevio_request req;
+
+ req.target = target;
+ req.buf = cmp_buf;
+ req.fused_buf = write_buf;
+ req.data_len = data_len;
+ req.offset = offset;
+ sgl_chop_buffer(&req, iov_len);
+ sgl_chop_fused_buffer(&req, iov_len);
+
+ g_completion_success = false;
+
+ execute_spdk_function(__blockdev_compare_and_write, &req);
+}
+
+static void
+blockdev_write_zeroes(struct io_target *target, char *tx_buf,
+ uint64_t offset, int data_len)
+{
+ struct bdevio_request req;
+
+ req.target = target;
+ req.buf = tx_buf;
+ req.data_len = data_len;
+ req.offset = offset;
+
+ g_completion_success = false;
+
+ execute_spdk_function(__blockdev_write_zeroes, &req);
+}
+
+static void
+__blockdev_read(void *arg)
+{
+ struct bdevio_request *req = arg;
+ struct io_target *target = req->target;
+ int rc;
+
+ if (req->iovcnt) {
+ rc = spdk_bdev_readv(target->bdev_desc, target->ch, req->iov, req->iovcnt, req->offset,
+ req->data_len, quick_test_complete, NULL);
+ } else {
+ rc = spdk_bdev_read(target->bdev_desc, target->ch, req->buf, req->offset,
+ req->data_len, quick_test_complete, NULL);
+ }
+
+ if (rc) {
+ g_completion_success = false;
+ wake_ut_thread();
+ }
+}
+
+static void
+blockdev_read(struct io_target *target, char *rx_buf,
+ uint64_t offset, int data_len, int iov_len)
+{
+ struct bdevio_request req;
+
+ req.target = target;
+ req.buf = rx_buf;
+ req.data_len = data_len;
+ req.offset = offset;
+ req.iovcnt = 0;
+ sgl_chop_buffer(&req, iov_len);
+
+ g_completion_success = false;
+
+ execute_spdk_function(__blockdev_read, &req);
+}
+
+static int
+blockdev_write_read_data_match(char *rx_buf, char *tx_buf, int data_length)
+{
+ int rc;
+ rc = memcmp(rx_buf, tx_buf, data_length);
+
+ spdk_free(rx_buf);
+ spdk_free(tx_buf);
+
+ return rc;
+}
+
+static bool
+blockdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t data_length)
+{
+ if (data_length < spdk_bdev_get_block_size(bdev) ||
+ data_length % spdk_bdev_get_block_size(bdev) ||
+ data_length / spdk_bdev_get_block_size(bdev) > spdk_bdev_get_num_blocks(bdev)) {
+ return false;
+ }
+
+ return true;
+}
+
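+/*
+ * Write a pattern (or write-zeroes) of data_length bytes at offset, read the
+ * range back and compare the buffers. expected_rc selects whether the I/O
+ * itself is expected to succeed.
+ */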
+static void
+blockdev_write_read(uint32_t data_length, uint32_t iov_len, int pattern, uint64_t offset,
+ int expected_rc, bool write_zeroes)
+{
+ struct io_target *target;
+ char *tx_buf = NULL;
+ char *rx_buf = NULL;
+ int rc;
+
+ target = g_current_io_target;
+
+ if (!blockdev_io_valid_blocks(target->bdev, data_length)) {
+ return;
+ }
+
+ if (!write_zeroes) {
+ initialize_buffer(&tx_buf, pattern, data_length);
+ initialize_buffer(&rx_buf, 0, data_length);
+
+ blockdev_write(target, tx_buf, offset, data_length, iov_len);
+ } else {
+ initialize_buffer(&tx_buf, 0, data_length);
+ initialize_buffer(&rx_buf, pattern, data_length);
+
+ blockdev_write_zeroes(target, tx_buf, offset, data_length);
+ }
+
+
+ if (expected_rc == 0) {
+ CU_ASSERT_EQUAL(g_completion_success, true);
+ } else {
+ CU_ASSERT_EQUAL(g_completion_success, false);
+ }
+ blockdev_read(target, rx_buf, offset, data_length, iov_len);
+
+ if (expected_rc == 0) {
+ CU_ASSERT_EQUAL(g_completion_success, true);
+ } else {
+ CU_ASSERT_EQUAL(g_completion_success, false);
+ }
+
+ if (g_completion_success) {
+ rc = blockdev_write_read_data_match(rx_buf, tx_buf, data_length);
+ /* Assert the write by comparing it with values read
+ * from each blockdev */
+ CU_ASSERT_EQUAL(rc, 0);
+ }
+}
+
+static void
+blockdev_compare_and_write(uint32_t data_length, uint32_t iov_len, uint64_t offset)
+{
+ struct io_target *target;
+ char *tx_buf = NULL;
+ char *write_buf = NULL;
+ char *rx_buf = NULL;
+ int rc;
+
+ target = g_current_io_target;
+
+ if (!blockdev_io_valid_blocks(target->bdev, data_length)) {
+ return;
+ }
+
+ initialize_buffer(&tx_buf, 0xAA, data_length);
+ initialize_buffer(&rx_buf, 0, data_length);
+ initialize_buffer(&write_buf, 0xBB, data_length);
+
+ blockdev_write(target, tx_buf, offset, data_length, iov_len);
+ CU_ASSERT_EQUAL(g_completion_success, true);
+
+ _blockdev_compare_and_write(target, tx_buf, write_buf, offset, data_length, iov_len);
+ CU_ASSERT_EQUAL(g_completion_success, true);
+
+ _blockdev_compare_and_write(target, tx_buf, write_buf, offset, data_length, iov_len);
+ CU_ASSERT_EQUAL(g_completion_success, false);
+
+ blockdev_read(target, rx_buf, offset, data_length, iov_len);
+ CU_ASSERT_EQUAL(g_completion_success, true);
+ rc = blockdev_write_read_data_match(rx_buf, write_buf, data_length);
+ /* Assert the write by comparing it with values read
+ * from each blockdev */
+ CU_ASSERT_EQUAL(rc, 0);
+}
+
+static void
+blockdev_write_read_4k(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 4K */
+ data_length = 4096;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_write_zeroes_read_4k(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 4K */
+ data_length = 4096;
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write_zeroes and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1);
+}
+
+/*
+ * This i/o will not have to split at the bdev layer.
+ */
+static void
+blockdev_write_zeroes_read_1m(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 1M */
+ data_length = 1048576;
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write_zeroes and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1);
+}
+
+/*
+ * This i/o will have to split at the bdev layer if
+ * write-zeroes is not supported by the bdev.
+ */
+static void
+blockdev_write_zeroes_read_3m(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 3M */
+ data_length = 3145728;
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write_zeroes and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1);
+}
+
+/*
+ * This i/o will have to split at the bdev layer if
+ * write-zeroes is not supported by the bdev. It also
+ * tests a write size that is not an even multiple of
+ * the bdev layer zero buffer size.
+ */
+static void
+blockdev_write_zeroes_read_3m_500k(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 3.5M */
+ data_length = 3670016;
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write_zeroes and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 1);
+}
+
+static void
+blockdev_writev_readv_4k(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 4K */
+ data_length = 4096;
+ iov_len = 4096;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_comparev_and_writev(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+
+ data_length = 1;
+ iov_len = 1;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 0;
+
+ blockdev_compare_and_write(data_length, iov_len, offset);
+}
+
+static void
+blockdev_writev_readv_30x4k(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 30 * 4K = 120K */
+ data_length = 4096 * 30;
+ iov_len = 4096;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_write_read_512Bytes(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 512 */
+ data_length = 512;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_writev_readv_512Bytes(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 512 */
+ data_length = 512;
+ iov_len = 512;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_write_read_size_gt_128k(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 132K */
+ data_length = 135168;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_writev_readv_size_gt_128k(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 132K */
+ data_length = 135168;
+ iov_len = 135168;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_writev_readv_size_gt_128k_two_iov(void)
+{
+ uint32_t data_length, iov_len;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 132K */
+ data_length = 135168;
+ iov_len = 128 * 1024;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+
+ blockdev_write_read(data_length, iov_len, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_write_read_invalid_size(void)
+{
+ uint32_t data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size is not a multiple of the block size */
+ data_length = 0x1015;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 8192;
+ pattern = 0xA3;
+ /* Params are invalid, hence the expected return value
+ * of write and read for all blockdevs is < 0 */
+ expected_rc = -1;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_write_read_offset_plus_nbytes_equals_bdev_size(void)
+{
+ struct io_target *target;
+ struct spdk_bdev *bdev;
+ char *tx_buf = NULL;
+ char *rx_buf = NULL;
+ uint64_t offset;
+ uint32_t block_size;
+ int rc;
+
+ target = g_current_io_target;
+ bdev = target->bdev;
+
+ block_size = spdk_bdev_get_block_size(bdev);
+
+ /* The start offset is set to the last block so that
+ * offset + nbytes equals the total size of the
+ * blockdev. */
+ offset = ((spdk_bdev_get_num_blocks(bdev) - 1) * block_size);
+
+ initialize_buffer(&tx_buf, 0xA3, block_size);
+ initialize_buffer(&rx_buf, 0, block_size);
+
+ blockdev_write(target, tx_buf, offset, block_size, 0);
+ CU_ASSERT_EQUAL(g_completion_success, true);
+
+ blockdev_read(target, rx_buf, offset, block_size, 0);
+ CU_ASSERT_EQUAL(g_completion_success, true);
+
+ rc = blockdev_write_read_data_match(rx_buf, tx_buf, block_size);
+ /* Assert the write by comparing it with values read
+ * from each blockdev */
+ CU_ASSERT_EQUAL(rc, 0);
+}
+
+static void
+blockdev_write_read_offset_plus_nbytes_gt_bdev_size(void)
+{
+ struct io_target *target;
+ struct spdk_bdev *bdev;
+ char *tx_buf = NULL;
+ char *rx_buf = NULL;
+ int data_length;
+ uint64_t offset;
+ int pattern;
+
+ /* Tests the overflow condition of the blockdevs. */
+ data_length = 4096;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ pattern = 0xA3;
+
+ target = g_current_io_target;
+ bdev = target->bdev;
+
+ /* The start offset is valid, but offset + nbytes is
+ * greater than the total size of the blockdev, so the
+ * I/O is expected to fail. */
+ offset = ((spdk_bdev_get_num_blocks(bdev) * spdk_bdev_get_block_size(bdev)) - 1024);
+
+ initialize_buffer(&tx_buf, pattern, data_length);
+ initialize_buffer(&rx_buf, 0, data_length);
+
+ blockdev_write(target, tx_buf, offset, data_length, 0);
+ CU_ASSERT_EQUAL(g_completion_success, false);
+
+ blockdev_read(target, rx_buf, offset, data_length, 0);
+ CU_ASSERT_EQUAL(g_completion_success, false);
+}
+
+static void
+blockdev_write_read_max_offset(void)
+{
+ int data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ data_length = 4096;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ /* The start offset has been set to UINT64_MAX such that
+ * adding nbytes wraps around and points to an invalid address. */
+ offset = UINT64_MAX;
+ pattern = 0xA3;
+ /* Params are invalid, hence the expected return value
+ * of write and read for all blockdevs is < 0 */
+ expected_rc = -1;
+
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+blockdev_overlapped_write_read_8k(void)
+{
+ int data_length;
+ uint64_t offset;
+ int pattern;
+ int expected_rc;
+
+ /* Data size = 8K */
+ data_length = 8192;
+ CU_ASSERT_TRUE(data_length < BUFFER_SIZE);
+ offset = 0;
+ pattern = 0xA3;
+ /* Params are valid, hence the expected return value
+ * of write and read for all blockdevs is 0. */
+ expected_rc = 0;
+ /* Assert the write by comparing it with values read
+ * from the same offset for each blockdev */
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+
+ /* Overwrite the pattern 0xbb of size 8K on an address offset overlapping
+ * with the address written above and assert the new value in
+ * the overlapped address range */
+ /* Populate 8k with value 0xBB */
+ pattern = 0xBB;
+ /* Offset = 4096; overlaps the 0-8K range written above */
+ offset = 4096;
+ /* Assert the write by comparing it with values read
+ * from the overlapped offset for each blockdev */
+ blockdev_write_read(data_length, 0, pattern, offset, expected_rc, 0);
+}
+
+static void
+__blockdev_reset(void *arg)
+{
+ struct bdevio_request *req = arg;
+ struct io_target *target = req->target;
+ int rc;
+
+ rc = spdk_bdev_reset(target->bdev_desc, target->ch, quick_test_complete, NULL);
+ if (rc < 0) {
+ g_completion_success = false;
+ wake_ut_thread();
+ }
+}
+
+static void
+blockdev_test_reset(void)
+{
+ struct bdevio_request req;
+ struct io_target *target;
+
+ target = g_current_io_target;
+ req.target = target;
+
+ g_completion_success = false;
+
+ execute_spdk_function(__blockdev_reset, &req);
+
+ /* Workaround: NVMe-oF target doesn't support reset yet - so for now
+ * don't fail the test if it's an NVMe bdev.
+ */
+ if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) {
+ CU_ASSERT_EQUAL(g_completion_success, true);
+ }
+}
+
+struct bdevio_passthrough_request {
+ struct spdk_nvme_cmd cmd;
+ void *buf;
+ uint32_t len;
+ struct io_target *target;
+ int sct;
+ int sc;
+ uint32_t cdw0;
+};
+
+static void
+nvme_pt_test_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg)
+{
+ struct bdevio_passthrough_request *pt_req = arg;
+
+ spdk_bdev_io_get_nvme_status(bdev_io, &pt_req->cdw0, &pt_req->sct, &pt_req->sc);
+ spdk_bdev_free_io(bdev_io);
+ wake_ut_thread();
+}
+
+static void
+__blockdev_nvme_passthru(void *arg)
+{
+ struct bdevio_passthrough_request *pt_req = arg;
+ struct io_target *target = pt_req->target;
+ int rc;
+
+ rc = spdk_bdev_nvme_io_passthru(target->bdev_desc, target->ch,
+ &pt_req->cmd, pt_req->buf, pt_req->len,
+ nvme_pt_test_complete, pt_req);
+ if (rc) {
+ wake_ut_thread();
+ }
+}
+
+static void
+blockdev_test_nvme_passthru_rw(void)
+{
+ struct bdevio_passthrough_request pt_req;
+ void *write_buf, *read_buf;
+ struct io_target *target;
+
+ target = g_current_io_target;
+
+ if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) {
+ return;
+ }
+
+ memset(&pt_req, 0, sizeof(pt_req));
+ pt_req.target = target;
+ pt_req.cmd.opc = SPDK_NVME_OPC_WRITE;
+ pt_req.cmd.nsid = 1;
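+ /* Write a single block starting at LBA 4: cdw10/cdw11 hold the starting
+ * LBA and the zero-based number of blocks in cdw12 is 0. */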
+ *(uint64_t *)&pt_req.cmd.cdw10 = 4;
+ pt_req.cmd.cdw12 = 0;
+
+ pt_req.len = spdk_bdev_get_block_size(target->bdev);
+ write_buf = spdk_malloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ memset(write_buf, 0xA5, pt_req.len);
+ pt_req.buf = write_buf;
+
+ pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC;
+ pt_req.sc = SPDK_NVME_SC_INVALID_FIELD;
+ execute_spdk_function(__blockdev_nvme_passthru, &pt_req);
+ CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC);
+ CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS);
+
+ pt_req.cmd.opc = SPDK_NVME_OPC_READ;
+ read_buf = spdk_zmalloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ pt_req.buf = read_buf;
+
+ pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC;
+ pt_req.sc = SPDK_NVME_SC_INVALID_FIELD;
+ execute_spdk_function(__blockdev_nvme_passthru, &pt_req);
+ CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC);
+ CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS);
+
+ CU_ASSERT(!memcmp(read_buf, write_buf, pt_req.len));
+ spdk_free(read_buf);
+ spdk_free(write_buf);
+}
+
+static void
+blockdev_test_nvme_passthru_vendor_specific(void)
+{
+ struct bdevio_passthrough_request pt_req;
+ struct io_target *target;
+
+ target = g_current_io_target;
+
+ if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_IO)) {
+ return;
+ }
+
+ memset(&pt_req, 0, sizeof(pt_req));
+ pt_req.target = target;
+ pt_req.cmd.opc = 0x7F; /* choose known invalid opcode */
+ pt_req.cmd.nsid = 1;
+
+ pt_req.sct = SPDK_NVME_SCT_VENDOR_SPECIFIC;
+ pt_req.sc = SPDK_NVME_SC_SUCCESS;
+ pt_req.cdw0 = 0xbeef;
+ execute_spdk_function(__blockdev_nvme_passthru, &pt_req);
+ CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC);
+ CU_ASSERT(pt_req.sc == SPDK_NVME_SC_INVALID_OPCODE);
+ CU_ASSERT(pt_req.cdw0 == 0x0);
+}
+
+static void
+__blockdev_nvme_admin_passthru(void *arg)
+{
+ struct bdevio_passthrough_request *pt_req = arg;
+ struct io_target *target = pt_req->target;
+ int rc;
+
+ rc = spdk_bdev_nvme_admin_passthru(target->bdev_desc, target->ch,
+ &pt_req->cmd, pt_req->buf, pt_req->len,
+ nvme_pt_test_complete, pt_req);
+ if (rc) {
+ wake_ut_thread();
+ }
+}
+
+static void
+blockdev_test_nvme_admin_passthru(void)
+{
+ struct io_target *target;
+ struct bdevio_passthrough_request pt_req;
+
+ target = g_current_io_target;
+
+ if (!spdk_bdev_io_type_supported(target->bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
+ return;
+ }
+
+ memset(&pt_req, 0, sizeof(pt_req));
+ pt_req.target = target;
+ pt_req.cmd.opc = SPDK_NVME_OPC_IDENTIFY;
+ pt_req.cmd.nsid = 0;
+ *(uint64_t *)&pt_req.cmd.cdw10 = SPDK_NVME_IDENTIFY_CTRLR;
+
+ pt_req.len = sizeof(struct spdk_nvme_ctrlr_data);
+ pt_req.buf = spdk_malloc(pt_req.len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+
+ pt_req.sct = SPDK_NVME_SCT_GENERIC;
+ pt_req.sc = SPDK_NVME_SC_SUCCESS;
+ execute_spdk_function(__blockdev_nvme_admin_passthru, &pt_req);
+ CU_ASSERT(pt_req.sct == SPDK_NVME_SCT_GENERIC);
+ CU_ASSERT(pt_req.sc == SPDK_NVME_SC_SUCCESS);
+}
+
+static void
+__stop_init_thread(void *arg)
+{
+ unsigned num_failures = g_num_failures;
+ struct spdk_jsonrpc_request *request = arg;
+
+ g_num_failures = 0;
+
+ bdevio_cleanup_targets();
+ if (g_wait_for_tests) {
+ /* Do not stop the app yet, wait for another RPC */
+ rpc_perform_tests_cb(num_failures, request);
+ return;
+ }
+ spdk_app_stop(num_failures);
+}
+
+static void
+stop_init_thread(unsigned num_failures, struct spdk_jsonrpc_request *request)
+{
+ g_num_failures = num_failures;
+
+ spdk_thread_send_msg(g_thread_init, __stop_init_thread, request);
+}
+
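+/*
+ * One CUnit suite is registered per I/O target: suite_init latches the first
+ * target and suite_fini advances g_current_io_target, so consecutive suites
+ * run against consecutive entries of the g_io_targets list.
+ */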
+static int
+suite_init(void)
+{
+ if (g_current_io_target == NULL) {
+ g_current_io_target = g_io_targets;
+ }
+ return 0;
+}
+
+static int
+suite_fini(void)
+{
+ g_current_io_target = g_current_io_target->next;
+ return 0;
+}
+
+#define SUITE_NAME_MAX 64
+
+static int
+__setup_ut_on_single_target(struct io_target *target)
+{
+ unsigned rc = 0;
+ CU_pSuite suite = NULL;
+ char name[SUITE_NAME_MAX];
+
+ snprintf(name, sizeof(name), "bdevio tests on: %s", spdk_bdev_get_name(target->bdev));
+ suite = CU_add_suite(name, suite_init, suite_fini);
+ if (suite == NULL) {
+ CU_cleanup_registry();
+ rc = CU_get_error();
+ return -rc;
+ }
+
+ if (
+ CU_add_test(suite, "blockdev write read 4k", blockdev_write_read_4k) == NULL
+ || CU_add_test(suite, "blockdev write zeroes read 4k", blockdev_write_zeroes_read_4k) == NULL
+ || CU_add_test(suite, "blockdev write zeroes read 1m", blockdev_write_zeroes_read_1m) == NULL
+ || CU_add_test(suite, "blockdev write zeroes read 3m", blockdev_write_zeroes_read_3m) == NULL
+ || CU_add_test(suite, "blockdev write zeroes read 3.5m", blockdev_write_zeroes_read_3m_500k) == NULL
+ || CU_add_test(suite, "blockdev reset",
+ blockdev_test_reset) == NULL
+ || CU_add_test(suite, "blockdev write read 512 bytes",
+ blockdev_write_read_512Bytes) == NULL
+ || CU_add_test(suite, "blockdev write read size > 128k",
+ blockdev_write_read_size_gt_128k) == NULL
+ || CU_add_test(suite, "blockdev write read invalid size",
+ blockdev_write_read_invalid_size) == NULL
+ || CU_add_test(suite, "blockdev write read offset + nbytes == size of blockdev",
+ blockdev_write_read_offset_plus_nbytes_equals_bdev_size) == NULL
+ || CU_add_test(suite, "blockdev write read offset + nbytes > size of blockdev",
+ blockdev_write_read_offset_plus_nbytes_gt_bdev_size) == NULL
+ || CU_add_test(suite, "blockdev write read max offset",
+ blockdev_write_read_max_offset) == NULL
+ || CU_add_test(suite, "blockdev write read 8k on overlapped address offset",
+ blockdev_overlapped_write_read_8k) == NULL
+ || CU_add_test(suite, "blockdev writev readv 4k", blockdev_writev_readv_4k) == NULL
+ || CU_add_test(suite, "blockdev writev readv 30 x 4k",
+ blockdev_writev_readv_30x4k) == NULL
+ || CU_add_test(suite, "blockdev writev readv 512 bytes",
+ blockdev_writev_readv_512Bytes) == NULL
+ || CU_add_test(suite, "blockdev writev readv size > 128k",
+ blockdev_writev_readv_size_gt_128k) == NULL
+ || CU_add_test(suite, "blockdev writev readv size > 128k in two iovs",
+ blockdev_writev_readv_size_gt_128k_two_iov) == NULL
+ || CU_add_test(suite, "blockdev comparev and writev", blockdev_comparev_and_writev) == NULL
+ || CU_add_test(suite, "blockdev nvme passthru rw",
+ blockdev_test_nvme_passthru_rw) == NULL
+ || CU_add_test(suite, "blockdev nvme passthru vendor specific",
+ blockdev_test_nvme_passthru_vendor_specific) == NULL
+ || CU_add_test(suite, "blockdev nvme admin passthru",
+ blockdev_test_nvme_admin_passthru) == NULL
+ ) {
+ CU_cleanup_registry();
+ rc = CU_get_error();
+ return -rc;
+ }
+ return 0;
+}
+
+static void
+__run_ut_thread(void *arg)
+{
+ struct spdk_jsonrpc_request *request = arg;
+ int rc = 0;
+ struct io_target *target;
+ unsigned num_failures;
+
+ if (CU_initialize_registry() != CUE_SUCCESS) {
+ /* CUnit error, probably won't recover */
+ rc = CU_get_error();
+ stop_init_thread(-rc, request);
+ }
+
+ target = g_io_targets;
+ while (target != NULL) {
+ rc = __setup_ut_on_single_target(target);
+ if (rc < 0) {
+ /* CUnit error, probably won't recover */
+ stop_init_thread(-rc, request);
+ }
+ target = target->next;
+ }
+ CU_basic_set_mode(CU_BRM_VERBOSE);
+ CU_basic_run_tests();
+ num_failures = CU_get_number_of_failures();
+ CU_cleanup_registry();
+
+ stop_init_thread(num_failures, request);
+}
+
+static void
+__construct_targets(void *arg)
+{
+ if (bdevio_construct_targets() < 0) {
+ spdk_app_stop(-1);
+ return;
+ }
+
+ spdk_thread_send_msg(g_thread_ut, __run_ut_thread, NULL);
+}
+
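+/*
+ * Requires at least three cores in the application core mask (reactor_mask
+ * defaults to 0x7 below): the init thread stays on the current core while
+ * dedicated UT and I/O threads are created on two other cores.
+ */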
+static void
+test_main(void *arg1)
+{
+ struct spdk_cpuset tmpmask = {}, *appmask;
+ uint32_t cpu, init_cpu;
+
+ pthread_mutex_init(&g_test_mutex, NULL);
+ pthread_cond_init(&g_test_cond, NULL);
+
+ appmask = spdk_app_get_core_mask();
+
+ if (spdk_cpuset_count(appmask) < 3) {
+ spdk_app_stop(-1);
+ return;
+ }
+
+ init_cpu = spdk_env_get_current_core();
+ g_thread_init = spdk_get_thread();
+
+ for (cpu = 0; cpu < SPDK_ENV_LCORE_ID_ANY; cpu++) {
+ if (cpu != init_cpu && spdk_cpuset_get_cpu(appmask, cpu)) {
+ spdk_cpuset_zero(&tmpmask);
+ spdk_cpuset_set_cpu(&tmpmask, cpu, true);
+ g_thread_ut = spdk_thread_create("ut_thread", &tmpmask);
+ break;
+ }
+ }
+
+ if (cpu == SPDK_ENV_LCORE_ID_ANY) {
+ spdk_app_stop(-1);
+ return;
+ }
+
+ for (cpu++; cpu < SPDK_ENV_LCORE_ID_ANY; cpu++) {
+ if (cpu != init_cpu && spdk_cpuset_get_cpu(appmask, cpu)) {
+ spdk_cpuset_zero(&tmpmask);
+ spdk_cpuset_set_cpu(&tmpmask, cpu, true);
+ g_thread_io = spdk_thread_create("io_thread", &tmpmask);
+ break;
+ }
+ }
+
+ if (cpu == SPDK_ENV_LCORE_ID_ANY) {
+ spdk_app_stop(-1);
+ return;
+ }
+
+ if (g_wait_for_tests) {
+ /* Do not perform any tests until RPC is received */
+ return;
+ }
+
+ spdk_thread_send_msg(g_thread_init, __construct_targets, NULL);
+}
+
+static void
+bdevio_usage(void)
+{
+ printf(" -w start bdevio app and wait for RPC to start the tests\n");
+}
+
+static int
+bdevio_parse_arg(int ch, char *arg)
+{
+ switch (ch) {
+ case 'w':
+ g_wait_for_tests = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+struct rpc_perform_tests {
+ char *name;
+};
+
+static void
+free_rpc_perform_tests(struct rpc_perform_tests *r)
+{
+ free(r->name);
+}
+
+static const struct spdk_json_object_decoder rpc_perform_tests_decoders[] = {
+ {"name", offsetof(struct rpc_perform_tests, name), spdk_json_decode_string, true},
+};
+
+static void
+rpc_perform_tests_cb(unsigned num_failures, struct spdk_jsonrpc_request *request)
+{
+ struct spdk_json_write_ctx *w;
+
+ if (num_failures == 0) {
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_uint32(w, num_failures);
+ spdk_jsonrpc_end_result(request, w);
+ } else {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "%d test cases failed", num_failures);
+ }
+}
+
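+/*
+ * Handler for the "perform_tests" RPC registered below; it is driven by
+ * test/bdev/bdevio/tests.py when bdevio is started with -w. With a bdev name
+ * only that bdev is tested, otherwise targets are constructed for all leaf bdevs.
+ */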
+static void
+rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ struct rpc_perform_tests req = {NULL};
+ struct spdk_bdev *bdev;
+ int rc;
+
+ if (params && spdk_json_decode_object(params, rpc_perform_tests_decoders,
+ SPDK_COUNTOF(rpc_perform_tests_decoders),
+ &req)) {
+ SPDK_ERRLOG("spdk_json_decode_object failed\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+ goto invalid;
+ }
+
+ if (req.name) {
+ bdev = spdk_bdev_get_by_name(req.name);
+ if (bdev == NULL) {
+ SPDK_ERRLOG("Bdev '%s' does not exist\n", req.name);
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Bdev '%s' does not exist: %s",
+ req.name, spdk_strerror(ENODEV));
+ goto invalid;
+ }
+ rc = bdevio_construct_target(bdev);
+ if (rc < 0) {
+ SPDK_ERRLOG("Could not construct target for bdev '%s'\n", spdk_bdev_get_name(bdev));
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Could not construct target for bdev '%s': %s",
+ spdk_bdev_get_name(bdev), spdk_strerror(-rc));
+ goto invalid;
+ }
+ } else {
+ rc = bdevio_construct_targets();
+ if (rc < 0) {
+ SPDK_ERRLOG("Could not construct targets for all bdevs\n");
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "Could not construct targets for all bdevs: %s",
+ spdk_strerror(-rc));
+ goto invalid;
+ }
+ }
+ free_rpc_perform_tests(&req);
+
+ spdk_thread_send_msg(g_thread_ut, __run_ut_thread, request);
+
+ return;
+
+invalid:
+ free_rpc_perform_tests(&req);
+}
+SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME)
+
+int
+main(int argc, char **argv)
+{
+ int rc;
+ struct spdk_app_opts opts = {};
+
+ spdk_app_opts_init(&opts);
+ opts.name = "bdevio";
+ opts.reactor_mask = "0x7";
+
+ if ((rc = spdk_app_parse_args(argc, argv, &opts, "w", NULL,
+ bdevio_parse_arg, bdevio_usage)) !=
+ SPDK_APP_PARSE_ARGS_SUCCESS) {
+ return rc;
+ }
+
+ rc = spdk_app_start(&opts, test_main, NULL);
+ spdk_app_fini();
+
+ return rc;
+}
diff --git a/src/spdk/test/bdev/bdevio/tests.py b/src/spdk/test/bdev/bdevio/tests.py
new file mode 100755
index 000000000..8b46061d0
--- /dev/null
+++ b/src/spdk/test/bdev/bdevio/tests.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+import logging
+import argparse
+import sys
+import shlex
+
+try:
+ from rpc.client import print_dict, JSONRPCException
+ import rpc
+except ImportError:
+ print("SPDK RPC library missing. Please add spdk/scripts/ directory to PYTHONPATH:")
+ print("'export PYTHONPATH=$PYTHONPATH:./spdk/scripts/'")
+ exit(1)
+
+try:
+ from shlex import quote
+except ImportError:
+ from pipes import quote
+
+
+def print_array(a):
+ print(" ".join((quote(v) for v in a)))
+
+
+def perform_tests_func(client, name=None):
+ """
+
+ Args:
+ name: bdev name to perform bdevio tests on (optional; if omitted, test all bdevs)
+
+ Returns:
+ Number of failures in tests. 0 means no errors found.
+ """
+ params = {}
+ if name:
+ params['name'] = name
+ return client.call('perform_tests', params)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='SPDK RPC command line interface. NOTE: spdk/scripts/ is expected in PYTHONPATH')
+ parser.add_argument('-s', dest='server_addr',
+ help='RPC domain socket path or IP address', default='/var/tmp/spdk.sock')
+ parser.add_argument('-p', dest='port',
+ help='RPC port number (if server_addr is IP address)',
+ default=5260, type=int)
+ parser.add_argument('-t', dest='timeout',
+ help='Timeout as a floating point number expressed in seconds waiting for response. Default: 60.0',
+ default=60.0, type=float)
+ parser.add_argument('-v', dest='verbose', action='store_const', const="INFO",
+ help='Set verbose mode to INFO', default="ERROR")
+ parser.add_argument('--verbose', dest='verbose', choices=['DEBUG', 'INFO', 'ERROR'],
+ help="""Set verbose level. """)
+ subparsers = parser.add_subparsers(help='RPC methods')
+
+ def perform_tests(args):
+ print_dict(perform_tests_func(args.client, name=args.name))
+
+ p = subparsers.add_parser('perform_tests', help='Perform all bdevio tests on select bdev')
+ p.add_argument('-b', '--name', help="Name of the Blockdev. Example: Nvme0n1")
+ p.set_defaults(func=perform_tests)
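+ # Example (bdevio started with -w, default RPC socket):
+ # ./tests.py perform_tests -b Nvme0n1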
+
+ def call_rpc_func(args):
+ try:
+ args.func(args)
+ except JSONRPCException as ex:
+ print(ex.message)
+ exit(1)
+
+ def execute_script(parser, client, fd):
+ for rpc_call in map(str.rstrip, fd):
+ if not rpc_call.strip():
+ continue
+ args = parser.parse_args(shlex.split(rpc_call))
+ args.client = client
+ call_rpc_func(args)
+
+ args = parser.parse_args()
+ args.client = rpc.client.JSONRPCClient(args.server_addr, args.port, args.timeout, log_level=getattr(logging, args.verbose.upper()))
+ if hasattr(args, 'func'):
+ call_rpc_func(args)
+ elif sys.stdin.isatty():
+ # No arguments and no data piped through stdin
+ parser.print_help()
+ exit(1)
+ else:
+ execute_script(parser, args.client, sys.stdin)
diff --git a/src/spdk/test/bdev/bdevperf/.gitignore b/src/spdk/test/bdev/bdevperf/.gitignore
new file mode 100644
index 000000000..e14ddd841
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/.gitignore
@@ -0,0 +1 @@
+bdevperf
diff --git a/src/spdk/test/bdev/bdevperf/Makefile b/src/spdk/test/bdev/bdevperf/Makefile
new file mode 100644
index 000000000..689d7fe10
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/Makefile
@@ -0,0 +1,55 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
+
+APP = bdevperf
+
+C_SRCS := bdevperf.c
+
+SPDK_LIB_LIST = $(ALL_MODULES_LIST)
+SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM)
+SPDK_LIB_LIST += bdev accel event trace log conf thread util sock notify
+SPDK_LIB_LIST += rpc jsonrpc json app_rpc log_rpc bdev_rpc
+
+ifeq ($(OS),Linux)
+SPDK_LIB_LIST += event_nbd nbd
+endif
+
+ifeq ($(SPDK_ROOT_DIR)/lib/env_dpdk,$(CONFIG_ENV))
+SPDK_LIB_LIST += env_dpdk_rpc
+endif
+
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
diff --git a/src/spdk/test/bdev/bdevperf/bdevperf.c b/src/spdk/test/bdev/bdevperf/bdevperf.c
new file mode 100644
index 000000000..adcdf31cb
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/bdevperf.c
@@ -0,0 +1,2137 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/accel_engine.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/event.h"
+#include "spdk/log.h"
+#include "spdk/util.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/bit_array.h"
+#include "spdk/conf.h"
+
+#define BDEVPERF_CONFIG_MAX_FILENAME 1024
+#define BDEVPERF_CONFIG_UNDEFINED -1
+#define BDEVPERF_CONFIG_ERROR -2
+
+struct bdevperf_task {
+ struct iovec iov;
+ struct bdevperf_job *job;
+ struct spdk_bdev_io *bdev_io;
+ void *buf;
+ void *md_buf;
+ uint64_t offset_blocks;
+ struct bdevperf_task *task_to_abort;
+ enum spdk_bdev_io_type io_type;
+ TAILQ_ENTRY(bdevperf_task) link;
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+static const char *g_workload_type = NULL;
+static int g_io_size = 0;
+/* initialize to invalid value so we can detect if user overrides it. */
+static int g_rw_percentage = -1;
+static bool g_verify = false;
+static bool g_reset = false;
+static bool g_continue_on_failure = false;
+static bool g_abort = false;
+static int g_queue_depth = 0;
+static uint64_t g_time_in_usec;
+static int g_show_performance_real_time = 0;
+static uint64_t g_show_performance_period_in_usec = 1000000;
+static uint64_t g_show_performance_period_num = 0;
+static uint64_t g_show_performance_ema_period = 0;
+static int g_run_rc = 0;
+static bool g_shutdown = false;
+static uint64_t g_shutdown_tsc;
+static bool g_zcopy = true;
+static struct spdk_thread *g_master_thread;
+static int g_time_in_sec = 0;
+static bool g_mix_specified = false;
+static const char *g_job_bdev_name;
+static bool g_wait_for_tests = false;
+static struct spdk_jsonrpc_request *g_request = NULL;
+static bool g_multithread_mode = false;
+static int g_timeout_in_sec;
+static struct spdk_conf *g_bdevperf_conf = NULL;
+static const char *g_bdevperf_conf_file = NULL;
+
+static struct spdk_cpuset g_all_cpuset;
+static struct spdk_poller *g_perf_timer = NULL;
+
+static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task);
+static void rpc_perform_tests_cb(void);
+
+struct bdevperf_job {
+ char *name;
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *bdev_desc;
+ struct spdk_io_channel *ch;
+ TAILQ_ENTRY(bdevperf_job) link;
+ struct spdk_thread *thread;
+
+ const char *workload_type;
+ int io_size;
+ int rw_percentage;
+ bool is_random;
+ bool verify;
+ bool reset;
+ bool continue_on_failure;
+ bool unmap;
+ bool write_zeroes;
+ bool flush;
+ bool abort;
+ int queue_depth;
+
+ uint64_t io_completed;
+ uint64_t io_failed;
+ uint64_t io_timeout;
+ uint64_t prev_io_completed;
+ double ema_io_per_second;
+ int current_queue_depth;
+ uint64_t size_in_ios;
+ uint64_t ios_base;
+ uint64_t offset_in_ios;
+ uint64_t io_size_blocks;
+ uint64_t buf_size;
+ uint32_t dif_check_flags;
+ bool is_draining;
+ struct spdk_poller *run_timer;
+ struct spdk_poller *reset_timer;
+ struct spdk_bit_array *outstanding;
+ TAILQ_HEAD(, bdevperf_task) task_list;
+};
+
+struct spdk_bdevperf {
+ TAILQ_HEAD(, bdevperf_job) jobs;
+ uint32_t running_jobs;
+};
+
+static struct spdk_bdevperf g_bdevperf = {
+ .jobs = TAILQ_HEAD_INITIALIZER(g_bdevperf.jobs),
+ .running_jobs = 0,
+};
+
+enum job_config_rw {
+ JOB_CONFIG_RW_READ = 0,
+ JOB_CONFIG_RW_WRITE,
+ JOB_CONFIG_RW_RANDREAD,
+ JOB_CONFIG_RW_RANDWRITE,
+ JOB_CONFIG_RW_RW,
+ JOB_CONFIG_RW_RANDRW,
+ JOB_CONFIG_RW_VERIFY,
+ JOB_CONFIG_RW_RESET,
+ JOB_CONFIG_RW_UNMAP,
+ JOB_CONFIG_RW_FLUSH,
+ JOB_CONFIG_RW_WRITE_ZEROES,
+};
+
+/* Stores the values parsed from one section of the job config file */
+struct job_config {
+ const char *name;
+ const char *filename;
+ struct spdk_cpuset cpumask;
+ int bs;
+ int iodepth;
+ int rwmixread;
+ int offset;
+ int length;
+ enum job_config_rw rw;
+ TAILQ_ENTRY(job_config) link;
+};
+
+TAILQ_HEAD(, job_config) job_config_list
+ = TAILQ_HEAD_INITIALIZER(job_config_list);
+
+static bool g_performance_dump_active = false;
+
+struct bdevperf_aggregate_stats {
+ struct bdevperf_job *current_job;
+ uint64_t io_time_in_usec;
+ uint64_t ema_period;
+ double total_io_per_second;
+ double total_mb_per_second;
+ double total_failed_per_second;
+ double total_timeout_per_second;
+};
+
+static struct bdevperf_aggregate_stats g_stats = {};
+
+/*
+ * Cumulative Moving Average (CMA): average of all data points up to the current one
+ * Exponential Moving Average (EMA): weighted mean of the previous n data points, with more weight given to recent ones
+ * Simple Moving Average (SMA): unweighted mean of the previous n data points
+ *
+ * Bdevperf supports CMA and EMA.
+ */
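+/*
+ * Worked example (illustrative): with "-P 4" the smoothing factor used by
+ * get_ema_io_per_second() below is M = 2 / (4 + 1) = 0.4, so each reporting
+ * interval updates EMA[i+1] = 0.4 * IO/s + 0.6 * EMA[i].
+ */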
+static double
+get_cma_io_per_second(struct bdevperf_job *job, uint64_t io_time_in_usec)
+{
+ return (double)job->io_completed * 1000000 / io_time_in_usec;
+}
+
+static double
+get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period)
+{
+ double io_completed, io_per_second;
+
+ io_completed = job->io_completed;
+ io_per_second = (double)(io_completed - job->prev_io_completed) * 1000000
+ / g_show_performance_period_in_usec;
+ job->prev_io_completed = io_completed;
+
+ job->ema_io_per_second += (io_per_second - job->ema_io_per_second) * 2
+ / (ema_period + 1);
+ return job->ema_io_per_second;
+}
+
+static void
+performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job)
+{
+ double io_per_second, mb_per_second, failed_per_second, timeout_per_second;
+
+ printf("\r Thread name: %s\n", spdk_thread_get_name(job->thread));
+ printf("\r Core Mask: 0x%s\n", spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));
+
+ if (stats->ema_period == 0) {
+ io_per_second = get_cma_io_per_second(job, stats->io_time_in_usec);
+ } else {
+ io_per_second = get_ema_io_per_second(job, stats->ema_period);
+ }
+ mb_per_second = io_per_second * job->io_size / (1024 * 1024);
+ failed_per_second = (double)job->io_failed * 1000000 / stats->io_time_in_usec;
+ timeout_per_second = (double)job->io_timeout * 1000000 / stats->io_time_in_usec;
+
+ printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n",
+ job->name, io_per_second, mb_per_second);
+ if (failed_per_second != 0) {
+ printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n",
+ "", failed_per_second, timeout_per_second);
+ }
+ stats->total_io_per_second += io_per_second;
+ stats->total_mb_per_second += mb_per_second;
+ stats->total_failed_per_second += failed_per_second;
+ stats->total_timeout_per_second += timeout_per_second;
+}
+
+static void
+generate_data(void *buf, int buf_len, int block_size, void *md_buf, int md_size,
+ int num_blocks, int seed)
+{
+ int offset_blocks = 0, md_offset, data_block_size;
+
+ if (buf_len < num_blocks * block_size) {
+ return;
+ }
+
+ if (md_buf == NULL) {
+ data_block_size = block_size - md_size;
+ md_buf = (char *)buf + data_block_size;
+ md_offset = block_size;
+ } else {
+ data_block_size = block_size;
+ md_offset = md_size;
+ }
+
+ while (offset_blocks < num_blocks) {
+ memset(buf, seed, data_block_size);
+ memset(md_buf, seed, md_size);
+ buf += block_size;
+ md_buf += md_offset;
+ offset_blocks++;
+ }
+}
+
+static bool
+copy_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size,
+ void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks)
+{
+ if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) {
+ return false;
+ }
+
+ assert((wr_md_buf != NULL) == (rd_md_buf != NULL));
+
+ memcpy(wr_buf, rd_buf, block_size * num_blocks);
+
+ if (wr_md_buf != NULL) {
+ memcpy(wr_md_buf, rd_md_buf, md_size * num_blocks);
+ }
+
+ return true;
+}
+
+static bool
+verify_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size,
+ void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks, bool md_check)
+{
+ int offset_blocks = 0, md_offset, data_block_size;
+
+ if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) {
+ return false;
+ }
+
+ assert((wr_md_buf != NULL) == (rd_md_buf != NULL));
+
+ if (wr_md_buf == NULL) {
+ data_block_size = block_size - md_size;
+ wr_md_buf = (char *)wr_buf + data_block_size;
+ rd_md_buf = (char *)rd_buf + data_block_size;
+ md_offset = block_size;
+ } else {
+ data_block_size = block_size;
+ md_offset = md_size;
+ }
+
+ while (offset_blocks < num_blocks) {
+ if (memcmp(wr_buf, rd_buf, data_block_size) != 0) {
+ return false;
+ }
+
+ wr_buf += block_size;
+ rd_buf += block_size;
+
+ if (md_check) {
+ if (memcmp(wr_md_buf, rd_md_buf, md_size) != 0) {
+ return false;
+ }
+
+ wr_md_buf += md_offset;
+ rd_md_buf += md_offset;
+ }
+
+ offset_blocks++;
+ }
+
+ return true;
+}
+
+static void
+free_job_config(void)
+{
+ struct job_config *config, *tmp;
+
+ spdk_conf_free(g_bdevperf_conf);
+ g_bdevperf_conf = NULL;
+
+ TAILQ_FOREACH_SAFE(config, &job_config_list, link, tmp) {
+ TAILQ_REMOVE(&job_config_list, config, link);
+ free(config);
+ }
+}
+
+static void
+bdevperf_test_done(void *ctx)
+{
+ struct bdevperf_job *job, *jtmp;
+ struct bdevperf_task *task, *ttmp;
+
+ if (g_time_in_usec && !g_run_rc) {
+ g_stats.io_time_in_usec = g_time_in_usec;
+
+ if (g_performance_dump_active) {
+ spdk_thread_send_msg(spdk_get_thread(), bdevperf_test_done, NULL);
+ return;
+ }
+ } else {
+ printf("Job run time less than one microsecond, no performance data will be shown\n");
+ }
+
+ if (g_show_performance_real_time) {
+ spdk_poller_unregister(&g_perf_timer);
+ }
+
+ if (g_shutdown) {
+ g_time_in_usec = g_shutdown_tsc * 1000000 / spdk_get_ticks_hz();
+ printf("Received shutdown signal, test time was about %.6f seconds\n",
+ (double)g_time_in_usec / 1000000);
+ }
+
+ TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
+ TAILQ_REMOVE(&g_bdevperf.jobs, job, link);
+
+ performance_dump_job(&g_stats, job);
+
+ TAILQ_FOREACH_SAFE(task, &job->task_list, link, ttmp) {
+ TAILQ_REMOVE(&job->task_list, task, link);
+ spdk_free(task->buf);
+ spdk_free(task->md_buf);
+ free(task);
+ }
+
+ if (job->verify) {
+ spdk_bit_array_free(&job->outstanding);
+ }
+
+ free(job->name);
+ free(job);
+ }
+
+ printf("\r =====================================================\n");
+ printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n",
+ "Total", g_stats.total_io_per_second, g_stats.total_mb_per_second);
+ if (g_stats.total_failed_per_second != 0 || g_stats.total_timeout_per_second != 0) {
+ printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n",
+ "", g_stats.total_failed_per_second, g_stats.total_timeout_per_second);
+ }
+ fflush(stdout);
+
+ if (g_request && !g_shutdown) {
+ rpc_perform_tests_cb();
+ } else {
+ spdk_app_stop(g_run_rc);
+ }
+}
+
+static void
+bdevperf_job_end(void *ctx)
+{
+ assert(g_master_thread == spdk_get_thread());
+
+ if (--g_bdevperf.running_jobs == 0) {
+ bdevperf_test_done(NULL);
+ }
+}
+
+static void
+bdevperf_queue_io_wait_with_cb(struct bdevperf_task *task, spdk_bdev_io_wait_cb cb_fn)
+{
+ struct bdevperf_job *job = task->job;
+
+ task->bdev_io_wait.bdev = job->bdev;
+ task->bdev_io_wait.cb_fn = cb_fn;
+ task->bdev_io_wait.cb_arg = task;
+ spdk_bdev_queue_io_wait(job->bdev, job->ch, &task->bdev_io_wait);
+}
+
+static int
+bdevperf_job_drain(void *ctx)
+{
+ struct bdevperf_job *job = ctx;
+
+ spdk_poller_unregister(&job->run_timer);
+ if (job->reset) {
+ spdk_poller_unregister(&job->reset_timer);
+ }
+
+ job->is_draining = true;
+
+ return -1;
+}
+
+static void
+bdevperf_abort_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct bdevperf_task *task = cb_arg;
+ struct bdevperf_job *job = task->job;
+
+ job->current_queue_depth--;
+
+ if (success) {
+ job->io_completed++;
+ } else {
+ job->io_failed++;
+ if (!job->continue_on_failure) {
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ }
+ }
+
+ spdk_bdev_free_io(bdev_io);
+
+ /* Return task to free list because abort is submitted on demand. */
+ TAILQ_INSERT_TAIL(&job->task_list, task, link);
+
+ if (job->is_draining) {
+ if (job->current_queue_depth == 0) {
+ spdk_put_io_channel(job->ch);
+ spdk_bdev_close(job->bdev_desc);
+ spdk_thread_send_msg(g_master_thread, bdevperf_job_end, NULL);
+ }
+ }
+}
+
+static void
+bdevperf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct bdevperf_job *job;
+ struct bdevperf_task *task = cb_arg;
+ struct iovec *iovs;
+ int iovcnt;
+ bool md_check;
+ uint64_t offset_in_ios;
+
+ job = task->job;
+ md_check = spdk_bdev_get_dif_type(job->bdev) == SPDK_DIF_DISABLE;
+
+ if (!success) {
+ if (!job->reset && !job->continue_on_failure) {
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ printf("task offset: %lu on job bdev=%s fails\n",
+ task->offset_blocks, job->name);
+ }
+ } else if (job->verify || job->reset) {
+ spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt);
+ assert(iovcnt == 1);
+ assert(iovs != NULL);
+ if (!verify_data(task->buf, job->buf_size, iovs[0].iov_base, iovs[0].iov_len,
+ spdk_bdev_get_block_size(job->bdev),
+ task->md_buf, spdk_bdev_io_get_md_buf(bdev_io),
+ spdk_bdev_get_md_size(job->bdev),
+ job->io_size_blocks, md_check)) {
+ printf("Buffer mismatch! Target: %s Disk Offset: %lu\n", job->name, task->offset_blocks);
+ printf(" First dword expected 0x%x got 0x%x\n", *(int *)task->buf, *(int *)iovs[0].iov_base);
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ }
+ }
+
+ job->current_queue_depth--;
+
+ if (success) {
+ job->io_completed++;
+ } else {
+ job->io_failed++;
+ }
+
+ if (job->verify) {
+ assert(task->offset_blocks / job->io_size_blocks >= job->ios_base);
+ offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base;
+
+ assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true);
+ spdk_bit_array_clear(job->outstanding, offset_in_ios);
+ }
+
+ spdk_bdev_free_io(bdev_io);
+
+ /*
+ * is_draining indicates when time has expired for the test run
+ * and we are just waiting for the previously submitted I/O
+ * to complete. In this case, do not submit a new I/O to replace
+ * the one just completed.
+ */
+ if (!job->is_draining) {
+ bdevperf_submit_single(job, task);
+ } else {
+ TAILQ_INSERT_TAIL(&job->task_list, task, link);
+ if (job->current_queue_depth == 0) {
+ spdk_put_io_channel(job->ch);
+ spdk_bdev_close(job->bdev_desc);
+ spdk_thread_send_msg(g_master_thread, bdevperf_job_end, NULL);
+ }
+ }
+}
+
+static void
+bdevperf_verify_submit_read(void *cb_arg)
+{
+ struct bdevperf_job *job;
+ struct bdevperf_task *task = cb_arg;
+ int rc;
+
+ job = task->job;
+
+ /* Read the data back in */
+ if (spdk_bdev_is_md_separate(job->bdev)) {
+ rc = spdk_bdev_read_blocks_with_md(job->bdev_desc, job->ch, NULL, NULL,
+ task->offset_blocks, job->io_size_blocks,
+ bdevperf_complete, task);
+ } else {
+ rc = spdk_bdev_read_blocks(job->bdev_desc, job->ch, NULL,
+ task->offset_blocks, job->io_size_blocks,
+ bdevperf_complete, task);
+ }
+
+ if (rc == -ENOMEM) {
+ bdevperf_queue_io_wait_with_cb(task, bdevperf_verify_submit_read);
+ } else if (rc != 0) {
+ printf("Failed to submit read: %d\n", rc);
+ bdevperf_job_drain(job);
+ g_run_rc = rc;
+ }
+}
+
+static void
+bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, bool success,
+ void *cb_arg)
+{
+ if (success) {
+ spdk_bdev_free_io(bdev_io);
+ bdevperf_verify_submit_read(cb_arg);
+ } else {
+ bdevperf_complete(bdev_io, success, cb_arg);
+ }
+}
+
+static void
+bdevperf_zcopy_populate_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ if (!success) {
+ bdevperf_complete(bdev_io, success, cb_arg);
+ return;
+ }
+
+ spdk_bdev_zcopy_end(bdev_io, false, bdevperf_complete, cb_arg);
+}
+
+static int
+bdevperf_generate_dif(struct bdevperf_task *task)
+{
+ struct bdevperf_job *job = task->job;
+ struct spdk_bdev *bdev = job->bdev;
+ struct spdk_dif_ctx dif_ctx;
+ int rc;
+
+ rc = spdk_dif_ctx_init(&dif_ctx,
+ spdk_bdev_get_block_size(bdev),
+ spdk_bdev_get_md_size(bdev),
+ spdk_bdev_is_md_interleaved(bdev),
+ spdk_bdev_is_dif_head_of_md(bdev),
+ spdk_bdev_get_dif_type(bdev),
+ job->dif_check_flags,
+ task->offset_blocks, 0, 0, 0, 0);
+ if (rc != 0) {
+ fprintf(stderr, "Initialization of DIF context failed\n");
+ return rc;
+ }
+
+ if (spdk_bdev_is_md_interleaved(bdev)) {
+ rc = spdk_dif_generate(&task->iov, 1, job->io_size_blocks, &dif_ctx);
+ } else {
+ struct iovec md_iov = {
+ .iov_base = task->md_buf,
+ .iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks,
+ };
+
+ rc = spdk_dix_generate(&task->iov, 1, &md_iov, job->io_size_blocks, &dif_ctx);
+ }
+
+ if (rc != 0) {
+ fprintf(stderr, "Generation of DIF/DIX failed\n");
+ }
+
+ return rc;
+}
+
+static void
+bdevperf_submit_task(void *arg)
+{
+ struct bdevperf_task *task = arg;
+ struct bdevperf_job *job = task->job;
+ struct spdk_bdev_desc *desc;
+ struct spdk_io_channel *ch;
+ spdk_bdev_io_completion_cb cb_fn;
+ uint64_t offset_in_ios;
+ int rc = 0;
+
+ desc = job->bdev_desc;
+ ch = job->ch;
+
+ switch (task->io_type) {
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ if (spdk_bdev_get_md_size(job->bdev) != 0 && job->dif_check_flags != 0) {
+ rc = bdevperf_generate_dif(task);
+ }
+ if (rc == 0) {
+ cb_fn = (job->verify || job->reset) ? bdevperf_verify_write_complete : bdevperf_complete;
+
+ if (g_zcopy) {
+ spdk_bdev_zcopy_end(task->bdev_io, true, cb_fn, task);
+ return;
+ } else {
+ if (spdk_bdev_is_md_separate(job->bdev)) {
+ rc = spdk_bdev_writev_blocks_with_md(desc, ch, &task->iov, 1,
+ task->md_buf,
+ task->offset_blocks,
+ job->io_size_blocks,
+ cb_fn, task);
+ } else {
+ rc = spdk_bdev_writev_blocks(desc, ch, &task->iov, 1,
+ task->offset_blocks,
+ job->io_size_blocks,
+ cb_fn, task);
+ }
+ }
+ }
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ rc = spdk_bdev_flush_blocks(desc, ch, task->offset_blocks,
+ job->io_size_blocks, bdevperf_complete, task);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ rc = spdk_bdev_unmap_blocks(desc, ch, task->offset_blocks,
+ job->io_size_blocks, bdevperf_complete, task);
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ rc = spdk_bdev_write_zeroes_blocks(desc, ch, task->offset_blocks,
+ job->io_size_blocks, bdevperf_complete, task);
+ break;
+ case SPDK_BDEV_IO_TYPE_READ:
+ if (g_zcopy) {
+ rc = spdk_bdev_zcopy_start(desc, ch, task->offset_blocks, job->io_size_blocks,
+ true, bdevperf_zcopy_populate_complete, task);
+ } else {
+ if (spdk_bdev_is_md_separate(job->bdev)) {
+ rc = spdk_bdev_read_blocks_with_md(desc, ch, task->buf, task->md_buf,
+ task->offset_blocks,
+ job->io_size_blocks,
+ bdevperf_complete, task);
+ } else {
+ rc = spdk_bdev_read_blocks(desc, ch, task->buf, task->offset_blocks,
+ job->io_size_blocks, bdevperf_complete, task);
+ }
+ }
+ break;
+ case SPDK_BDEV_IO_TYPE_ABORT:
+ rc = spdk_bdev_abort(desc, ch, task->task_to_abort, bdevperf_abort_complete, task);
+ break;
+ default:
+ assert(false);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc == -ENOMEM) {
+ bdevperf_queue_io_wait_with_cb(task, bdevperf_submit_task);
+ return;
+ } else if (rc != 0) {
+ printf("Failed to submit bdev_io: %d\n", rc);
+ if (job->verify) {
+ assert(task->offset_blocks / job->io_size_blocks >= job->ios_base);
+ offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base;
+
+ assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true);
+ spdk_bit_array_clear(job->outstanding, offset_in_ios);
+ }
+ bdevperf_job_drain(job);
+ g_run_rc = rc;
+ return;
+ }
+
+ job->current_queue_depth++;
+}
+
+static void
+bdevperf_zcopy_get_buf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct bdevperf_task *task = cb_arg;
+ struct bdevperf_job *job = task->job;
+ struct iovec *iovs;
+ int iovcnt;
+
+ if (!success) {
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ return;
+ }
+
+ task->bdev_io = bdev_io;
+ task->io_type = SPDK_BDEV_IO_TYPE_WRITE;
+
+ if (job->verify || job->reset) {
+ /* When job->verify or job->reset is enabled, task->buf is used for
+ * verification of read after write. For write I/O, when zcopy APIs
+ * are used, task->buf cannot be used, and data must be written to
+ * the data buffer allocated underneath the bdev layer instead.
+ * Hence we copy task->buf to the allocated data buffer here.
+ */
+ spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt);
+ assert(iovcnt == 1);
+ assert(iovs != NULL);
+
+ copy_data(iovs[0].iov_base, iovs[0].iov_len, task->buf, job->buf_size,
+ spdk_bdev_get_block_size(job->bdev),
+ spdk_bdev_io_get_md_buf(bdev_io), task->md_buf,
+ spdk_bdev_get_md_size(job->bdev), job->io_size_blocks);
+ }
+
+ bdevperf_submit_task(task);
+}
+
+static void
+bdevperf_prep_zcopy_write_task(void *arg)
+{
+ struct bdevperf_task *task = arg;
+ struct bdevperf_job *job = task->job;
+ int rc;
+
+ rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch,
+ task->offset_blocks, job->io_size_blocks,
+ false, bdevperf_zcopy_get_buf_complete, task);
+ if (rc != 0) {
+ assert(rc == -ENOMEM);
+ bdevperf_queue_io_wait_with_cb(task, bdevperf_prep_zcopy_write_task);
+ return;
+ }
+
+ job->current_queue_depth++;
+}
+
+static struct bdevperf_task *
+bdevperf_job_get_task(struct bdevperf_job *job)
+{
+ struct bdevperf_task *task;
+
+ task = TAILQ_FIRST(&job->task_list);
+ if (!task) {
+ printf("Task allocation failed\n");
+ abort();
+ }
+
+ TAILQ_REMOVE(&job->task_list, task, link);
+ return task;
+}
+
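+/* Per-thread rand_r() state, kept in thread-local storage so concurrently
+ * running job threads do not share PRNG state.
+ */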
+static __thread unsigned int seed = 0;
+
+static void
+bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task)
+{
+ uint64_t offset_in_ios;
+
+ if (job->is_random) {
+ offset_in_ios = rand_r(&seed) % job->size_in_ios;
+ } else {
+ offset_in_ios = job->offset_in_ios++;
+ if (job->offset_in_ios == job->size_in_ios) {
+ job->offset_in_ios = 0;
+ }
+
+ /* Increment offset_in_ios if there's already an outstanding IO
+ * to that location. We only need this with job->verify as random
+ * offsets are not supported with job->verify at this time.
+ */
+ if (job->verify) {
+ assert(spdk_bit_array_find_first_clear(job->outstanding, 0) != UINT32_MAX);
+
+ while (spdk_bit_array_get(job->outstanding, offset_in_ios)) {
+ offset_in_ios = job->offset_in_ios++;
+ if (job->offset_in_ios == job->size_in_ios) {
+ job->offset_in_ios = 0;
+ }
+ }
+ spdk_bit_array_set(job->outstanding, offset_in_ios);
+ }
+ }
+
+ /* When multiple threads drive the same bdev, offset_in_ios is relative
+ * to the LBA range assigned to this job. task->offset_blocks
+ * is absolute (entire bdev LBA range).
+ */
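+ /* Worked example (illustrative): with io_size_blocks=8, ios_base=1024 and
+ * offset_in_ios=3, task->offset_blocks = (3 + 1024) * 8 = 8216.
+ */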
+ task->offset_blocks = (offset_in_ios + job->ios_base) * job->io_size_blocks;
+
+ if (job->verify || job->reset) {
+ generate_data(task->buf, job->buf_size,
+ spdk_bdev_get_block_size(job->bdev),
+ task->md_buf, spdk_bdev_get_md_size(job->bdev),
+ job->io_size_blocks, rand_r(&seed) % 256);
+ if (g_zcopy) {
+ bdevperf_prep_zcopy_write_task(task);
+ return;
+ } else {
+ task->iov.iov_base = task->buf;
+ task->iov.iov_len = job->buf_size;
+ task->io_type = SPDK_BDEV_IO_TYPE_WRITE;
+ }
+ } else if (job->flush) {
+ task->io_type = SPDK_BDEV_IO_TYPE_FLUSH;
+ } else if (job->unmap) {
+ task->io_type = SPDK_BDEV_IO_TYPE_UNMAP;
+ } else if (job->write_zeroes) {
+ task->io_type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
+ } else if ((job->rw_percentage == 100) ||
+ (job->rw_percentage != 0 && ((rand_r(&seed) % 100) < job->rw_percentage))) {
+ task->io_type = SPDK_BDEV_IO_TYPE_READ;
+ } else {
+ if (g_zcopy) {
+ bdevperf_prep_zcopy_write_task(task);
+ return;
+ } else {
+ task->iov.iov_base = task->buf;
+ task->iov.iov_len = job->buf_size;
+ task->io_type = SPDK_BDEV_IO_TYPE_WRITE;
+ }
+ }
+
+ bdevperf_submit_task(task);
+}
+
+static int reset_job(void *arg);
+
+static void
+reset_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct bdevperf_task *task = cb_arg;
+ struct bdevperf_job *job = task->job;
+
+ if (!success) {
+ printf("Reset blockdev=%s failed\n", spdk_bdev_get_name(job->bdev));
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ }
+
+ TAILQ_INSERT_TAIL(&job->task_list, task, link);
+ spdk_bdev_free_io(bdev_io);
+
+ job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job,
+ 10 * 1000000);
+}
+
+static int
+reset_job(void *arg)
+{
+ struct bdevperf_job *job = arg;
+ struct bdevperf_task *task;
+ int rc;
+
+ spdk_poller_unregister(&job->reset_timer);
+
+ /* Do reset. */
+ task = bdevperf_job_get_task(job);
+ rc = spdk_bdev_reset(job->bdev_desc, job->ch,
+ reset_cb, task);
+ if (rc) {
+ printf("Reset failed: %d\n", rc);
+ bdevperf_job_drain(job);
+ g_run_rc = -1;
+ }
+
+ return -1;
+}
+
+static void
+bdevperf_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
+{
+ struct bdevperf_job *job = cb_arg;
+ struct bdevperf_task *task;
+
+ job->io_timeout++;
+
+ if (job->is_draining || !job->abort ||
+ !spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ABORT)) {
+ return;
+ }
+
+ task = bdevperf_job_get_task(job);
+ if (task == NULL) {
+ return;
+ }
+
+ task->task_to_abort = spdk_bdev_io_get_cb_arg(bdev_io);
+ task->io_type = SPDK_BDEV_IO_TYPE_ABORT;
+
+ bdevperf_submit_task(task);
+}
+
+static void
+bdevperf_job_run(void *ctx)
+{
+ struct bdevperf_job *job = ctx;
+ struct bdevperf_task *task;
+ int i;
+
+ /* Submit initial I/O for this job. Each time one
+ * completes, another will be submitted. */
+
+ /* Start a timer to stop this I/O chain when the run is over */
+ job->run_timer = SPDK_POLLER_REGISTER(bdevperf_job_drain, job, g_time_in_usec);
+ if (job->reset) {
+ job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job,
+ 10 * 1000000);
+ }
+
+ spdk_bdev_set_timeout(job->bdev_desc, g_timeout_in_sec, bdevperf_timeout_cb, job);
+
+ for (i = 0; i < job->queue_depth; i++) {
+ task = bdevperf_job_get_task(job);
+ bdevperf_submit_single(job, task);
+ }
+}
+
+static void
+_performance_dump_done(void *ctx)
+{
+ struct bdevperf_aggregate_stats *stats = ctx;
+
+ printf("\r =====================================================\n");
+ printf("\r %-20s: %10.2f IOPS %10.2f MiB/s\n",
+ "Total", stats->total_io_per_second, stats->total_mb_per_second);
+ if (stats->total_failed_per_second != 0 || stats->total_timeout_per_second != 0) {
+ printf("\r %-20s: %10.2f Fail/s %8.2f TO/s\n",
+ "", stats->total_failed_per_second, stats->total_timeout_per_second);
+ }
+ fflush(stdout);
+
+ g_performance_dump_active = false;
+
+ free(stats);
+}
+
+static void
+_performance_dump(void *ctx)
+{
+ struct bdevperf_aggregate_stats *stats = ctx;
+
+ performance_dump_job(stats, stats->current_job);
+
+ /* This assumes the jobs list is static after start up time.
+ * That's true right now, but if that ever changed this would need a lock. */
+ stats->current_job = TAILQ_NEXT(stats->current_job, link);
+ if (stats->current_job == NULL) {
+ spdk_thread_send_msg(g_master_thread, _performance_dump_done, stats);
+ } else {
+ spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats);
+ }
+}
+
+static int
+performance_statistics_thread(void *arg)
+{
+ struct bdevperf_aggregate_stats *stats;
+
+ if (g_performance_dump_active) {
+ return -1;
+ }
+
+ g_performance_dump_active = true;
+
+ stats = calloc(1, sizeof(*stats));
+ if (stats == NULL) {
+ return -1;
+ }
+
+ g_show_performance_period_num++;
+
+ stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec;
+ stats->ema_period = g_show_performance_ema_period;
+
+ /* Iterate all of the jobs to gather stats
+ * These jobs will not get removed here until a final performance dump is run,
+ * so this should be safe without locking.
+ */
+ stats->current_job = TAILQ_FIRST(&g_bdevperf.jobs);
+ if (stats->current_job == NULL) {
+ spdk_thread_send_msg(g_master_thread, _performance_dump_done, stats);
+ } else {
+ spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats);
+ }
+
+ return -1;
+}
+
+static void
+bdevperf_test(void)
+{
+ struct bdevperf_job *job;
+
+ printf("Running I/O for %" PRIu64 " seconds...\n", g_time_in_usec / 1000000);
+ fflush(stdout);
+
+ /* Start a timer to dump performance numbers */
+ g_shutdown_tsc = spdk_get_ticks();
+ if (g_show_performance_real_time) {
+ g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL,
+ g_show_performance_period_in_usec);
+ }
+
+ /* Iterate jobs to start all I/O */
+ TAILQ_FOREACH(job, &g_bdevperf.jobs, link) {
+ g_bdevperf.running_jobs++;
+ spdk_thread_send_msg(job->thread, bdevperf_job_run, job);
+ }
+}
+
+static void
+bdevperf_bdev_removed(void *arg)
+{
+ struct bdevperf_job *job = arg;
+
+ bdevperf_job_drain(job);
+}
+
+static uint32_t g_construct_job_count = 0;
+
+static void
+_bdevperf_construct_job_done(void *ctx)
+{
+ if (--g_construct_job_count == 0) {
+
+ if (g_run_rc != 0) {
+ /* Something failed. */
+ bdevperf_test_done(NULL);
+ return;
+ }
+
+ /* Ready to run the test */
+ bdevperf_test();
+ }
+}
+
+/* Checkformat does not allow using an inlined type here,
+ so this typedef is a workaround */
+typedef struct spdk_thread *spdk_thread_t;
+
+static spdk_thread_t
+construct_job_thread(struct spdk_cpuset *cpumask, const char *tag)
+{
+ char thread_name[32];
+ struct spdk_cpuset tmp;
+
+ /* This function runs on the master thread. */
+ assert(g_master_thread == spdk_get_thread());
+
+ /* Handle default mask */
+ if (spdk_cpuset_count(cpumask) == 0) {
+ cpumask = &g_all_cpuset;
+ }
+
+ /* Warn user that mask might need to be changed */
+ spdk_cpuset_copy(&tmp, cpumask);
+ spdk_cpuset_or(&tmp, &g_all_cpuset);
+ if (!spdk_cpuset_equal(&tmp, &g_all_cpuset)) {
+ fprintf(stderr, "cpumask for '%s' is too big\n", tag);
+ }
+
+ snprintf(thread_name, sizeof(thread_name), "%s_%s",
+ tag,
+ spdk_cpuset_fmt(cpumask));
+
+ return spdk_thread_create(thread_name, cpumask);
+}
+
+static uint32_t
+_get_next_core(void)
+{
+ static uint32_t current_core = SPDK_ENV_LCORE_ID_ANY;
+
+ if (current_core == SPDK_ENV_LCORE_ID_ANY) {
+ current_core = spdk_env_get_first_core();
+ return current_core;
+ }
+
+ current_core = spdk_env_get_next_core(current_core);
+ if (current_core == SPDK_ENV_LCORE_ID_ANY) {
+ current_core = spdk_env_get_first_core();
+ }
+
+ return current_core;
+}
+
+static void
+_bdevperf_construct_job(void *ctx)
+{
+ struct bdevperf_job *job = ctx;
+ int rc;
+
+ rc = spdk_bdev_open(job->bdev, true, bdevperf_bdev_removed, job, &job->bdev_desc);
+ if (rc != 0) {
+ SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(job->bdev), rc);
+ g_run_rc = -EINVAL;
+ goto end;
+ }
+
+ job->ch = spdk_bdev_get_io_channel(job->bdev_desc);
+ if (!job->ch) {
+ SPDK_ERRLOG("Could not get io_channel for device %s, error=%d\n", spdk_bdev_get_name(job->bdev),
+ rc);
+ g_run_rc = -ENOMEM;
+ goto end;
+ }
+
+end:
+ spdk_thread_send_msg(g_master_thread, _bdevperf_construct_job_done, NULL);
+}
+
+static void
+job_init_rw(struct bdevperf_job *job, enum job_config_rw rw)
+{
+ switch (rw) {
+ case JOB_CONFIG_RW_READ:
+ job->rw_percentage = 100;
+ break;
+ case JOB_CONFIG_RW_WRITE:
+ job->rw_percentage = 0;
+ break;
+ case JOB_CONFIG_RW_RANDREAD:
+ job->is_random = true;
+ job->rw_percentage = 100;
+ break;
+ case JOB_CONFIG_RW_RANDWRITE:
+ job->is_random = true;
+ job->rw_percentage = 0;
+ break;
+ case JOB_CONFIG_RW_RW:
+ job->is_random = false;
+ break;
+ case JOB_CONFIG_RW_RANDRW:
+ job->is_random = true;
+ break;
+ case JOB_CONFIG_RW_VERIFY:
+ job->verify = true;
+ job->rw_percentage = 50;
+ break;
+ case JOB_CONFIG_RW_RESET:
+ job->reset = true;
+ job->verify = true;
+ job->rw_percentage = 50;
+ break;
+ case JOB_CONFIG_RW_UNMAP:
+ job->unmap = true;
+ break;
+ case JOB_CONFIG_RW_FLUSH:
+ job->flush = true;
+ break;
+ case JOB_CONFIG_RW_WRITE_ZEROES:
+ job->write_zeroes = true;
+ break;
+ }
+}
+
+static int
+bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config,
+ struct spdk_thread *thread)
+{
+ struct bdevperf_job *job;
+ struct bdevperf_task *task;
+ int block_size, data_block_size;
+ int rc;
+ int task_num, n;
+
+ block_size = spdk_bdev_get_block_size(bdev);
+ data_block_size = spdk_bdev_get_data_block_size(bdev);
+
+ job = calloc(1, sizeof(struct bdevperf_job));
+ if (!job) {
+ fprintf(stderr, "Unable to allocate memory for new job.\n");
+ return -ENOMEM;
+ }
+
+ job->name = strdup(spdk_bdev_get_name(bdev));
+ if (!job->name) {
+ fprintf(stderr, "Unable to allocate memory for job name.\n");
+ free(job);
+ return -ENOMEM;
+ }
+
+ job->workload_type = g_workload_type;
+ job->io_size = config->bs;
+ job->rw_percentage = config->rwmixread;
+ job->continue_on_failure = g_continue_on_failure;
+ job->queue_depth = config->iodepth;
+ job->bdev = bdev;
+ job->io_size_blocks = job->io_size / data_block_size;
+ job->buf_size = job->io_size_blocks * block_size;
+ job_init_rw(job, config->rw);
+
+ if ((job->io_size % data_block_size) != 0) {
+ SPDK_ERRLOG("IO size (%d) is not multiples of data block size of bdev %s (%"PRIu32")\n",
+ job->io_size, spdk_bdev_get_name(bdev), data_block_size);
+ free(job->name);
+ free(job);
+ return -ENOTSUP;
+ }
+
+ if (job->unmap && !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
+ printf("Skipping %s because it does not support unmap\n", spdk_bdev_get_name(bdev));
+ free(job->name);
+ free(job);
+ return -ENOTSUP;
+ }
+
+ if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
+ job->dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
+ }
+ if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
+ job->dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
+ }
+
+ job->offset_in_ios = 0;
+
+ if (config->length != 0) {
+ /* Use subset of disk */
+ job->size_in_ios = config->length / job->io_size_blocks;
+ job->ios_base = config->offset / job->io_size_blocks;
+ } else {
+ /* Use whole disk */
+ job->size_in_ios = spdk_bdev_get_num_blocks(bdev) / job->io_size_blocks;
+ job->ios_base = 0;
+ }
+
+ if (job->verify) {
+ job->outstanding = spdk_bit_array_create(job->size_in_ios);
+ if (job->outstanding == NULL) {
+ SPDK_ERRLOG("Could not create outstanding array bitmap for bdev %s\n",
+ spdk_bdev_get_name(bdev));
+ free(job->name);
+ free(job);
+ return -ENOMEM;
+ }
+ }
+
+ TAILQ_INIT(&job->task_list);
+
+ task_num = job->queue_depth;
+ if (job->reset) {
+ task_num += 1;
+ }
+ if (job->abort) {
+ task_num += job->queue_depth;
+ }
+
+ TAILQ_INSERT_TAIL(&g_bdevperf.jobs, job, link);
+
+ for (n = 0; n < task_num; n++) {
+ task = calloc(1, sizeof(struct bdevperf_task));
+ if (!task) {
+ fprintf(stderr, "Failed to allocate task from memory\n");
+ return -ENOMEM;
+ }
+
+ task->buf = spdk_zmalloc(job->buf_size, spdk_bdev_get_buf_align(job->bdev), NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!task->buf) {
+ fprintf(stderr, "Cannot allocate buf for task=%p\n", task);
+ free(task);
+ return -ENOMEM;
+ }
+
+ if (spdk_bdev_is_md_separate(job->bdev)) {
+ task->md_buf = spdk_zmalloc(job->io_size_blocks *
+ spdk_bdev_get_md_size(job->bdev), 0, NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (!task->md_buf) {
+ fprintf(stderr, "Cannot allocate md buf for task=%p\n", task);
+ spdk_free(task->buf);
+ free(task);
+ return -ENOMEM;
+ }
+ }
+
+ task->job = job;
+ TAILQ_INSERT_TAIL(&job->task_list, task, link);
+ }
+
+ job->thread = thread;
+
+ g_construct_job_count++;
+
+ rc = spdk_thread_send_msg(thread, _bdevperf_construct_job, job);
+ assert(rc == 0);
+
+ return rc;
+}
+
+static int
+parse_rw(const char *str, enum job_config_rw ret)
+{
+ if (str == NULL) {
+ return ret;
+ }
+
+ if (!strcmp(str, "read")) {
+ ret = JOB_CONFIG_RW_READ;
+ } else if (!strcmp(str, "randread")) {
+ ret = JOB_CONFIG_RW_RANDREAD;
+ } else if (!strcmp(str, "write")) {
+ ret = JOB_CONFIG_RW_WRITE;
+ } else if (!strcmp(str, "randwrite")) {
+ ret = JOB_CONFIG_RW_RANDWRITE;
+ } else if (!strcmp(str, "verify")) {
+ ret = JOB_CONFIG_RW_VERIFY;
+ } else if (!strcmp(str, "reset")) {
+ ret = JOB_CONFIG_RW_RESET;
+ } else if (!strcmp(str, "unmap")) {
+ ret = JOB_CONFIG_RW_UNMAP;
+ } else if (!strcmp(str, "write_zeroes")) {
+ ret = JOB_CONFIG_RW_WRITE_ZEROES;
+ } else if (!strcmp(str, "flush")) {
+ ret = JOB_CONFIG_RW_FLUSH;
+ } else if (!strcmp(str, "rw")) {
+ ret = JOB_CONFIG_RW_RW;
+ } else if (!strcmp(str, "randrw")) {
+ ret = JOB_CONFIG_RW_RANDRW;
+ } else {
+ fprintf(stderr, "rw must be one of\n"
+ "(read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush)\n");
+ ret = BDEVPERF_CONFIG_ERROR;
+ }
+
+ return ret;
+}
+
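+/*
+ * Illustrative example: given the value "Malloc0: Malloc1" from a filename=
+ * line, the first call fills 'out' with "Malloc0" and returns a pointer to
+ * the remainder ": Malloc1"; the second call fills 'out' with "Malloc1"; a
+ * further call yields an empty 'out'. Spaces and tabs are stripped.
+ */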
+static const char *
+config_filename_next(const char *filename, char *out)
+{
+ int i, k;
+
+ if (filename == NULL) {
+ out[0] = '\0';
+ return NULL;
+ }
+
+ if (filename[0] == ':') {
+ filename++;
+ }
+
+ for (i = 0, k = 0;
+ filename[i] != '\0' &&
+ filename[i] != ':' &&
+ i < BDEVPERF_CONFIG_MAX_FILENAME;
+ i++) {
+ if (filename[i] == ' ' || filename[i] == '\t') {
+ continue;
+ }
+
+ out[k++] = filename[i];
+ }
+ out[k] = 0;
+
+ return filename + i;
+}
+
+static void
+bdevperf_construct_config_jobs(void)
+{
+ char filename[BDEVPERF_CONFIG_MAX_FILENAME];
+ struct spdk_thread *thread;
+ struct job_config *config;
+ struct spdk_bdev *bdev;
+ const char *filenames;
+ int rc;
+
+ TAILQ_FOREACH(config, &job_config_list, link) {
+ filenames = config->filename;
+
+ thread = construct_job_thread(&config->cpumask, config->name);
+ assert(thread);
+
+ while (filenames) {
+ filenames = config_filename_next(filenames, filename);
+ if (strlen(filename) == 0) {
+ break;
+ }
+
+ bdev = spdk_bdev_get_by_name(filename);
+ if (!bdev) {
+ fprintf(stderr, "Unable to find bdev '%s'\n", filename);
+ g_run_rc = -EINVAL;
+ return;
+ }
+
+ rc = bdevperf_construct_job(bdev, config, thread);
+ if (rc < 0) {
+ g_run_rc = rc;
+ return;
+ }
+ }
+ }
+}
+
+static int
+make_cli_job_config(const char *filename, int offset, int range)
+{
+ struct job_config *config = calloc(1, sizeof(*config));
+
+ if (config == NULL) {
+ fprintf(stderr, "Unable to allocate memory for job config\n");
+ return -ENOMEM;
+ }
+
+ config->name = filename;
+ config->filename = filename;
+ spdk_cpuset_zero(&config->cpumask);
+ spdk_cpuset_set_cpu(&config->cpumask, _get_next_core(), true);
+ config->bs = g_io_size;
+ config->iodepth = g_queue_depth;
+ config->rwmixread = g_rw_percentage;
+ config->offset = offset;
+ config->length = range;
+ config->rw = parse_rw(g_workload_type, BDEVPERF_CONFIG_ERROR);
+ if ((int)config->rw == BDEVPERF_CONFIG_ERROR) {
+ /* parse_rw() already printed an error message. */
+ free(config);
+ return -EINVAL;
+ }
+
+ TAILQ_INSERT_TAIL(&job_config_list, config, link);
+ return 0;
+}
+
+static void
+bdevperf_construct_multithread_jobs(void)
+{
+ struct spdk_bdev *bdev;
+ uint32_t i;
+ uint32_t num_cores;
+ uint32_t blocks_per_job;
+ uint32_t offset;
+
+ num_cores = 0;
+ SPDK_ENV_FOREACH_CORE(i) {
+ num_cores++;
+ }
+
+ if (num_cores == 0) {
+ g_run_rc = -EINVAL;
+ return;
+ }
+
+ if (g_job_bdev_name != NULL) {
+ bdev = spdk_bdev_get_by_name(g_job_bdev_name);
+ if (!bdev) {
+ fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
+ return;
+ }
+
+ blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores;
+ offset = 0;
+
+ SPDK_ENV_FOREACH_CORE(i) {
+ g_run_rc = make_cli_job_config(g_job_bdev_name, offset, blocks_per_job);
+ if (g_run_rc) {
+ return;
+ }
+
+ offset += blocks_per_job;
+ }
+ } else {
+ bdev = spdk_bdev_first_leaf();
+ while (bdev != NULL) {
+ blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores;
+ offset = 0;
+
+ SPDK_ENV_FOREACH_CORE(i) {
+ g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev),
+ offset, blocks_per_job);
+ if (g_run_rc) {
+ return;
+ }
+
+ offset += blocks_per_job;
+ }
+
+ bdev = spdk_bdev_next_leaf(bdev);
+ }
+ }
+}
+
+static void
+bdevperf_construct_jobs(void)
+{
+ struct spdk_bdev *bdev;
+
+ /* There are three different modes for allocating jobs. Standard mode
+ * (the default) creates one spdk_thread per bdev and runs the I/O job there.
+ *
+ * The -C flag places bdevperf into "multithread" mode, meaning it creates
+ * one spdk_thread per bdev PER CORE, and runs a copy of the job on each.
+ * This runs multiple threads per bdev, effectively.
+ *
+ * The -j flag implies "FIO" mode, which tries to mimic the semantics of FIO jobs.
+ * In "FIO" mode, threads are spawned per-job instead of per-bdev.
+ * Each FIO job can be individually parameterized by filename, cpu mask, etc.,
+ * unlike the other modes, which only support global options.
+ */
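+/*
+ * Illustrative -j job file (a sketch; the bdev names are hypothetical). Every
+ * key shown is parsed by read_job_config() below, and a job inherits any key
+ * it omits from the preceding [global] section:
+ *
+ *   [global]
+ *   filename=Malloc0
+ *   bs=4096
+ *   iodepth=32
+ *   rw=randrw
+ *   rwmixread=70
+ *
+ *   [job0]
+ *   cpumask=0x1
+ *
+ *   [job1]
+ *   filename=Malloc1
+ *   rw=randread
+ */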
+
+ /* Increment initial construct_jobs count so that it will never reach 0 in the middle
+ * of iteration.
+ */
+ g_construct_job_count = 1;
+
+ if (g_bdevperf_conf) {
+ goto end;
+ } else if (g_multithread_mode) {
+ bdevperf_construct_multithread_jobs();
+ goto end;
+ }
+
+ if (g_job_bdev_name != NULL) {
+ bdev = spdk_bdev_get_by_name(g_job_bdev_name);
+ if (bdev) {
+ /* Construct the job */
+ g_run_rc = make_cli_job_config(g_job_bdev_name, 0, 0);
+ } else {
+ fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
+ }
+ } else {
+ bdev = spdk_bdev_first_leaf();
+
+ while (bdev != NULL) {
+ /* Construct the job */
+ g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev), 0, 0);
+ if (g_run_rc) {
+ break;
+ }
+
+ bdev = spdk_bdev_next_leaf(bdev);
+ }
+ }
+
+end:
+ if (g_run_rc == 0) {
+ bdevperf_construct_config_jobs();
+ }
+
+ if (--g_construct_job_count == 0) {
+ if (g_run_rc != 0) {
+ /* Something failed. */
+ bdevperf_test_done(NULL);
+ return;
+ }
+
+ bdevperf_test();
+ }
+}
+
+static int
+parse_uint_option(struct spdk_conf_section *s, const char *name, int def)
+{
+ const char *job_name;
+ int tmp;
+
+ tmp = spdk_conf_section_get_intval(s, name);
+ if (tmp == -1) {
+ /* Field was not found, so fall back to the default value.
+ * Undefined values are acceptable in the [global] section,
+ * but not in job sections. */
+ if (def == BDEVPERF_CONFIG_UNDEFINED) {
+ job_name = spdk_conf_section_get_name(s);
+ if (strcmp(job_name, "global") == 0) {
+ return def;
+ }
+
+ fprintf(stderr,
+ "Job '%s' has no '%s' assigned\n",
+ job_name, name);
+ return BDEVPERF_CONFIG_ERROR;
+ }
+ return def;
+ }
+
+ /* NOTE: get_intval returns nonnegative on success */
+ if (tmp < 0) {
+ fprintf(stderr, "Job '%s' has bad '%s' value.\n",
+ spdk_conf_section_get_name(s), name);
+ return BDEVPERF_CONFIG_ERROR;
+ }
+
+ return tmp;
+}
+
+/* CLI arguments override parameters for global sections */
+static void
+config_set_cli_args(struct job_config *config)
+{
+ if (g_job_bdev_name) {
+ config->filename = g_job_bdev_name;
+ }
+ if (g_io_size > 0) {
+ config->bs = g_io_size;
+ }
+ if (g_queue_depth > 0) {
+ config->iodepth = g_queue_depth;
+ }
+ if (g_rw_percentage > 0) {
+ config->rwmixread = g_rw_percentage;
+ }
+ if (g_workload_type) {
+ config->rw = parse_rw(g_workload_type, config->rw);
+ }
+}
+
+static int
+read_job_config(void)
+{
+ struct job_config global_default_config;
+ struct job_config global_config;
+ struct spdk_conf_section *s;
+ struct job_config *config;
+ const char *cpumask;
+ const char *rw;
+ bool is_global;
+ int n = 0;
+
+ if (g_bdevperf_conf_file == NULL) {
+ return 0;
+ }
+
+ g_bdevperf_conf = spdk_conf_allocate();
+ if (g_bdevperf_conf == NULL) {
+ fprintf(stderr, "Could not allocate job config structure\n");
+ return 1;
+ }
+
+ spdk_conf_disable_sections_merge(g_bdevperf_conf);
+ if (spdk_conf_read(g_bdevperf_conf, g_bdevperf_conf_file)) {
+ fprintf(stderr, "Invalid job config");
+ return 1;
+ }
+
+ /* Initialize global defaults */
+ global_default_config.filename = NULL;
+ /* A zero mask is treated the same as g_all_cpuset,
+ * but g_all_cpuset is not initialized yet,
+ * so use the zero mask as the default instead */
+ spdk_cpuset_zero(&global_default_config.cpumask);
+ global_default_config.bs = BDEVPERF_CONFIG_UNDEFINED;
+ global_default_config.iodepth = BDEVPERF_CONFIG_UNDEFINED;
+ /* bdevperf has no default for -M option but in FIO the default is 50 */
+ global_default_config.rwmixread = 50;
+ global_default_config.offset = 0;
+ /* length 0 means 100% */
+ global_default_config.length = 0;
+ global_default_config.rw = BDEVPERF_CONFIG_UNDEFINED;
+ config_set_cli_args(&global_default_config);
+
+ if ((int)global_default_config.rw == BDEVPERF_CONFIG_ERROR) {
+ return 1;
+ }
+
+ /* There is only a single instance of the global job_config.
+ * We just reset its value when we encounter a new [global] section */
+ global_config = global_default_config;
+
+ for (s = spdk_conf_first_section(g_bdevperf_conf);
+ s != NULL;
+ s = spdk_conf_next_section(s)) {
+ config = calloc(1, sizeof(*config));
+ if (config == NULL) {
+ fprintf(stderr, "Unable to allocate memory for job config\n");
+ return 1;
+ }
+
+ config->name = spdk_conf_section_get_name(s);
+ is_global = strcmp(config->name, "global") == 0;
+
+ if (is_global) {
+ global_config = global_default_config;
+ }
+
+ config->filename = spdk_conf_section_get_val(s, "filename");
+ if (config->filename == NULL) {
+ config->filename = global_config.filename;
+ }
+ if (!is_global) {
+ if (config->filename == NULL) {
+ fprintf(stderr, "Job '%s' expects 'filename' parameter\n", config->name);
+ goto error;
+ } else if (strnlen(config->filename, BDEVPERF_CONFIG_MAX_FILENAME)
+ >= BDEVPERF_CONFIG_MAX_FILENAME) {
+ fprintf(stderr,
+ "filename for '%s' job is too long. Max length is %d\n",
+ config->name, BDEVPERF_CONFIG_MAX_FILENAME);
+ goto error;
+ }
+ }
+
+ cpumask = spdk_conf_section_get_val(s, "cpumask");
+ if (cpumask == NULL) {
+ config->cpumask = global_config.cpumask;
+ } else if (spdk_cpuset_parse(&config->cpumask, cpumask)) {
+ fprintf(stderr, "Job '%s' has bad 'cpumask' value\n", config->name);
+ goto error;
+ }
+
+ config->bs = parse_uint_option(s, "bs", global_config.bs);
+ if (config->bs == BDEVPERF_CONFIG_ERROR) {
+ goto error;
+ } else if (config->bs == 0) {
+ fprintf(stderr, "'bs' of job '%s' must be greater than 0\n", config->name);
+ goto error;
+ }
+
+ config->iodepth = parse_uint_option(s, "iodepth", global_config.iodepth);
+ if (config->iodepth == BDEVPERF_CONFIG_ERROR) {
+ goto error;
+ } else if (config->iodepth == 0) {
+ fprintf(stderr,
+ "'iodepth' of job '%s' must be greater than 0\n",
+ config->name);
+ goto error;
+ }
+
+ config->rwmixread = parse_uint_option(s, "rwmixread", global_config.rwmixread);
+ if (config->rwmixread == BDEVPERF_CONFIG_ERROR) {
+ goto error;
+ } else if (config->rwmixread > 100) {
+ fprintf(stderr,
+ "'rwmixread' value of '%s' job is not in 0-100 range\n",
+ config->name);
+ goto error;
+ }
+
+ config->offset = parse_uint_option(s, "offset", global_config.offset);
+ if (config->offset == BDEVPERF_CONFIG_ERROR) {
+ goto error;
+ }
+
+ config->length = parse_uint_option(s, "length", global_config.length);
+ if (config->length == BDEVPERF_CONFIG_ERROR) {
+ goto error;
+ }
+
+ rw = spdk_conf_section_get_val(s, "rw");
+ config->rw = parse_rw(rw, global_config.rw);
+ if ((int)config->rw == BDEVPERF_CONFIG_ERROR) {
+ fprintf(stderr, "Job '%s' has bad 'rw' value\n", config->name);
+ goto error;
+ } else if (!is_global && (int)config->rw == BDEVPERF_CONFIG_UNDEFINED) {
+ fprintf(stderr, "Job '%s' has no 'rw' assigned\n", config->name);
+ goto error;
+ }
+
+ if (is_global) {
+ config_set_cli_args(config);
+ global_config = *config;
+ free(config);
+ } else {
+ TAILQ_INSERT_TAIL(&job_config_list, config, link);
+ n++;
+ }
+ }
+
+ printf("Using job config with %d jobs\n", n);
+ return 0;
+error:
+ free(config);
+ return 1;
+}
+
+static void
+bdevperf_run(void *arg1)
+{
+ uint32_t i;
+
+ g_master_thread = spdk_get_thread();
+
+ spdk_cpuset_zero(&g_all_cpuset);
+ SPDK_ENV_FOREACH_CORE(i) {
+ spdk_cpuset_set_cpu(&g_all_cpuset, i, true);
+ }
+
+ if (g_wait_for_tests) {
+ /* Do not perform any tests until RPC is received */
+ return;
+ }
+
+ bdevperf_construct_jobs();
+}
+
+static void
+rpc_perform_tests_cb(void)
+{
+ struct spdk_json_write_ctx *w;
+ struct spdk_jsonrpc_request *request = g_request;
+
+ g_request = NULL;
+
+ if (g_run_rc == 0) {
+ w = spdk_jsonrpc_begin_result(request);
+ spdk_json_write_uint32(w, g_run_rc);
+ spdk_jsonrpc_end_result(request, w);
+ } else {
+ spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ "bdevperf failed with error %s", spdk_strerror(-g_run_rc));
+ }
+
+ /* Reset g_run_rc to 0 for the next test run. */
+ g_run_rc = 0;
+}
+
+static void
+rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+ if (params != NULL) {
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+ "perform_tests method requires no parameters");
+ return;
+ }
+ if (g_request != NULL) {
+ fprintf(stderr, "Another test is already in progress.\n");
+ spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+ spdk_strerror(-EINPROGRESS));
+ return;
+ }
+ g_request = request;
+
+ bdevperf_construct_jobs();
+}
+SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME)
+
+static void
+_bdevperf_job_drain(void *ctx)
+{
+ bdevperf_job_drain(ctx);
+}
+
+static void
+spdk_bdevperf_shutdown_cb(void)
+{
+ g_shutdown = true;
+ struct bdevperf_job *job, *tmp;
+
+ if (g_bdevperf.running_jobs == 0) {
+ bdevperf_test_done(NULL);
+ return;
+ }
+
+ g_shutdown_tsc = spdk_get_ticks() - g_shutdown_tsc;
+
+ /* Iterate jobs to stop all I/O */
+ TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, tmp) {
+ spdk_thread_send_msg(job->thread, _bdevperf_job_drain, job);
+ }
+}
+
+static int
+bdevperf_parse_arg(int ch, char *arg)
+{
+ long long tmp;
+
+ if (ch == 'w') {
+ g_workload_type = optarg;
+ } else if (ch == 'T') {
+ g_job_bdev_name = optarg;
+ } else if (ch == 'z') {
+ g_wait_for_tests = true;
+ } else if (ch == 'x') {
+ g_zcopy = false;
+ } else if (ch == 'A') {
+ g_abort = true;
+ } else if (ch == 'C') {
+ g_multithread_mode = true;
+ } else if (ch == 'f') {
+ g_continue_on_failure = true;
+ } else if (ch == 'j') {
+ g_bdevperf_conf_file = optarg;
+ } else {
+ tmp = spdk_strtoll(optarg, 10);
+ if (tmp < 0) {
+ fprintf(stderr, "Parse failed for the option %c.\n", ch);
+ return tmp;
+ } else if (tmp >= INT_MAX) {
+ fprintf(stderr, "Parsed option was too large %c.\n", ch);
+ return -ERANGE;
+ }
+
+ switch (ch) {
+ case 'q':
+ g_queue_depth = tmp;
+ break;
+ case 'o':
+ g_io_size = tmp;
+ break;
+ case 't':
+ g_time_in_sec = tmp;
+ break;
+ case 'k':
+ g_timeout_in_sec = tmp;
+ break;
+ case 'M':
+ g_rw_percentage = tmp;
+ g_mix_specified = true;
+ break;
+ case 'P':
+ g_show_performance_ema_period = tmp;
+ break;
+ case 'S':
+ g_show_performance_real_time = 1;
+ g_show_performance_period_in_usec = tmp * 1000000;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static void
+bdevperf_usage(void)
+{
+ printf(" -q <depth> io depth\n");
+ printf(" -o <size> io size in bytes\n");
+ printf(" -w <type> io pattern type, must be one of (read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush)\n");
+ printf(" -t <time> time in seconds\n");
+ printf(" -k <timeout> timeout in seconds to detect starved I/O (default is 0 and disabled)\n");
+ printf(" -M <percent> rwmixread (100 for reads, 0 for writes)\n");
+ printf(" -P <num> number of moving average period\n");
+ printf("\t\t(If set to n, show weighted mean of the previous n IO/s in real time)\n");
+ printf("\t\t(Formula: M = 2 / (n + 1), EMA[i+1] = IO/s * M + (1 - M) * EMA[i])\n");
+ printf("\t\t(only valid with -S)\n");
+ printf(" -S <period> show performance result in real time every <period> seconds\n");
+ printf(" -T <bdev> bdev to run against. Default: all available bdevs.\n");
+ printf(" -f continue processing I/O even after failures\n");
+ printf(" -x disable using zcopy bdev API for read or write I/O\n");
+ printf(" -z start bdevperf, but wait for RPC to start tests\n");
+ printf(" -A abort the timeout I/O\n");
+ printf(" -C enable every core to send I/Os to each bdev\n");
+ printf(" -j use job config file");
+}
+
+static int
+verify_test_params(struct spdk_app_opts *opts)
+{
+ /* When RPC is used for starting tests and
+ * no rpc_addr was configured for the app,
+ * use the default address. */
+ if (g_wait_for_tests && opts->rpc_addr == NULL) {
+ opts->rpc_addr = SPDK_DEFAULT_RPC_ADDR;
+ }
+
+ if (!g_bdevperf_conf_file && g_queue_depth <= 0) {
+ spdk_app_usage();
+ bdevperf_usage();
+ return 1;
+ }
+ if (!g_bdevperf_conf_file && g_io_size <= 0) {
+ spdk_app_usage();
+ bdevperf_usage();
+ return 1;
+ }
+ if (!g_bdevperf_conf_file && !g_workload_type) {
+ spdk_app_usage();
+ bdevperf_usage();
+ return 1;
+ }
+ if (g_time_in_sec <= 0) {
+ spdk_app_usage();
+ bdevperf_usage();
+ return 1;
+ }
+ g_time_in_usec = g_time_in_sec * 1000000LL;
+
+ if (g_timeout_in_sec < 0) {
+ spdk_app_usage();
+ bdevperf_usage();
+ return 1;
+ }
+
+ if (g_show_performance_ema_period > 0 &&
+ g_show_performance_real_time == 0) {
+ fprintf(stderr, "-P option must be specified with -S option\n");
+ return 1;
+ }
+
+ if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) {
+ printf("I/O size of %d is greater than zero copy threshold (%d).\n",
+ g_io_size, SPDK_BDEV_LARGE_BUF_MAX_SIZE);
+ printf("Zero copy mechanism will not be used.\n");
+ g_zcopy = false;
+ }
+
+ if (g_bdevperf_conf_file) {
+ /* workload_type verification happens during config file parsing */
+ return 0;
+ }
+
+ if (!strcmp(g_workload_type, "verify") ||
+ !strcmp(g_workload_type, "reset")) {
+ g_rw_percentage = 50;
+ if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) {
+ fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n",
+ SPDK_BDEV_LARGE_BUF_MAX_SIZE, g_io_size);
+ return 1;
+ }
+ g_verify = true;
+ if (!strcmp(g_workload_type, "reset")) {
+ g_reset = true;
+ }
+ }
+
+ if (!strcmp(g_workload_type, "read") ||
+ !strcmp(g_workload_type, "randread") ||
+ !strcmp(g_workload_type, "write") ||
+ !strcmp(g_workload_type, "randwrite") ||
+ !strcmp(g_workload_type, "verify") ||
+ !strcmp(g_workload_type, "reset") ||
+ !strcmp(g_workload_type, "unmap") ||
+ !strcmp(g_workload_type, "write_zeroes") ||
+ !strcmp(g_workload_type, "flush")) {
+ if (g_mix_specified) {
+ fprintf(stderr, "Ignoring -M option... Please use -M option"
+ " only when using rw or randrw.\n");
+ }
+ }
+
+ if (!strcmp(g_workload_type, "rw") ||
+ !strcmp(g_workload_type, "randrw")) {
+ if (g_rw_percentage < 0 || g_rw_percentage > 100) {
+ fprintf(stderr,
+ "-M must be specified to value from 0 to 100 "
+ "for rw or randrw.\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct spdk_app_opts opts = {};
+ int rc;
+
+ spdk_app_opts_init(&opts);
+ opts.name = "bdevperf";
+ opts.rpc_addr = NULL;
+ opts.reactor_mask = NULL;
+ opts.shutdown_cb = spdk_bdevperf_shutdown_cb;
+
+ if ((rc = spdk_app_parse_args(argc, argv, &opts, "xzfq:o:t:w:k:ACM:P:S:T:j:", NULL,
+ bdevperf_parse_arg, bdevperf_usage)) !=
+ SPDK_APP_PARSE_ARGS_SUCCESS) {
+ return rc;
+ }
+
+ if (read_job_config()) {
+ free_job_config();
+ return 1;
+ }
+
+ if (verify_test_params(&opts) != 0) {
+ free_job_config();
+ exit(1);
+ }
+
+ rc = spdk_app_start(&opts, bdevperf_run, NULL);
+
+ spdk_app_fini();
+ free_job_config();
+ return rc;
+}
diff --git a/src/spdk/test/bdev/bdevperf/bdevperf.py b/src/spdk/test/bdev/bdevperf/bdevperf.py
new file mode 100755
index 000000000..178d90c34
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/bdevperf.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import logging
+import argparse
+import sys
+import shlex
+
+try:
+ from rpc.client import print_dict, JSONRPCException
+ import rpc
+except ImportError:
+ print("SPDK RPC library missing. Please add spdk/scripts/ directory to PYTHONPATH:")
+ print("'export PYTHONPATH=$PYTHONPATH:./spdk/scripts/'")
+ exit(1)
+
+try:
+ from shlex import quote
+except ImportError:
+ from pipes import quote
+
+
+def print_array(a):
+ print(" ".join((quote(v) for v in a)))
+
+
+def perform_tests_func(client):
+ """Perform bdevperf tests according to command line arguments when application was started.
+
+ Args:
+ none
+
+ Returns:
+ On success, 0 is returned. On error, -1 is returned.
+ """
+ params = {}
+ return client.call('perform_tests', params)
+
+
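+# Typical invocation (assuming the bdevperf app was started with -z so that it
+# waits for this RPC):
+#   ./bdevperf.py perform_tests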
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='SPDK RPC command line interface. NOTE: spdk/scripts/ is expected in PYTHONPATH')
+ parser.add_argument('-s', dest='server_addr',
+ help='RPC domain socket path or IP address', default='/var/tmp/spdk.sock')
+ parser.add_argument('-p', dest='port',
+ help='RPC port number (if server_addr is IP address)',
+ default=5260, type=int)
+ parser.add_argument('-t', dest='timeout',
+ help='Timeout as a floating point number expressed in seconds waiting for response. Default: 60.0',
+ default=60.0, type=float)
+ parser.add_argument('-v', dest='verbose', action='store_const', const="INFO",
+ help='Set verbose mode to INFO', default="ERROR")
+ parser.add_argument('--verbose', dest='verbose', choices=['DEBUG', 'INFO', 'ERROR'],
+ help="""Set verbose level. """)
+ subparsers = parser.add_subparsers(help='RPC methods')
+
+ def perform_tests(args):
+ print_dict(perform_tests_func(args.client))
+
+ p = subparsers.add_parser('perform_tests', help='Perform bdevperf tests')
+ p.set_defaults(func=perform_tests)
+
+ def call_rpc_func(args):
+ try:
+ args.func(args)
+ except JSONRPCException as ex:
+ print(ex.message)
+ exit(1)
+
+ def execute_script(parser, client, fd):
+ for rpc_call in map(str.rstrip, fd):
+ if not rpc_call.strip():
+ continue
+ args = parser.parse_args(shlex.split(rpc_call))
+ args.client = client
+ call_rpc_func(args)
+
+ args = parser.parse_args()
+ args.client = rpc.client.JSONRPCClient(args.server_addr, args.port, args.timeout, log_level=getattr(logging, args.verbose.upper()))
+ if hasattr(args, 'func'):
+ call_rpc_func(args)
+ elif sys.stdin.isatty():
+ # No arguments and no data piped through stdin
+ parser.print_help()
+ exit(1)
+ else:
+ execute_script(parser, args.client, sys.stdin)
diff --git a/src/spdk/test/bdev/bdevperf/common.sh b/src/spdk/test/bdev/bdevperf/common.sh
new file mode 100644
index 000000000..eade380a3
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/common.sh
@@ -0,0 +1,33 @@
+bdevperf=$rootdir/test/bdev/bdevperf/bdevperf
+
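+# Example usage (mirrors test_config.sh): create_job "global" "read" "Malloc0"
+# writes the [global] section, then create_job "job0" adds a job section that
+# is expected to inherit filename and rw from [global].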
+function create_job() {
+ local job_section=$1
+ local rw=$2
+ local filename=$3
+
+ if [[ $job_section == "global" ]]; then
+ cat <<- EOF >> "$testdir"/test.conf
+ [global]
+ filename=${filename}
+ EOF
+ fi
+ job="[${job_section}]"
+ cat <<- EOF >> "$testdir"/test.conf
+ ${job}
+ filename=${filename}
+ bs=1024
+ rwmixread=70
+ rw=${rw}
+ iodepth=256
+ cpumask=0xff
+ EOF
+}
+
+function get_num_jobs() {
+ echo "$1" | grep -oE "Using job config with [0-9]+ jobs" | grep -oE "[0-9]+"
+}
+
+function cleanup() {
+ rm -f $testdir/test.conf
+}
diff --git a/src/spdk/test/bdev/bdevperf/conf.json b/src/spdk/test/bdev/bdevperf/conf.json
new file mode 100644
index 000000000..c58407f38
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/conf.json
@@ -0,0 +1,25 @@
+{
+ "subsystems": [
+ {
+ "subsystem": "bdev",
+ "config": [
+ {
+ "method": "bdev_malloc_create",
+ "params": {
+ "name": "Malloc0",
+ "num_blocks": 102400,
+ "block_size": 512
+ }
+ },
+ {
+ "method": "bdev_malloc_create",
+ "params": {
+ "name": "Malloc1",
+ "num_blocks": 102400,
+ "block_size": 512
+ }
+ }
+ ]
+ }
+ ]
+}
diff --git a/src/spdk/test/bdev/bdevperf/test_config.sh b/src/spdk/test/bdev/bdevperf/test_config.sh
new file mode 100755
index 000000000..911d4e27d
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/test_config.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../../..)
+source $rootdir/test/common/autotest_common.sh
+source $testdir/common.sh
+
+jsonconf=$testdir/conf.json
+testconf=$testdir/test.conf
+
+trap 'cleanup; exit 1' SIGINT SIGTERM EXIT
+# Test inheriting filename and rw parameters from the global section.
+create_job "global" "read" "Malloc0"
+create_job "job0"
+create_job "job1"
+create_job "job2"
+create_job "job3"
+bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1)
+[[ $(get_num_jobs "$bdevperf_output") == "4" ]]
+
+bdevperf_output=$($bdevperf -C -t 2 --json $jsonconf -j $testconf)
+
+cleanup
+# Test missing global section.
+create_job "job0" "write" "Malloc0"
+create_job "job1" "write" "Malloc0"
+create_job "job2" "write" "Malloc0"
+bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1)
+[[ $(get_num_jobs "$bdevperf_output") == "3" ]]
+
+cleanup
+# Test inheriting multiple filenames and rw parameters from the global section.
+create_job "global" "rw" "Malloc0:Malloc1"
+create_job "job0"
+create_job "job1"
+create_job "job2"
+create_job "job3"
+bdevperf_output=$($bdevperf -t 2 --json $jsonconf -j $testconf 2>&1)
+[[ $(get_num_jobs "$bdevperf_output") == "4" ]]
+cleanup
+trap - SIGINT SIGTERM EXIT
diff --git a/src/spdk/test/bdev/blockdev.sh b/src/spdk/test/bdev/blockdev.sh
new file mode 100755
index 000000000..12d9c6f52
--- /dev/null
+++ b/src/spdk/test/bdev/blockdev.sh
@@ -0,0 +1,408 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../..)
+source $rootdir/test/common/autotest_common.sh
+source $testdir/nbd_common.sh
+
+rpc_py="$rootdir/scripts/rpc.py"
+conf_file="$testdir/bdev.json"
+# Make sure the configuration is clean
+: > "$conf_file"
+
+function cleanup() {
+ rm -f "$SPDK_TEST_STORAGE/aiofile"
+ rm -f "$SPDK_TEST_STORAGE/spdk-pmem-pool"
+ rm -f "$conf_file"
+
+ if [[ $test_type == rbd ]]; then
+ rbd_cleanup
+ fi
+}
+
+function start_spdk_tgt() {
+ "$SPDK_BIN_DIR/spdk_tgt" &
+ spdk_tgt_pid=$!
+ trap 'killprocess "$spdk_tgt_pid"; exit 1' SIGINT SIGTERM EXIT
+ waitforlisten "$spdk_tgt_pid"
+}
+
+function setup_bdev_conf() {
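+	# The heredoc feeds a batch of RPCs to rpc.py in a single invocation. Note that
+	# the split vbdevs are declared before their Malloc base bdevs exist; the split
+	# module presumably stores the configuration and creates the splits once the
+	# matching base bdevs are registered further down.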
+ "$rpc_py" <<- RPC
+ bdev_split_create Malloc1 2
+ bdev_split_create -s 4 Malloc2 8
+ bdev_malloc_create -b Malloc0 32 512
+ bdev_malloc_create -b Malloc1 32 512
+ bdev_malloc_create -b Malloc2 32 512
+ bdev_malloc_create -b Malloc3 32 512
+ bdev_malloc_create -b Malloc4 32 512
+ bdev_malloc_create -b Malloc5 32 512
+ bdev_passthru_create -p TestPT -b Malloc3
+ bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5"
+ RPC
+ # FIXME: QoS doesn't work properly with json_config, see issue 1146
+ #$rpc_py bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3
+ #$rpc_py bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0
+ if [[ $(uname -s) != "FreeBSD" ]]; then
+ dd if=/dev/zero of="$SPDK_TEST_STORAGE/aiofile" bs=2048 count=5000
+ "$rpc_py" bdev_aio_create "$SPDK_TEST_STORAGE/aiofile" AIO0 2048
+ fi
+}
+
+function setup_nvme_conf() {
+ "$rootdir/scripts/gen_nvme.sh" --json | "$rpc_py" load_subsystem_config
+}
+
+function setup_gpt_conf() {
+ if [[ $(uname -s) = Linux ]] && hash sgdisk; then
+ $rootdir/scripts/setup.sh reset
+		# FIXME: Note that we are racing with the kernel here. There's no guarantee that
+		# the proper objects will already be in place under sysfs, nor that any udev-like
+		# helper has created the proper block devices for us. Replace the sleep below with
+		# a proper udev settle routine.
+ sleep 1s
+		# Find nvme block devices by following the nvme driver's sysfs links to the nvme class
+ local nvme_devs=(/sys/bus/pci/drivers/nvme/*/nvme/nvme*/nvme*n*) nvme_dev
+ gpt_nvme=""
+		# Pick the first device that doesn't have a valid partition table
+ for nvme_dev in "${nvme_devs[@]}"; do
+ dev=/dev/${nvme_dev##*/}
+ if ! pt=$(parted "$dev" -ms print 2>&1); then
+ [[ $pt == *"$dev: unrecognised disk label"* ]] || continue
+ gpt_nvme=$dev
+ break
+ fi
+ done
+ if [[ -n $gpt_nvme ]]; then
+ # Create gpt partition table
+ parted -s "$gpt_nvme" mklabel gpt mkpart first '0%' '50%' mkpart second '50%' '100%'
+ # change the GUID to SPDK GUID value
+			# FIXME: Hardcode this in some common place; this value should rarely, if ever, change.
+ IFS="()" read -r _ SPDK_GPT_GUID _ < <(grep SPDK_GPT_PART_TYPE_GUID module/bdev/gpt/gpt.h)
+ SPDK_GPT_GUID=${SPDK_GPT_GUID//, /-} SPDK_GPT_GUID=${SPDK_GPT_GUID//0x/}
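+			# The header defines the GUID as a macro argument list of comma-separated 0x...
+			# values; the substitutions above join them with '-' and strip the 0x prefixes,
+			# producing the canonical GUID string that sgdisk -t expects.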
+ sgdisk -t "1:$SPDK_GPT_GUID" "$gpt_nvme"
+ sgdisk -t "2:$SPDK_GPT_GUID" "$gpt_nvme"
+ "$rootdir/scripts/setup.sh"
+ "$rpc_py" bdev_get_bdevs
+ setup_nvme_conf
+ else
+ printf 'Did not find any nvme block devices to work with, aborting the test\n' >&2
+ "$rootdir/scripts/setup.sh"
+ return 1
+ fi
+ else
+		# Unsupported platform or missing tooling; there is nothing to do, so simply
+		# exit the test gracefully.
+ trap - SIGINT SIGTERM EXIT
+ killprocess "$spdk_tgt_pid"
+ cleanup
+ exit 0
+ fi
+}
+
+function setup_crypto_aesni_conf() {
+ # Malloc0 and Malloc1 use AESNI
+ "$rpc_py" <<- RPC
+ bdev_malloc_create -b Malloc0 16 512
+ bdev_malloc_create -b Malloc1 16 512
+ bdev_crypto_create Malloc0 crypto_ram crypto_aesni_mb 0123456789123456
+ bdev_crypto_create Malloc1 crypto_ram2 crypto_aesni_mb 9012345678912345
+ RPC
+}
+
+function setup_crypto_qat_conf() {
+ # Malloc0 will use QAT AES_CBC
+ # Malloc1 will use QAT AES_XTS
+ "$rpc_py" <<- RPC
+ bdev_malloc_create -b Malloc0 16 512
+ bdev_malloc_create -b Malloc1 16 512
+ bdev_crypto_create Malloc0 crypto_ram crypto_qat 0123456789123456
+ bdev_crypto_create -c AES_XTS -k2 0123456789123456 Malloc1 crypto_ram3 crypto_qat 0123456789123456
+ RPC
+ "$rpc_py" bdev_get_bdevs -b Malloc1
+}
+
+function setup_pmem_conf() {
+ if hash pmempool; then
+ rm -f "$SPDK_TEST_STORAGE/spdk-pmem-pool"
+ pmempool create blk --size=32M 512 "$SPDK_TEST_STORAGE/spdk-pmem-pool"
+ "$rpc_py" bdev_pmem_create -n Pmem0 "$SPDK_TEST_STORAGE/spdk-pmem-pool"
+ else
+ return 1
+ fi
+}
+
+function setup_rbd_conf() {
+ timing_enter rbd_setup
+ rbd_setup 127.0.0.1
+ timing_exit rbd_setup
+
+ "$rpc_py" bdev_rbd_create -b Ceph0 rbd foo 512
+}
+
+function bdev_bounds() {
+ $testdir/bdevio/bdevio -w -s $PRE_RESERVED_MEM --json "$conf_file" &
+ bdevio_pid=$!
+ trap 'killprocess $bdevio_pid; exit 1' SIGINT SIGTERM EXIT
+ echo "Process bdevio pid: $bdevio_pid"
+ waitforlisten $bdevio_pid
+ $testdir/bdevio/tests.py perform_tests
+ killprocess $bdevio_pid
+ trap - SIGINT SIGTERM EXIT
+}
+
+function nbd_function_test() {
+ if [ $(uname -s) = Linux ] && modprobe -n nbd; then
+ local rpc_server=/var/tmp/spdk-nbd.sock
+ local conf=$1
+ local nbd_all=($(ls /dev/nbd* | grep -v p))
+ local bdev_all=($bdevs_name)
+		local nbd_num=${#bdev_all[@]}
+ if [ ${#nbd_all[@]} -le $nbd_num ]; then
+ nbd_num=${#nbd_all[@]}
+ fi
+ local nbd_list=(${nbd_all[@]:0:$nbd_num})
+ local bdev_list=(${bdev_all[@]:0:$nbd_num})
+
+ if [ ! -e $conf ]; then
+ return 1
+ fi
+
+ modprobe nbd
+ $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 --json "$conf" &
+ nbd_pid=$!
+ trap 'killprocess $nbd_pid; exit 1' SIGINT SIGTERM EXIT
+ echo "Process nbd pid: $nbd_pid"
+ waitforlisten $nbd_pid $rpc_server
+
+ nbd_rpc_start_stop_verify $rpc_server "${bdev_list[*]}"
+ nbd_rpc_data_verify $rpc_server "${bdev_list[*]}" "${nbd_list[*]}"
+
+ killprocess $nbd_pid
+ trap - SIGINT SIGTERM EXIT
+ fi
+
+ return 0
+}
+
+function fio_test_suite() {
+ # Generate the fio config file given the list of all unclaimed bdevs
+ fio_config_gen $testdir/bdev.fio verify AIO
+ for b in $(echo $bdevs | jq -r '.name'); do
+ echo "[job_$b]" >> $testdir/bdev.fio
+ echo "filename=$b" >> $testdir/bdev.fio
+ done
+
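+	# The spdk_bdev ioengine makes fio submit I/O through SPDK bdevs: each job's
+	# filename= refers to a bdev name from the JSON config passed via --spdk_json_conf.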
+ local fio_params="--ioengine=spdk_bdev --iodepth=8 --bs=4k --runtime=10 $testdir/bdev.fio --spdk_json_conf=$conf_file"
+
+ run_test "bdev_fio_rw_verify" fio_bdev $fio_params --spdk_mem=$PRE_RESERVED_MEM \
+ --output=$output_dir/blockdev_fio_verify.txt
+ rm -f ./*.state
+ rm -f $testdir/bdev.fio
+
+ # Generate the fio config file given the list of all unclaimed bdevs that support unmap
+ fio_config_gen $testdir/bdev.fio trim
+ if [ "$(echo $bdevs | jq -r 'select(.supported_io_types.unmap == true) | .name')" != "" ]; then
+ for b in $(echo $bdevs | jq -r 'select(.supported_io_types.unmap == true) | .name'); do
+ echo "[job_$b]" >> $testdir/bdev.fio
+ echo "filename=$b" >> $testdir/bdev.fio
+ done
+ else
+ rm -f $testdir/bdev.fio
+ return 0
+ fi
+
+ run_test "bdev_fio_trim" fio_bdev $fio_params --output=$output_dir/blockdev_trim.txt
+ rm -f ./*.state
+ rm -f $testdir/bdev.fio
+}
+
+function get_io_result() {
+ local limit_type=$1
+ local qos_dev=$2
+ local iostat_result
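+	# iostat.py prints one line per device; field 2 is taken as the IOPS figure and
+	# field 6 as the bandwidth figure in KB/s (matching the *1024 conversion in run_qos_test).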
+ iostat_result=$($rootdir/scripts/iostat.py -d -i 1 -t $QOS_RUN_TIME | grep $qos_dev | tail -1)
+ if [ $limit_type = IOPS ]; then
+ iostat_result=$(awk '{print $2}' <<< $iostat_result)
+ elif [ $limit_type = BANDWIDTH ]; then
+ iostat_result=$(awk '{print $6}' <<< $iostat_result)
+ fi
+
+ echo ${iostat_result/.*/}
+}
+
+function run_qos_test() {
+ local qos_limit=$1
+ local qos_result=0
+
+ qos_result=$(get_io_result $2 $3)
+ if [ $2 = BANDWIDTH ]; then
+ qos_limit=$((qos_limit * 1024))
+ fi
+ lower_limit=$((qos_limit * 9 / 10))
+ upper_limit=$((qos_limit * 11 / 10))
+
+	# QoS enforcement is based on the number of bytes transferred and currently shows
+	# some run-to-run variation, so allow a +/-10 percent window around the configured limit.
+ if [ $qos_result -lt $lower_limit ] || [ $qos_result -gt $upper_limit ]; then
+		echo "Failed to enforce the $2 QoS limit on bdev $3"
+ $rpc_py bdev_malloc_delete $QOS_DEV_1
+ $rpc_py bdev_null_delete $QOS_DEV_2
+ killprocess $QOS_PID
+ exit 1
+ fi
+}
+
+function qos_function_test() {
+ local qos_lower_iops_limit=1000
+ local qos_lower_bw_limit=2
+ local io_result=0
+ local iops_limit=0
+ local bw_limit=0
+
+ io_result=$(get_io_result IOPS $QOS_DEV_1)
+ # Set the IOPS limit as one quarter of the measured performance without QoS
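+	# and round it down to a multiple of qos_lower_iops_limit; the limit is only applied
+	# when it stays above that floor.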
+ iops_limit=$(((io_result / 4) / qos_lower_iops_limit * qos_lower_iops_limit))
+ if [ $iops_limit -gt $qos_lower_iops_limit ]; then
+
+ # Run bdevperf with IOPS rate limit on bdev 1
+ $rpc_py bdev_set_qos_limit --rw_ios_per_sec $iops_limit $QOS_DEV_1
+ run_test "bdev_qos_iops" run_qos_test $iops_limit IOPS $QOS_DEV_1
+
+ # Run bdevperf with bandwidth rate limit on bdev 2
+		# Set the bandwidth limit as 1/10 of the measured performance without QoS
+ bw_limit=$(get_io_result BANDWIDTH $QOS_DEV_2)
+ bw_limit=$((bw_limit / 1024 / 10))
+ if [ $bw_limit -lt $qos_lower_bw_limit ]; then
+ bw_limit=$qos_lower_bw_limit
+ fi
+ $rpc_py bdev_set_qos_limit --rw_mbytes_per_sec $bw_limit $QOS_DEV_2
+ run_test "bdev_qos_bw" run_qos_test $bw_limit BANDWIDTH $QOS_DEV_2
+
+ # Run bdevperf with additional read only bandwidth rate limit on bdev 1
+ $rpc_py bdev_set_qos_limit --r_mbytes_per_sec $qos_lower_bw_limit $QOS_DEV_1
+ run_test "bdev_qos_ro_bw" run_qos_test $qos_lower_bw_limit BANDWIDTH $QOS_DEV_1
+ else
+		echo "Measured IOPS without a limit is too low - skipping the QoS limit tests"
+ fi
+}
+
+function qos_test_suite() {
+ # Run bdevperf with QoS disabled first
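+	# -z puts bdevperf into wait-for-RPC mode; the actual I/O run is triggered later
+	# by the bdevperf.py perform_tests call below.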
+ "$testdir/bdevperf/bdevperf" -z -m 0x2 -q 256 -o 4096 -w randread -t 60 &
+ QOS_PID=$!
+ echo "Process qos testing pid: $QOS_PID"
+ trap 'killprocess $QOS_PID; exit 1' SIGINT SIGTERM EXIT
+ waitforlisten $QOS_PID
+
+ $rpc_py bdev_malloc_create -b $QOS_DEV_1 128 512
+ waitforbdev $QOS_DEV_1
+ $rpc_py bdev_null_create $QOS_DEV_2 128 512
+ waitforbdev $QOS_DEV_2
+
+ $rootdir/test/bdev/bdevperf/bdevperf.py perform_tests &
+ qos_function_test
+
+ $rpc_py bdev_malloc_delete $QOS_DEV_1
+ $rpc_py bdev_null_delete $QOS_DEV_2
+ killprocess $QOS_PID
+ trap - SIGINT SIGTERM EXIT
+}
+
+# Initial bdev creation and configuration
+#-----------------------------------------------------
+QOS_DEV_1="Malloc_0"
+QOS_DEV_2="Null_1"
+QOS_RUN_TIME=5
+
+if [ $(uname -s) = Linux ]; then
+ # Test dynamic memory management. All hugepages will be reserved at runtime
+ PRE_RESERVED_MEM=0
+else
+ # Dynamic memory management is not supported on BSD
+ PRE_RESERVED_MEM=2048
+fi
+
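+# The first positional argument selects which bdev configuration to exercise
+# (bdev, nvme, gpt, crypto_aesni, crypto_qat, pmem or rbd) and defaults to the
+# malloc-based "bdev" setup, e.g. (sketch): ./blockdev.sh nvme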
+test_type=${1:-bdev}
+start_spdk_tgt
+case "$test_type" in
+ bdev)
+ setup_bdev_conf
+ ;;
+ nvme)
+ setup_nvme_conf
+ ;;
+ gpt)
+ setup_gpt_conf
+ ;;
+ crypto_aesni)
+ setup_crypto_aesni_conf
+ ;;
+ crypto_qat)
+ setup_crypto_qat_conf
+ ;;
+ pmem)
+ setup_pmem_conf
+ ;;
+ rbd)
+ setup_rbd_conf
+ ;;
+ *)
+ echo "invalid test name"
+ exit 1
+ ;;
+esac
+
+# Generate json config and use it throughout all the tests
+cat <<- CONF > "$conf_file"
+ {"subsystems":[
+ $("$rpc_py" save_subsystem_config -n bdev)
+ ]}
+CONF
+
+bdevs=$("$rpc_py" bdev_get_bdevs | jq -r '.[] | select(.claimed == false)')
+bdevs_name=$(echo $bdevs | jq -r '.name')
+bdev_list=($bdevs_name)
+hello_world_bdev=${bdev_list[0]}
+trap - SIGINT SIGTERM EXIT
+killprocess "$spdk_tgt_pid"
+# End bdev configuration
+#-----------------------------------------------------
+
+run_test "bdev_hello_world" $SPDK_EXAMPLE_DIR/hello_bdev --json "$conf_file" -b "$hello_world_bdev"
+run_test "bdev_bounds" bdev_bounds
+run_test "bdev_nbd" nbd_function_test $conf_file "$bdevs_name"
+if [[ $CONFIG_FIO_PLUGIN == y ]]; then
+ if [ "$test_type" = "nvme" ] || [ "$test_type" = "gpt" ]; then
+ # TODO: once we get real multi-ns drives, re-enable this test for NVMe.
+ echo "skipping fio tests on NVMe due to multi-ns failures."
+ else
+ run_test "bdev_fio" fio_test_suite
+ fi
+else
+ echo "FIO not available"
+ exit 1
+fi
+
+run_test "bdev_verify" $testdir/bdevperf/bdevperf --json "$conf_file" -q 128 -o 4096 -w verify -t 5 -C -m 0x3
+run_test "bdev_write_zeroes" $testdir/bdevperf/bdevperf --json "$conf_file" -q 128 -o 4096 -w write_zeroes -t 1
+
+if [[ $test_type == bdev ]]; then
+ run_test "bdev_qos" qos_test_suite
+fi
+
+# Temporarily disabled - infinite loop
+# if [ $RUN_NIGHTLY -eq 1 ]; then
+# run_test "bdev_reset" $testdir/bdevperf/bdevperf --json "$conf_file" -q 16 -w reset -o 4096 -t 60
+# fi
+
+# Bdev and configuration cleanup below this line
+#-----------------------------------------------------
+if [ "$test_type" = "gpt" ]; then
+ "$rootdir/scripts/setup.sh" reset
+ sleep 1s
+ if [[ -b $gpt_nvme ]]; then
+ dd if=/dev/zero of="$gpt_nvme" bs=4096 count=8 oflag=direct
+ fi
+fi
+
+cleanup
diff --git a/src/spdk/test/bdev/nbd_common.sh b/src/spdk/test/bdev/nbd_common.sh
new file mode 100644
index 000000000..2ea765649
--- /dev/null
+++ b/src/spdk/test/bdev/nbd_common.sh
@@ -0,0 +1,123 @@
+set -e
+
+function nbd_start_disks() {
+ local rpc_server=$1
+ local bdev_list=($2)
+ local nbd_list=($3)
+ local i
+
+ for ((i = 0; i < ${#nbd_list[@]}; i++)); do
+ $rootdir/scripts/rpc.py -s $rpc_server nbd_start_disk ${bdev_list[$i]} ${nbd_list[$i]}
+		# Wait for the nbd device to become ready
+ waitfornbd $(basename ${nbd_list[$i]})
+ done
+}
+
+function nbd_start_disks_without_nbd_idx() {
+ local rpc_server=$1
+ local bdev_list=($2)
+ local i
+ local nbd_device
+
+ for ((i = 0; i < ${#bdev_list[@]}; i++)); do
+ nbd_device=$($rootdir/scripts/rpc.py -s $rpc_server nbd_start_disk ${bdev_list[$i]})
+		# Wait for the nbd device to become ready
+ waitfornbd $(basename ${nbd_device})
+ done
+}
+
+function waitfornbd_exit() {
+ local nbd_name=$1
+
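+	# Poll /proc/partitions for up to ~2 seconds (20 x 0.1s), waiting for the nbd
+	# device to disappear after it has been stopped.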
+ for ((i = 1; i <= 20; i++)); do
+ if grep -q -w $nbd_name /proc/partitions; then
+ sleep 0.1
+ else
+ break
+ fi
+ done
+
+ return 0
+}
+
+function nbd_stop_disks() {
+ local rpc_server=$1
+ local nbd_list=($2)
+ local i
+
+ for i in "${nbd_list[@]}"; do
+ $rootdir/scripts/rpc.py -s $rpc_server nbd_stop_disk $i
+ waitfornbd_exit $(basename $i)
+ done
+}
+
+function nbd_get_count() {
+	# Prints the number of SPDK nbd devices currently present
+ local rpc_server=$1
+
+ nbd_disks_json=$($rootdir/scripts/rpc.py -s $rpc_server nbd_get_disks)
+ nbd_disks_name=$(echo "${nbd_disks_json}" | jq -r '.[] | .nbd_device')
+ count=$(echo "${nbd_disks_name}" | grep -c /dev/nbd || true)
+ echo $count
+}
+
+function nbd_dd_data_verify() {
+ local nbd_list=($1)
+ local operation=$2
+ local tmp_file=$SPDK_TEST_STORAGE/nbdrandtest
+
+ if [ "$operation" = "write" ]; then
+ # data write
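+		# 256 blocks of 4K = 1 MiB of random data per device, matching the 1M
+		# compared in the verify branch below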
+ dd if=/dev/urandom of=$tmp_file bs=4096 count=256
+ for i in "${nbd_list[@]}"; do
+ dd if=$tmp_file of=$i bs=4096 count=256 oflag=direct
+ done
+ elif [ "$operation" = "verify" ]; then
+ # data read and verify
+ for i in "${nbd_list[@]}"; do
+ cmp -b -n 1M $tmp_file $i
+ done
+ rm $tmp_file
+ fi
+}
+
+function nbd_rpc_data_verify() {
+ local rpc_server=$1
+ local bdev_list=($2)
+ local nbd_list=($3)
+
+ nbd_start_disks $rpc_server "${bdev_list[*]}" "${nbd_list[*]}"
+ count=$(nbd_get_count $rpc_server)
+ if [ $count -ne ${#nbd_list[@]} ]; then
+ return 1
+ fi
+
+ nbd_dd_data_verify "${nbd_list[*]}" "write"
+ nbd_dd_data_verify "${nbd_list[*]}" "verify"
+
+ nbd_stop_disks $rpc_server "${nbd_list[*]}"
+ count=$(nbd_get_count $rpc_server)
+ if [ $count -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+function nbd_rpc_start_stop_verify() {
+ local rpc_server=$1
+ local bdev_list=($2)
+
+ nbd_start_disks_without_nbd_idx $rpc_server "${bdev_list[*]}"
+
+ nbd_disks_json=$($rootdir/scripts/rpc.py -s $rpc_server nbd_get_disks)
+ nbd_disks_name=($(echo "${nbd_disks_json}" | jq -r '.[] | .nbd_device'))
+ nbd_stop_disks $rpc_server "${nbd_disks_name[*]}"
+
+ count=$(nbd_get_count $rpc_server)
+ if [ $count -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}