From 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 27 Apr 2024 20:24:20 +0200
Subject: Adding upstream version 14.2.21.

Signed-off-by: Daniel Baumann
---
 src/spdk/test/bdev/bdevperf/bdevperf.c | 1035 ++++++++++++++++++++++++++++++++
 1 file changed, 1035 insertions(+)
 create mode 100644 src/spdk/test/bdev/bdevperf/bdevperf.c

diff --git a/src/spdk/test/bdev/bdevperf/bdevperf.c b/src/spdk/test/bdev/bdevperf/bdevperf.c
new file mode 100644
index 00000000..1416ea27
--- /dev/null
+++ b/src/spdk/test/bdev/bdevperf/bdevperf.c
@@ -0,0 +1,1035 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+#include "spdk/copy_engine.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/event.h"
+#include "spdk/log.h"
+#include "spdk/util.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+
+struct bdevperf_task {
+ struct iovec iov;
+ struct io_target *target;
+ void *buf;
+ uint64_t offset_blocks;
+ enum spdk_bdev_io_type io_type;
+ TAILQ_ENTRY(bdevperf_task) link;
+ struct spdk_bdev_io_wait_entry bdev_io_wait;
+};
+
+static const char *g_workload_type;
+static int g_io_size = 0;
+/* initialize to invalid value so we can detect if user overrides it.
*/ +static int g_rw_percentage = -1; +static int g_is_random; +static bool g_verify = false; +static bool g_reset = false; +static bool g_unmap = false; +static bool g_write_zeroes = false; +static bool g_flush = false; +static int g_queue_depth; +static uint64_t g_time_in_usec; +static int g_show_performance_real_time = 0; +static uint64_t g_show_performance_period_in_usec = 1000000; +static uint64_t g_show_performance_period_num = 0; +static uint64_t g_show_performance_ema_period = 0; +static bool g_run_failed = false; +static bool g_shutdown = false; +static uint64_t g_shutdown_tsc; +static bool g_zcopy = true; +static unsigned g_master_core; +static int g_time_in_sec; +static bool g_mix_specified; + +static struct spdk_poller *g_perf_timer = NULL; + +static void bdevperf_submit_single(struct io_target *target, struct bdevperf_task *task); + +struct io_target { + char *name; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *ch; + struct io_target *next; + unsigned lcore; + uint64_t io_completed; + uint64_t prev_io_completed; + double ema_io_per_second; + int current_queue_depth; + uint64_t size_in_ios; + uint64_t offset_in_ios; + uint64_t io_size_blocks; + bool is_draining; + struct spdk_poller *run_timer; + struct spdk_poller *reset_timer; + TAILQ_HEAD(, bdevperf_task) task_list; +}; + +struct io_target **g_head; +uint32_t *coremap; +static int g_target_count = 0; + +/* + * Used to determine how the I/O buffers should be aligned. + * This alignment will be bumped up for blockdevs that + * require alignment based on block length - for example, + * AIO blockdevs. + */ +static size_t g_min_alignment = 8; + +static int +blockdev_heads_init(void) +{ + uint32_t i, idx = 0; + uint32_t core_count = spdk_env_get_core_count(); + + g_head = calloc(core_count, sizeof(struct io_target *)); + if (!g_head) { + fprintf(stderr, "Cannot allocate g_head array with size=%u\n", + core_count); + return -1; + } + + coremap = calloc(core_count, sizeof(uint32_t)); + if (!coremap) { + free(g_head); + fprintf(stderr, "Cannot allocate coremap array with size=%u\n", + core_count); + return -1; + } + + SPDK_ENV_FOREACH_CORE(i) { + coremap[idx++] = i; + } + + return 0; +} + +static void +bdevperf_free_target(struct io_target *target) +{ + struct bdevperf_task *task, *tmp; + + TAILQ_FOREACH_SAFE(task, &target->task_list, link, tmp) { + TAILQ_REMOVE(&target->task_list, task, link); + spdk_dma_free(task->buf); + free(task); + } + + free(target->name); + free(target); +} + +static void +blockdev_heads_destroy(void) +{ + uint32_t i, core_count; + struct io_target *target, *next_target; + + if (!g_head) { + return; + } + + core_count = spdk_env_get_core_count(); + for (i = 0; i < core_count; i++) { + target = g_head[i]; + while (target != NULL) { + next_target = target->next; + bdevperf_free_target(target); + target = next_target; + } + } + + free(g_head); + free(coremap); +} + +static void +bdevperf_construct_targets(void) +{ + int index = 0; + struct spdk_bdev *bdev; + struct io_target *target; + size_t align; + int rc; + + bdev = spdk_bdev_first_leaf(); + while (bdev != NULL) { + + if (g_unmap && !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { + printf("Skipping %s because it does not support unmap\n", spdk_bdev_get_name(bdev)); + bdev = spdk_bdev_next_leaf(bdev); + continue; + } + + target = malloc(sizeof(struct io_target)); + if (!target) { + fprintf(stderr, "Unable to allocate memory for new target.\n"); + /* Return immediately because all mallocs will presumably 
fail after this */ + return; + } + + target->name = strdup(spdk_bdev_get_name(bdev)); + if (!target->name) { + fprintf(stderr, "Unable to allocate memory for target name.\n"); + free(target); + /* Return immediately because all mallocs will presumably fail after this */ + return; + } + + rc = spdk_bdev_open(bdev, true, NULL, NULL, &target->bdev_desc); + if (rc != 0) { + SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(bdev), rc); + free(target->name); + free(target); + bdev = spdk_bdev_next_leaf(bdev); + continue; + } + + target->bdev = bdev; + /* Mapping each target to lcore */ + index = g_target_count % spdk_env_get_core_count(); + target->next = g_head[index]; + target->lcore = coremap[index]; + target->io_completed = 0; + target->current_queue_depth = 0; + target->offset_in_ios = 0; + target->io_size_blocks = g_io_size / spdk_bdev_get_block_size(bdev); + if (target->io_size_blocks == 0 || + (g_io_size % spdk_bdev_get_block_size(bdev)) != 0) { + SPDK_ERRLOG("IO size (%d) is bigger than blocksize of bdev %s (%"PRIu32") or not a blocksize multiple\n", + g_io_size, spdk_bdev_get_name(bdev), spdk_bdev_get_block_size(bdev)); + spdk_bdev_close(target->bdev_desc); + free(target->name); + free(target); + bdev = spdk_bdev_next_leaf(bdev); + continue; + } + + target->size_in_ios = spdk_bdev_get_num_blocks(bdev) / target->io_size_blocks; + align = spdk_bdev_get_buf_align(bdev); + /* + * TODO: This should actually use the LCM of align and g_min_alignment, but + * it is fairly safe to assume all alignments are powers of two for now. + */ + g_min_alignment = spdk_max(g_min_alignment, align); + + target->is_draining = false; + target->run_timer = NULL; + target->reset_timer = NULL; + TAILQ_INIT(&target->task_list); + + g_head[index] = target; + g_target_count++; + + bdev = spdk_bdev_next_leaf(bdev); + } +} + +static void +end_run(void *arg1, void *arg2) +{ + struct io_target *target = arg1; + + spdk_put_io_channel(target->ch); + spdk_bdev_close(target->bdev_desc); + if (--g_target_count == 0) { + if (g_show_performance_real_time) { + spdk_poller_unregister(&g_perf_timer); + } + if (g_run_failed) { + spdk_app_stop(1); + } else { + spdk_app_stop(0); + } + } +} + +static void +bdevperf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct io_target *target; + struct bdevperf_task *task = cb_arg; + struct spdk_event *complete; + struct iovec *iovs; + int iovcnt; + + target = task->target; + + if (!success) { + if (!g_reset) { + target->is_draining = true; + g_run_failed = true; + printf("task offset: %lu on target bdev=%s fails\n", + task->offset_blocks, target->name); + } + } else if (g_verify || g_reset) { + spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt); + assert(iovcnt == 1); + assert(iovs != NULL); + if (memcmp(task->buf, iovs[0].iov_base, g_io_size) != 0) { + printf("Buffer mismatch! Disk Offset: %lu\n", task->offset_blocks); + target->is_draining = true; + g_run_failed = true; + } + } + + target->current_queue_depth--; + + if (success) { + target->io_completed++; + } + + spdk_bdev_free_io(bdev_io); + + /* + * is_draining indicates when time has expired for the test run + * and we are just waiting for the previously submitted I/O + * to complete. In this case, do not submit a new I/O to replace + * the one just completed. 
+ */ + if (!target->is_draining) { + bdevperf_submit_single(target, task); + } else { + TAILQ_INSERT_TAIL(&target->task_list, task, link); + if (target->current_queue_depth == 0) { + complete = spdk_event_allocate(g_master_core, end_run, target, NULL); + spdk_event_call(complete); + } + } +} + +static void +bdevperf_verify_submit_read(void *cb_arg) +{ + struct io_target *target; + struct bdevperf_task *task = cb_arg; + int rc; + + target = task->target; + + /* Read the data back in */ + rc = spdk_bdev_read_blocks(target->bdev_desc, target->ch, NULL, task->offset_blocks, + target->io_size_blocks, bdevperf_complete, task); + if (rc == -ENOMEM) { + task->bdev_io_wait.bdev = target->bdev; + task->bdev_io_wait.cb_fn = bdevperf_verify_submit_read; + task->bdev_io_wait.cb_arg = task; + spdk_bdev_queue_io_wait(target->bdev, target->ch, &task->bdev_io_wait); + } else if (rc != 0) { + printf("Failed to submit read: %d\n", rc); + target->is_draining = true; + g_run_failed = true; + } +} + +static void +bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + if (success) { + spdk_bdev_free_io(bdev_io); + bdevperf_verify_submit_read(cb_arg); + } else { + bdevperf_complete(bdev_io, success, cb_arg); + } +} + +static __thread unsigned int seed = 0; + +static void +bdevperf_prep_task(struct bdevperf_task *task) +{ + struct io_target *target = task->target; + uint64_t offset_in_ios; + + if (g_is_random) { + offset_in_ios = rand_r(&seed) % target->size_in_ios; + } else { + offset_in_ios = target->offset_in_ios++; + if (target->offset_in_ios == target->size_in_ios) { + target->offset_in_ios = 0; + } + } + + task->offset_blocks = offset_in_ios * target->io_size_blocks; + if (g_verify || g_reset) { + memset(task->buf, rand_r(&seed) % 256, g_io_size); + task->iov.iov_base = task->buf; + task->iov.iov_len = g_io_size; + task->io_type = SPDK_BDEV_IO_TYPE_WRITE; + } else if (g_flush) { + task->io_type = SPDK_BDEV_IO_TYPE_FLUSH; + } else if (g_unmap) { + task->io_type = SPDK_BDEV_IO_TYPE_UNMAP; + } else if (g_write_zeroes) { + task->io_type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; + } else if ((g_rw_percentage == 100) || + (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) { + task->io_type = SPDK_BDEV_IO_TYPE_READ; + } else { + task->iov.iov_base = task->buf; + task->iov.iov_len = g_io_size; + task->io_type = SPDK_BDEV_IO_TYPE_WRITE; + } +} + +static void +bdevperf_submit_task(void *arg) +{ + struct bdevperf_task *task = arg; + struct io_target *target = task->target; + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + spdk_bdev_io_completion_cb cb_fn; + void *rbuf; + int rc; + + desc = target->bdev_desc; + ch = target->ch; + + switch (task->io_type) { + case SPDK_BDEV_IO_TYPE_WRITE: + cb_fn = (g_verify || g_reset) ? bdevperf_verify_write_complete : bdevperf_complete; + rc = spdk_bdev_writev_blocks(desc, ch, &task->iov, 1, task->offset_blocks, + target->io_size_blocks, cb_fn, task); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(desc, ch, task->offset_blocks, + target->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(desc, ch, task->offset_blocks, + target->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + rc = spdk_bdev_write_zeroes_blocks(desc, ch, task->offset_blocks, + target->io_size_blocks, bdevperf_complete, task); + break; + case SPDK_BDEV_IO_TYPE_READ: + rbuf = g_zcopy ? 
NULL : task->buf; + rc = spdk_bdev_read_blocks(desc, ch, rbuf, task->offset_blocks, + target->io_size_blocks, bdevperf_complete, task); + break; + default: + assert(false); + rc = -EINVAL; + break; + } + + if (rc == -ENOMEM) { + task->bdev_io_wait.bdev = target->bdev; + task->bdev_io_wait.cb_fn = bdevperf_submit_task; + task->bdev_io_wait.cb_arg = task; + spdk_bdev_queue_io_wait(target->bdev, ch, &task->bdev_io_wait); + return; + } else if (rc != 0) { + printf("Failed to submit bdev_io: %d\n", rc); + target->is_draining = true; + g_run_failed = true; + return; + } + + target->current_queue_depth++; +} + +static void +bdevperf_submit_single(struct io_target *target, struct bdevperf_task *task) +{ + if (!task) { + if (!TAILQ_EMPTY(&target->task_list)) { + task = TAILQ_FIRST(&target->task_list); + TAILQ_REMOVE(&target->task_list, task, link); + } else { + printf("Task allocation failed\n"); + abort(); + } + } + + bdevperf_prep_task(task); + bdevperf_submit_task(task); +} + +static void +bdevperf_submit_io(struct io_target *target, int queue_depth) +{ + while (queue_depth-- > 0) { + bdevperf_submit_single(target, NULL); + } +} + +static int +end_target(void *arg) +{ + struct io_target *target = arg; + + spdk_poller_unregister(&target->run_timer); + if (g_reset) { + spdk_poller_unregister(&target->reset_timer); + } + + target->is_draining = true; + + return -1; +} + +static int reset_target(void *arg); + +static void +reset_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bdevperf_task *task = cb_arg; + struct io_target *target = task->target; + + if (!success) { + printf("Reset blockdev=%s failed\n", spdk_bdev_get_name(target->bdev)); + target->is_draining = true; + g_run_failed = true; + } + + TAILQ_INSERT_TAIL(&target->task_list, task, link); + spdk_bdev_free_io(bdev_io); + + target->reset_timer = spdk_poller_register(reset_target, target, + 10 * 1000000); +} + +static int +reset_target(void *arg) +{ + struct io_target *target = arg; + struct bdevperf_task *task = NULL; + int rc; + + spdk_poller_unregister(&target->reset_timer); + + /* Do reset. */ + task = TAILQ_FIRST(&target->task_list); + if (!task) { + printf("Task allocation failed\n"); + abort(); + } + TAILQ_REMOVE(&target->task_list, task, link); + + rc = spdk_bdev_reset(target->bdev_desc, target->ch, + reset_cb, task); + if (rc) { + printf("Reset failed: %d\n", rc); + target->is_draining = true; + g_run_failed = true; + } + + return -1; +} + +static void +bdevperf_submit_on_core(void *arg1, void *arg2) +{ + struct io_target *target = arg1; + + /* Submit initial I/O for each block device. Each time one + * completes, another will be submitted. 
*/ + while (target != NULL) { + target->ch = spdk_bdev_get_io_channel(target->bdev_desc); + if (!target->ch) { + printf("Skip this device (%s) as IO channel not setup.\n", + spdk_bdev_get_name(target->bdev)); + g_target_count--; + g_run_failed = true; + spdk_bdev_close(target->bdev_desc); + + target = target->next; + continue; + } + + /* Start a timer to stop this I/O chain when the run is over */ + target->run_timer = spdk_poller_register(end_target, target, + g_time_in_usec); + if (g_reset) { + target->reset_timer = spdk_poller_register(reset_target, target, + 10 * 1000000); + } + bdevperf_submit_io(target, g_queue_depth); + target = target->next; + } +} + +static void +bdevperf_usage(void) +{ + printf(" -q io depth\n"); + printf(" -o io size in bytes\n"); + printf(" -w io pattern type, must be one of (read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush)\n"); + printf(" -t