Diffstat (limited to 'src/spdk/dpdk/app/test-compress-perf')
14 files changed, 3568 insertions, 0 deletions
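For reference, a typical invocation of the dpdk-test-compress-perf binary built from these sources might look like the following sketch (the EAL core option, the compress_qat driver name and the input file are illustrative assumptions; any compressdev PMD and input corpus can be substituted, and the long options correspond to those parsed in comp_perf_options_parse.c below):

./dpdk-test-compress-perf -l 4 -- --driver-name compress_qat --input-file ./corpus.txt --ptest throughput --seg-sz 2048 --burst-sz 32 --compress-level 1:1:9 --num-iter 1000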
diff --git a/src/spdk/dpdk/app/test-compress-perf/Makefile b/src/spdk/dpdk/app/test-compress-perf/Makefile
new file mode 100644
index 000000000..2bff53183
--- /dev/null
+++ b/src/spdk/dpdk/app/test-compress-perf/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -O3
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+SRCS-y += comp_perf_test_verify.c
+SRCS-y += comp_perf_test_throughput.c
+SRCS-y += comp_perf_test_cyclecount.c
+SRCS-y += comp_perf_test_common.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf.h
new file mode 100644
index 000000000..997d46b59
--- /dev/null
+++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_
+#define _COMP_PERF_
+
+#include <rte_mempool.h>
+
+struct comp_test_data;
+
+typedef void *(*cperf_constructor_t)(
+		uint8_t dev_id,
+		uint16_t qp_id,
+		struct comp_test_data *options);
+
+typedef int (*cperf_runner_t)(void *test_ctx);
+typedef void (*cperf_destructor_t)(void *test_ctx);
+
+struct cperf_test {
+	cperf_constructor_t constructor;
+	cperf_runner_t runner;
+	cperf_destructor_t destructor;
+};
+
+/* Needed for weak functions */
+
+void *
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
+		uint16_t qp_id __rte_unused,
+		struct comp_test_data *options __rte_unused);
+
+void
+cperf_throughput_test_destructor(void *arg __rte_unused);
+
+int
+cperf_throughput_test_runner(void *test_ctx __rte_unused);
+
+void *
+cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
+		uint16_t qp_id __rte_unused,
+		struct comp_test_data *options __rte_unused);
+
+void
+cperf_verify_test_destructor(void *arg __rte_unused);
+
+int
+cperf_verify_test_runner(void *test_ctx __rte_unused);
+
+#endif /* _COMP_PERF_ */
diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_options.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 000000000..0b777521c
--- /dev/null
+++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_OPS_
+#define _COMP_PERF_OPS_
+
+#define MAX_LIST 32
+#define MIN_COMPRESSED_BUF_SIZE 8
+#define EXPANSE_RATIO 1.1
+#define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM)
+#define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO))
+
+extern const char *comp_perf_test_type_strs[];
+
+/* Cleanup state machine */
+enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,
+	ST_DURING_TEST
+};
+
+enum cperf_test_type {
+	CPERF_TEST_TYPE_THROUGHPUT,
+	CPERF_TEST_TYPE_VERIFY,
+	CPERF_TEST_TYPE_PMDCC
+};
+
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;
+	uint8_t list[MAX_LIST];
+};
+
+struct comp_test_data {
+	char driver_name[RTE_DEV_NAME_MAX_LEN];
+	char input_file[PATH_MAX];
+	enum cperf_test_type test;
+
+	uint8_t *input_data;
+	size_t input_data_sz;
+	uint16_t nb_qps;
+	uint16_t seg_sz;
+	uint16_t out_seg_sz;
+	uint16_t burst_sz;
+	uint32_t pool_sz;
+	uint32_t num_iter;
+	uint16_t
max_sgl_segs; + uint32_t total_segs; + + enum rte_comp_huffman huffman_enc; + enum comp_operation test_op; + int window_sz; + struct range_list level_lst; + uint8_t level; + int use_external_mbufs; + + double ratio; + enum cleanup_st cleanup; + int perf_comp_force_stop; + + uint32_t cyclecount_delay; +}; + +int +comp_perf_options_parse(struct comp_test_data *test_data, int argc, + char **argv); + +void +comp_perf_options_default(struct comp_test_data *test_data); + +int +comp_perf_options_check(struct comp_test_data *test_data); + +#endif diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_options_parse.c b/src/spdk/dpdk/app/test-compress-perf/comp_perf_options_parse.c new file mode 100644 index 000000000..04a8d2fbe --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_options_parse.c @@ -0,0 +1,675 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <getopt.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <errno.h> + +#include <rte_string_fns.h> +#include <rte_comp.h> + +#include "comp_perf_options.h" + +#define CPERF_PTEST_TYPE ("ptest") +#define CPERF_DRIVER_NAME ("driver-name") +#define CPERF_TEST_FILE ("input-file") +#define CPERF_SEG_SIZE ("seg-sz") +#define CPERF_BURST_SIZE ("burst-sz") +#define CPERF_EXTENDED_SIZE ("extended-input-sz") +#define CPERF_POOL_SIZE ("pool-sz") +#define CPERF_MAX_SGL_SEGS ("max-num-sgl-segs") +#define CPERF_NUM_ITER ("num-iter") +#define CPERF_OPTYPE ("operation") +#define CPERF_HUFFMAN_ENC ("huffman-enc") +#define CPERF_LEVEL ("compress-level") +#define CPERF_WINDOW_SIZE ("window-sz") +#define CPERF_EXTERNAL_MBUFS ("external-mbufs") + +/* cyclecount-specific options */ +#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us") + +struct name_id_map { + const char *name; + uint32_t id; +}; + +static void +usage(char *progname) +{ + printf("%s [EAL options] --\n" + " --ptest throughput / verify / pmd-cyclecount\n" + " --driver-name NAME: compress driver to use\n" + " --input-file NAME: file to compress and decompress\n" + " --extended-input-sz N: extend file data up to this size (default: no extension)\n" + " --seg-sz N: size of segment to store the data (default: 2048)\n" + " --burst-sz N: compress operation burst size\n" + " --pool-sz N: mempool size for compress operations/mbufs\n" + " (default: 8192)\n" + " --max-num-sgl-segs N: maximum number of segments for each mbuf\n" + " (default: 16)\n" + " --num-iter N: number of times the file will be\n" + " compressed/decompressed (default: 10000)\n" + " --operation [comp/decomp/comp_and_decomp]: perform test on\n" + " compression, decompression or both operations\n" + " --huffman-enc [fixed/dynamic/default]: Huffman encoding\n" + " (default: dynamic)\n" + " --compress-level N: compression level, which could be a single value, list or range\n" + " (default: range between 1 and 9)\n" + " --window-sz N: base two log value of compression window size\n" + " (e.g.: 15 => 32k, default: max supported by PMD)\n" + " --external-mbufs: use memzones as external buffers instead of\n" + " keeping the data directly in mbuf area\n" + " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n" + " valid only for cyclecount perf test (default: 500 us)\n" + " -h: prints this help\n", + progname); +} + +static int +get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len, + const char *str_key) +{ + unsigned int i; + + for (i = 0; i < map_len; i++) { + + if 
(strcmp(str_key, map[i].name) == 0) + return map[i].id; + } + + return -1; +} + +static int +parse_cperf_test_type(struct comp_test_data *test_data, const char *arg) +{ + struct name_id_map cperftest_namemap[] = { + { + comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT], + CPERF_TEST_TYPE_THROUGHPUT + }, + { + comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY], + CPERF_TEST_TYPE_VERIFY + }, + { + comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC], + CPERF_TEST_TYPE_PMDCC + } + }; + + int id = get_str_key_id_mapping( + (struct name_id_map *)cperftest_namemap, + RTE_DIM(cperftest_namemap), arg); + if (id < 0) { + RTE_LOG(ERR, USER1, "failed to parse test type"); + return -1; + } + + test_data->test = (enum cperf_test_type)id; + + return 0; +} + +static int +parse_uint32_t(uint32_t *value, const char *arg) +{ + char *end = NULL; + unsigned long n = strtoul(arg, &end, 10); + + if ((optarg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + + if (n > UINT32_MAX) + return -ERANGE; + + *value = (uint32_t) n; + + return 0; +} + +static int +parse_uint16_t(uint16_t *value, const char *arg) +{ + uint32_t val = 0; + int ret = parse_uint32_t(&val, arg); + + if (ret < 0) + return ret; + + if (val > UINT16_MAX) + return -ERANGE; + + *value = (uint16_t) val; + + return 0; +} + +static int +parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc) +{ + char *token; + uint8_t number; + + char *copy_arg = strdup(arg); + + if (copy_arg == NULL) + return -1; + + errno = 0; + token = strtok(copy_arg, ":"); + + /* Parse minimum value */ + if (token != NULL) { + number = strtoul(token, NULL, 10); + + if (errno == EINVAL || errno == ERANGE) + goto err_range; + + *min = number; + } else + goto err_range; + + token = strtok(NULL, ":"); + + /* Parse increment value */ + if (token != NULL) { + number = strtoul(token, NULL, 10); + + if (errno == EINVAL || errno == ERANGE || + number == 0) + goto err_range; + + *inc = number; + } else + goto err_range; + + token = strtok(NULL, ":"); + + /* Parse maximum value */ + if (token != NULL) { + number = strtoul(token, NULL, 10); + + if (errno == EINVAL || errno == ERANGE || + number < *min) + goto err_range; + + *max = number; + } else + goto err_range; + + if (strtok(NULL, ":") != NULL) + goto err_range; + + free(copy_arg); + return 0; + +err_range: + free(copy_arg); + return -1; +} + +static int +parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max) +{ + char *token; + uint32_t number; + uint8_t count = 0; + uint32_t temp_min; + uint32_t temp_max; + + char *copy_arg = strdup(arg); + + if (copy_arg == NULL) + return -1; + + errno = 0; + token = strtok(copy_arg, ","); + + /* Parse first value */ + if (token != NULL) { + number = strtoul(token, NULL, 10); + + if (errno == EINVAL || errno == ERANGE) + goto err_list; + + list[count++] = number; + temp_min = number; + temp_max = number; + } else + goto err_list; + + token = strtok(NULL, ","); + + while (token != NULL) { + if (count == MAX_LIST) { + RTE_LOG(WARNING, USER1, + "Using only the first %u sizes\n", + MAX_LIST); + break; + } + + number = strtoul(token, NULL, 10); + + if (errno == EINVAL || errno == ERANGE) + goto err_list; + + list[count++] = number; + + if (number < temp_min) + temp_min = number; + if (number > temp_max) + temp_max = number; + + token = strtok(NULL, ","); + } + + if (min) + *min = temp_min; + if (max) + *max = temp_max; + + free(copy_arg); + return count; + +err_list: + free(copy_arg); + return -1; +} + +static int +parse_num_iter(struct comp_test_data *test_data, 
const char *arg) +{ + int ret = parse_uint32_t(&test_data->num_iter, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n"); + return -1; + } + + if (test_data->num_iter == 0) { + RTE_LOG(ERR, USER1, + "Total number of iterations must be higher than 0\n"); + return -1; + } + + return ret; +} + +static int +parse_pool_sz(struct comp_test_data *test_data, const char *arg) +{ + int ret = parse_uint32_t(&test_data->pool_sz, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse pool size"); + return -1; + } + + if (test_data->pool_sz == 0) { + RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n"); + return -1; + } + + return ret; +} + +static int +parse_burst_sz(struct comp_test_data *test_data, const char *arg) +{ + int ret = parse_uint16_t(&test_data->burst_sz, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n"); + return -1; + } + + if (test_data->burst_sz == 0) { + RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n"); + return -1; + } + + return 0; +} + +static int +parse_extended_input_sz(struct comp_test_data *test_data, const char *arg) +{ + uint32_t tmp; + int ret = parse_uint32_t(&tmp, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse extended input size\n"); + return -1; + } + test_data->input_data_sz = tmp; + + if (tmp == 0) { + RTE_LOG(ERR, USER1, + "Extended file size must be higher than 0\n"); + return -1; + } + return 0; +} + +static int +parse_seg_sz(struct comp_test_data *test_data, const char *arg) +{ + int ret = parse_uint16_t(&test_data->seg_sz, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse segment size\n"); + return -1; + } + + if (test_data->seg_sz < MIN_COMPRESSED_BUF_SIZE) { + RTE_LOG(ERR, USER1, "Segment size must be higher than %d\n", + MIN_COMPRESSED_BUF_SIZE - 1); + return -1; + } + + if (test_data->seg_sz > MAX_SEG_SIZE) { + RTE_LOG(ERR, USER1, "Segment size must be lower than %d\n", + MAX_SEG_SIZE + 1); + return -1; + } + + return 0; +} + +static int +parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg) +{ + int ret = parse_uint16_t(&test_data->max_sgl_segs, arg); + + if (ret) { + RTE_LOG(ERR, USER1, + "Failed to parse max number of segments per mbuf chain\n"); + return -1; + } + + if (test_data->max_sgl_segs == 0) { + RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain " + "must be higher than 0\n"); + return -1; + } + + return 0; +} + +static int +parse_window_sz(struct comp_test_data *test_data, const char *arg) +{ + uint16_t tmp; + int ret = parse_uint16_t(&tmp, arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse window size\n"); + return -1; + } + test_data->window_sz = (int)tmp; + + return 0; +} + +static int +parse_driver_name(struct comp_test_data *test_data, const char *arg) +{ + if (strlen(arg) > (sizeof(test_data->driver_name) - 1)) + return -1; + + strlcpy(test_data->driver_name, arg, + sizeof(test_data->driver_name)); + + return 0; +} + +static int +parse_test_file(struct comp_test_data *test_data, const char *arg) +{ + if (strlen(arg) > (sizeof(test_data->input_file) - 1)) + return -1; + + strlcpy(test_data->input_file, arg, sizeof(test_data->input_file)); + + return 0; +} + +static int +parse_op_type(struct comp_test_data *test_data, const char *arg) +{ + struct name_id_map optype_namemap[] = { + { + "comp", + COMPRESS_ONLY + }, + { + "decomp", + DECOMPRESS_ONLY + }, + { + "comp_and_decomp", + COMPRESS_DECOMPRESS + } + }; + + int id = get_str_key_id_mapping(optype_namemap, + RTE_DIM(optype_namemap), arg); + if (id < 0) { + 
RTE_LOG(ERR, USER1, "Invalid operation type specified\n"); + return -1; + } + + test_data->test_op = (enum comp_operation)id; + + return 0; +} + +static int +parse_huffman_enc(struct comp_test_data *test_data, const char *arg) +{ + struct name_id_map huffman_namemap[] = { + { + "default", + RTE_COMP_HUFFMAN_DEFAULT + }, + { + "fixed", + RTE_COMP_HUFFMAN_FIXED + }, + { + "dynamic", + RTE_COMP_HUFFMAN_DYNAMIC + } + }; + + int id = get_str_key_id_mapping(huffman_namemap, + RTE_DIM(huffman_namemap), arg); + if (id < 0) { + RTE_LOG(ERR, USER1, "Invalid Huffmane encoding specified\n"); + return -1; + } + + test_data->huffman_enc = (enum rte_comp_huffman)id; + + return 0; +} + +static int +parse_level(struct comp_test_data *test_data, const char *arg) +{ + int ret; + + /* + * Try parsing the argument as a range, if it fails, + * arse it as a list + */ + if (parse_range(arg, &test_data->level_lst.min, + &test_data->level_lst.max, + &test_data->level_lst.inc) < 0) { + ret = parse_list(arg, test_data->level_lst.list, + &test_data->level_lst.min, + &test_data->level_lst.max); + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Failed to parse compression level/s\n"); + return -1; + } + test_data->level_lst.count = ret; + + if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) { + RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n", + RTE_COMP_LEVEL_MAX); + return -1; + } + } + + return 0; +} + +static int +parse_external_mbufs(struct comp_test_data *test_data, + const char *arg __rte_unused) +{ + test_data->use_external_mbufs = 1; + return 0; +} + +static int +parse_cyclecount_delay_us(struct comp_test_data *test_data, + const char *arg) +{ + int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n"); + return -1; + } + return 0; +} + +typedef int (*option_parser_t)(struct comp_test_data *test_data, + const char *arg); + +struct long_opt_parser { + const char *lgopt_name; + option_parser_t parser_fn; +}; + +static struct option lgopts[] = { + { CPERF_PTEST_TYPE, required_argument, 0, 0 }, + { CPERF_DRIVER_NAME, required_argument, 0, 0 }, + { CPERF_TEST_FILE, required_argument, 0, 0 }, + { CPERF_SEG_SIZE, required_argument, 0, 0 }, + { CPERF_BURST_SIZE, required_argument, 0, 0 }, + { CPERF_EXTENDED_SIZE, required_argument, 0, 0 }, + { CPERF_POOL_SIZE, required_argument, 0, 0 }, + { CPERF_MAX_SGL_SEGS, required_argument, 0, 0}, + { CPERF_NUM_ITER, required_argument, 0, 0 }, + { CPERF_OPTYPE, required_argument, 0, 0 }, + { CPERF_HUFFMAN_ENC, required_argument, 0, 0 }, + { CPERF_LEVEL, required_argument, 0, 0 }, + { CPERF_WINDOW_SIZE, required_argument, 0, 0 }, + { CPERF_EXTERNAL_MBUFS, 0, 0, 0 }, + { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 }, + { NULL, 0, 0, 0 } +}; + +static int +comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data) +{ + struct long_opt_parser parsermap[] = { + { CPERF_PTEST_TYPE, parse_cperf_test_type }, + { CPERF_DRIVER_NAME, parse_driver_name }, + { CPERF_TEST_FILE, parse_test_file }, + { CPERF_SEG_SIZE, parse_seg_sz }, + { CPERF_BURST_SIZE, parse_burst_sz }, + { CPERF_EXTENDED_SIZE, parse_extended_input_sz }, + { CPERF_POOL_SIZE, parse_pool_sz }, + { CPERF_MAX_SGL_SEGS, parse_max_num_sgl_segs }, + { CPERF_NUM_ITER, parse_num_iter }, + { CPERF_OPTYPE, parse_op_type }, + { CPERF_HUFFMAN_ENC, parse_huffman_enc }, + { CPERF_LEVEL, parse_level }, + { CPERF_WINDOW_SIZE, parse_window_sz }, + { CPERF_EXTERNAL_MBUFS, parse_external_mbufs }, + { CPERF_CYCLECOUNT_DELAY_US, 
parse_cyclecount_delay_us }, + }; + unsigned int i; + + for (i = 0; i < RTE_DIM(parsermap); i++) { + if (strncmp(lgopts[opt_idx].name, parsermap[i].lgopt_name, + strlen(lgopts[opt_idx].name)) == 0) + return parsermap[i].parser_fn(test_data, optarg); + } + + return -EINVAL; +} + +int +comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv) +{ + int opt, retval, opt_idx; + + while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) { + switch (opt) { + case 'h': + usage(argv[0]); + rte_exit(EXIT_SUCCESS, "Displayed help\n"); + break; + /* long options */ + case 0: + retval = comp_perf_opts_parse_long(opt_idx, test_data); + if (retval != 0) + return retval; + + break; + + default: + usage(argv[0]); + return -EINVAL; + } + } + + return 0; +} + +void +comp_perf_options_default(struct comp_test_data *test_data) +{ + test_data->seg_sz = 2048; + test_data->burst_sz = 32; + test_data->pool_sz = 8192; + test_data->max_sgl_segs = 16; + test_data->num_iter = 10000; + test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC; + test_data->test_op = COMPRESS_DECOMPRESS; + test_data->window_sz = -1; + test_data->level_lst.min = RTE_COMP_LEVEL_MIN; + test_data->level_lst.max = RTE_COMP_LEVEL_MAX; + test_data->level_lst.inc = 1; + test_data->test = CPERF_TEST_TYPE_THROUGHPUT; + test_data->use_external_mbufs = 0; + test_data->cyclecount_delay = 500; +} + +int +comp_perf_options_check(struct comp_test_data *test_data) +{ + if (test_data->driver_name[0] == '\0') { + RTE_LOG(ERR, USER1, "Driver name has to be set\n"); + return -1; + } + + if (test_data->input_file[0] == '\0') { + RTE_LOG(ERR, USER1, "Input file name has to be set\n"); + return -1; + } + + return 0; +} diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.c b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.c new file mode 100644 index 000000000..b402a0d83 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.c @@ -0,0 +1,569 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_compressdev.h> + +#include "comp_perf.h" +#include "comp_perf_options.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_cyclecount.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_verify.h" + + +#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0)) + +struct cperf_buffer_info { + uint16_t total_segments; + uint16_t segment_sz; + uint16_t last_segment_sz; + uint32_t total_buffs; /*number of buffers = number of ops*/ + uint16_t segments_per_buff; + uint16_t segments_per_last_buff; + size_t input_data_sz; +}; + +static struct cperf_buffer_info buffer_info; + +int +param_range_check(uint16_t size, const struct rte_param_log2_range *range) +{ + unsigned int next_size; + + /* Check lower/upper bounds */ + if (size < range->min) + return -1; + + if (size > range->max) + return -1; + + /* If range is actually only one value, size is correct */ + if (range->increment == 0) + return 0; + + /* Check if value is one of the supported sizes */ + for (next_size = range->min; next_size <= range->max; + next_size += range->increment) + if (size == next_size) + return 0; + + return -1; +} + +static uint32_t +find_buf_size(uint32_t input_size) +{ + uint32_t i; + + /* From performance point of view the buffer size should be a + * power of 2 but also should be enough to store incompressible data + */ + + /* We're looking for nearest power of 2 buffer size, 
which is greater + * than input_size + */ + uint32_t size = + !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1); + + for (i = UINT16_MAX + 1; !(i & size); i >>= 1) + ; + + return i > ((UINT16_MAX + 1) >> 1) + ? (uint32_t)((float)input_size * EXPANSE_RATIO) + : i; +} + +void +comp_perf_free_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem) +{ + uint32_t i; + + if (mem->decomp_bufs != NULL) + for (i = 0; i < mem->total_bufs; i++) + rte_pktmbuf_free(mem->decomp_bufs[i]); + + if (mem->comp_bufs != NULL) + for (i = 0; i < mem->total_bufs; i++) + rte_pktmbuf_free(mem->comp_bufs[i]); + + rte_free(mem->decomp_bufs); + rte_free(mem->comp_bufs); + rte_free(mem->decompressed_data); + rte_free(mem->compressed_data); + rte_mempool_free(mem->op_pool); + rte_mempool_free(mem->decomp_buf_pool); + rte_mempool_free(mem->comp_buf_pool); + + /* external mbuf support */ + if (mem->decomp_memzones != NULL) { + for (i = 0; i < test_data->total_segs; i++) + rte_memzone_free(mem->decomp_memzones[i]); + rte_free(mem->decomp_memzones); + } + if (mem->comp_memzones != NULL) { + for (i = 0; i < test_data->total_segs; i++) + rte_memzone_free(mem->comp_memzones[i]); + rte_free(mem->comp_memzones); + } + rte_free(mem->decomp_buf_infos); + rte_free(mem->comp_buf_infos); +} + +static void +comp_perf_extbuf_free_cb(void *addr __rte_unused, void *opaque __rte_unused) +{ +} + +static const struct rte_memzone * +comp_perf_make_memzone(const char *name, struct cperf_mem_resources *mem, + unsigned int number, size_t size) +{ + unsigned int socket_id = rte_socket_id(); + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *memzone; + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "%s_s%u_d%u_q%u_%d", name, + socket_id, mem->dev_id, mem->qp_id, number); + memzone = rte_memzone_lookup(mz_name); + if (memzone != NULL && memzone->len != size) { + rte_memzone_free(memzone); + memzone = NULL; + } + if (memzone == NULL) { + memzone = rte_memzone_reserve_aligned(mz_name, size, socket_id, + RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE); + if (memzone == NULL) + RTE_LOG(ERR, USER1, "Can't allocate memory zone %s\n", + mz_name); + } + return memzone; +} + +static int +comp_perf_allocate_external_mbufs(struct comp_test_data *test_data, + struct cperf_mem_resources *mem) +{ + uint32_t i; + + mem->comp_memzones = rte_zmalloc_socket(NULL, + test_data->total_segs * sizeof(struct rte_memzone *), + 0, rte_socket_id()); + + if (mem->comp_memzones == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the compression memzones could not be allocated\n"); + return -1; + } + + mem->decomp_memzones = rte_zmalloc_socket(NULL, + test_data->total_segs * sizeof(struct rte_memzone *), + 0, rte_socket_id()); + + if (mem->decomp_memzones == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the decompression memzones could not be allocated\n"); + return -1; + } + + mem->comp_buf_infos = rte_zmalloc_socket(NULL, + test_data->total_segs * sizeof(struct rte_mbuf_ext_shared_info), + 0, rte_socket_id()); + + if (mem->comp_buf_infos == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the compression buf infos could not be allocated\n"); + return -1; + } + + mem->decomp_buf_infos = rte_zmalloc_socket(NULL, + test_data->total_segs * sizeof(struct rte_mbuf_ext_shared_info), + 0, rte_socket_id()); + + if (mem->decomp_buf_infos == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the decompression buf infos could not be allocated\n"); + return -1; + } + + for (i = 0; i < test_data->total_segs; i++) { + mem->comp_memzones[i] = 
comp_perf_make_memzone("comp", mem, + i, test_data->out_seg_sz); + if (mem->comp_memzones[i] == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the compression memzone could not be allocated\n"); + return -1; + } + + mem->decomp_memzones[i] = comp_perf_make_memzone("decomp", mem, + i, test_data->seg_sz); + if (mem->decomp_memzones[i] == NULL) { + RTE_LOG(ERR, USER1, + "Memory to hold the decompression memzone could not be allocated\n"); + return -1; + } + + mem->comp_buf_infos[i].free_cb = + comp_perf_extbuf_free_cb; + mem->comp_buf_infos[i].fcb_opaque = NULL; + rte_mbuf_ext_refcnt_set(&mem->comp_buf_infos[i], 1); + + mem->decomp_buf_infos[i].free_cb = + comp_perf_extbuf_free_cb; + mem->decomp_buf_infos[i].fcb_opaque = NULL; + rte_mbuf_ext_refcnt_set(&mem->decomp_buf_infos[i], 1); + } + + return 0; +} + +int +comp_perf_allocate_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem) +{ + uint16_t comp_mbuf_size; + uint16_t decomp_mbuf_size; + + test_data->out_seg_sz = find_buf_size(test_data->seg_sz); + + /* Number of segments for input and output + * (compression and decompression) + */ + test_data->total_segs = DIV_CEIL(test_data->input_data_sz, + test_data->seg_sz); + + if (test_data->use_external_mbufs != 0) { + if (comp_perf_allocate_external_mbufs(test_data, mem) < 0) + return -1; + comp_mbuf_size = 0; + decomp_mbuf_size = 0; + } else { + comp_mbuf_size = test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM; + decomp_mbuf_size = test_data->seg_sz + RTE_PKTMBUF_HEADROOM; + } + + char pool_name[32] = ""; + + snprintf(pool_name, sizeof(pool_name), "comp_buf_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + mem->comp_buf_pool = rte_pktmbuf_pool_create(pool_name, + test_data->total_segs, + 0, 0, + comp_mbuf_size, + rte_socket_id()); + if (mem->comp_buf_pool == NULL) { + RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); + return -1; + } + + snprintf(pool_name, sizeof(pool_name), "decomp_buf_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + mem->decomp_buf_pool = rte_pktmbuf_pool_create(pool_name, + test_data->total_segs, + 0, 0, + decomp_mbuf_size, + rte_socket_id()); + if (mem->decomp_buf_pool == NULL) { + RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n"); + return -1; + } + + mem->total_bufs = DIV_CEIL(test_data->total_segs, + test_data->max_sgl_segs); + + snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u", + mem->dev_id, mem->qp_id); + + /* one mempool for both src and dst mbufs */ + mem->op_pool = rte_comp_op_pool_create(pool_name, + mem->total_bufs * 2, + 0, 0, rte_socket_id()); + if (mem->op_pool == NULL) { + RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n"); + return -1; + } + + /* + * Compressed data might be a bit larger than input data, + * if data cannot be compressed + */ + mem->compressed_data = rte_zmalloc_socket(NULL, + RTE_MAX( + (size_t) test_data->out_seg_sz * + test_data->total_segs, + (size_t) MIN_COMPRESSED_BUF_SIZE), + 0, + rte_socket_id()); + if (mem->compressed_data == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the data from the input " + "file could not be allocated\n"); + return -1; + } + + mem->decompressed_data = rte_zmalloc_socket(NULL, + test_data->input_data_sz, 0, + rte_socket_id()); + if (mem->decompressed_data == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the data from the input " + "file could not be allocated\n"); + return -1; + } + + mem->comp_bufs = rte_zmalloc_socket(NULL, + mem->total_bufs * sizeof(struct rte_mbuf *), + 0, rte_socket_id()); + if (mem->comp_bufs == NULL) { + RTE_LOG(ERR, USER1, "Memory 
to hold the compression mbufs" + " could not be allocated\n"); + return -1; + } + + mem->decomp_bufs = rte_zmalloc_socket(NULL, + mem->total_bufs * sizeof(struct rte_mbuf *), + 0, rte_socket_id()); + if (mem->decomp_bufs == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs" + " could not be allocated\n"); + return -1; + } + + buffer_info.total_segments = test_data->total_segs; + buffer_info.segment_sz = test_data->seg_sz; + buffer_info.total_buffs = mem->total_bufs; + buffer_info.segments_per_buff = test_data->max_sgl_segs; + buffer_info.input_data_sz = test_data->input_data_sz; + + return 0; +} + +int +prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem) +{ + uint32_t remaining_data = test_data->input_data_sz; + uint8_t *input_data_ptr = test_data->input_data; + size_t data_sz = 0; + uint8_t *data_addr; + uint32_t i, j; + uint16_t segs_per_mbuf = 0; + uint32_t cmz = 0; + uint32_t dmz = 0; + + for (i = 0; i < mem->total_bufs; i++) { + /* Allocate data in input mbuf and copy data from input file */ + mem->decomp_bufs[i] = + rte_pktmbuf_alloc(mem->decomp_buf_pool); + if (mem->decomp_bufs[i] == NULL) { + RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); + return -1; + } + + data_sz = RTE_MIN(remaining_data, test_data->seg_sz); + + if (test_data->use_external_mbufs != 0) { + rte_pktmbuf_attach_extbuf(mem->decomp_bufs[i], + mem->decomp_memzones[dmz]->addr, + mem->decomp_memzones[dmz]->iova, + test_data->seg_sz, + &mem->decomp_buf_infos[dmz]); + dmz++; + } + + data_addr = (uint8_t *) rte_pktmbuf_append( + mem->decomp_bufs[i], data_sz); + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + rte_memcpy(data_addr, input_data_ptr, data_sz); + + input_data_ptr += data_sz; + remaining_data -= data_sz; + + /* Already one segment in the mbuf */ + segs_per_mbuf = 1; + + /* Chain mbufs if needed for input mbufs */ + while (segs_per_mbuf < test_data->max_sgl_segs + && remaining_data > 0) { + struct rte_mbuf *next_seg = + rte_pktmbuf_alloc(mem->decomp_buf_pool); + + if (next_seg == NULL) { + RTE_LOG(ERR, USER1, + "Could not allocate mbuf\n"); + return -1; + } + + data_sz = RTE_MIN(remaining_data, test_data->seg_sz); + + if (test_data->use_external_mbufs != 0) { + rte_pktmbuf_attach_extbuf( + next_seg, + mem->decomp_memzones[dmz]->addr, + mem->decomp_memzones[dmz]->iova, + test_data->seg_sz, + &mem->decomp_buf_infos[dmz]); + dmz++; + } + + data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, + data_sz); + + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + rte_memcpy(data_addr, input_data_ptr, data_sz); + input_data_ptr += data_sz; + remaining_data -= data_sz; + + if (rte_pktmbuf_chain(mem->decomp_bufs[i], + next_seg) < 0) { + RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); + return -1; + } + segs_per_mbuf++; + } + + /* Allocate data in output mbuf */ + mem->comp_bufs[i] = + rte_pktmbuf_alloc(mem->comp_buf_pool); + if (mem->comp_bufs[i] == NULL) { + RTE_LOG(ERR, USER1, "Could not allocate mbuf\n"); + return -1; + } + + if (test_data->use_external_mbufs != 0) { + rte_pktmbuf_attach_extbuf(mem->comp_bufs[i], + mem->comp_memzones[cmz]->addr, + mem->comp_memzones[cmz]->iova, + test_data->out_seg_sz, + &mem->comp_buf_infos[cmz]); + cmz++; + } + + data_addr = (uint8_t *) rte_pktmbuf_append( + mem->comp_bufs[i], + test_data->out_seg_sz); + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + /* Chain mbufs if needed for output mbufs */ + for 
(j = 1; j < segs_per_mbuf; j++) { + struct rte_mbuf *next_seg = + rte_pktmbuf_alloc(mem->comp_buf_pool); + + if (next_seg == NULL) { + RTE_LOG(ERR, USER1, + "Could not allocate mbuf\n"); + return -1; + } + + if (test_data->use_external_mbufs != 0) { + rte_pktmbuf_attach_extbuf( + next_seg, + mem->comp_memzones[cmz]->addr, + mem->comp_memzones[cmz]->iova, + test_data->out_seg_sz, + &mem->comp_buf_infos[cmz]); + cmz++; + } + + data_addr = (uint8_t *)rte_pktmbuf_append(next_seg, + test_data->out_seg_sz); + if (data_addr == NULL) { + RTE_LOG(ERR, USER1, "Could not append data\n"); + return -1; + } + + if (rte_pktmbuf_chain(mem->comp_bufs[i], + next_seg) < 0) { + RTE_LOG(ERR, USER1, "Could not chain mbufs\n"); + return -1; + } + } + } + + buffer_info.segments_per_last_buff = segs_per_mbuf; + buffer_info.last_segment_sz = data_sz; + + return 0; +} + +void +print_test_dynamics(const struct comp_test_data *test_data) +{ + uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz, + MAX_SEG_SIZE); + + if (buffer_info.total_buffs > 1) { + if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) { + printf("\nWarning: for the current input parameters, number" + " of ops is higher than one, which may result" + " in sub-optimal performance.\n"); + printf("To improve the performance (for the current" + " input data) following parameters are" + " suggested:\n"); + printf(" * Segment size: %d\n", + MAX_SEG_SIZE); + printf(" * Number of segments: %u\n", + opt_total_segs); + } + } else if (buffer_info.total_buffs == 1) { + printf("\nInfo: there is only one op with %u segments -" + " the compression ratio is the best.\n", + buffer_info.segments_per_last_buff); + if (buffer_info.segment_sz < MAX_SEG_SIZE) + printf("To reduce compression time, please use" + " bigger segment size: %d.\n", + MAX_SEG_SIZE); + else if (buffer_info.segment_sz == MAX_SEG_SIZE) + printf("Segment size is optimal for the best" + " performance.\n"); + } else + printf("Warning: something wrong happened!!\n"); + + printf("\nFor the current input parameters (segment size = %u," + " maximum segments per SGL = %u):\n", + buffer_info.segment_sz, + buffer_info.segments_per_buff); + printf(" * Total number of buffers: %d\n", + buffer_info.total_segments); + printf(" * %u buffer(s) %u bytes long, last buffer %u" + " byte(s) long\n", + buffer_info.total_segments - 1, + buffer_info.segment_sz, + buffer_info.last_segment_sz); + printf(" * Number of ops: %u\n", buffer_info.total_buffs); + printf(" * Total memory allocation: %u\n", + (buffer_info.total_segments - 1) * buffer_info.segment_sz + + buffer_info.last_segment_sz); + if (buffer_info.total_buffs > 1) + printf(" * %u ops: %u segment(s) in each," + " segment size %u\n", + buffer_info.total_buffs - 1, + buffer_info.segments_per_buff, + buffer_info.segment_sz); + if (buffer_info.segments_per_last_buff > 1) { + printf(" * 1 op %u segments:\n", + buffer_info.segments_per_last_buff); + printf(" o %u segment size %u\n", + buffer_info.segments_per_last_buff - 1, + buffer_info.segment_sz); + printf(" o last segment size %u\n", + buffer_info.last_segment_sz); + } else if (buffer_info.segments_per_last_buff == 1) { + printf(" * 1 op (the last one): %u segment %u" + " byte(s) long\n\n", + buffer_info.segments_per_last_buff, + buffer_info.last_segment_sz); + } + printf("\n"); +} diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.h new file mode 100644 index 000000000..72705c6a2 --- /dev/null +++ 
b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_common.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_COMMON_H_ +#define _COMP_PERF_TEST_COMMON_H_ + +#include <stdint.h> + +#include <rte_mempool.h> + +struct cperf_mem_resources { + uint8_t dev_id; + uint16_t qp_id; + uint8_t lcore_id; + + rte_atomic16_t print_info_once; + + uint32_t total_bufs; + uint8_t *compressed_data; + uint8_t *decompressed_data; + + struct rte_mbuf **comp_bufs; + struct rte_mbuf **decomp_bufs; + + struct rte_mempool *comp_buf_pool; + struct rte_mempool *decomp_buf_pool; + struct rte_mempool *op_pool; + + /* external mbuf support */ + const struct rte_memzone **comp_memzones; + const struct rte_memzone **decomp_memzones; + struct rte_mbuf_ext_shared_info *comp_buf_infos; + struct rte_mbuf_ext_shared_info *decomp_buf_infos; +}; + +int +param_range_check(uint16_t size, const struct rte_param_log2_range *range); + +void +comp_perf_free_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem); + +int +comp_perf_allocate_memory(struct comp_test_data *test_data, + struct cperf_mem_resources *mem); + +int +prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem); + +void +print_test_dynamics(const struct comp_test_data *test_data); + +#endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c new file mode 100644 index 000000000..55559a7d5 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c @@ -0,0 +1,614 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_cycles.h> +#include "rte_spinlock.h" +#include <rte_compressdev.h> + +#include "comp_perf_test_cyclecount.h" + +struct cperf_cyclecount_ctx { + struct cperf_verify_ctx ver; + + uint32_t ops_enq_retries; + uint32_t ops_deq_retries; + + uint64_t duration_op; + uint64_t duration_enq; + uint64_t duration_deq; +}; + +void +cperf_cyclecount_test_destructor(void *arg) +{ + struct cperf_cyclecount_ctx *ctx = arg; + + if (arg) { + comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem); + rte_free(arg); + } +} + +void * +cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_cyclecount_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->ver.mem.dev_id = dev_id; + ctx->ver.mem.qp_id = qp_id; + ctx->ver.options = options; + ctx->ver.silent = 1; /* ver. 
part will be silent */
+
+	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+		return ctx;
+
+	cperf_cyclecount_test_destructor(ctx);
+	return NULL;
+}
+
+static int
+cperf_cyclecount_op_setup(struct rte_comp_op **ops,
+		struct cperf_cyclecount_ctx *ctx,
+		struct rte_mbuf **input_bufs,
+		struct rte_mbuf **output_bufs,
+		void *priv_xform,
+		uint32_t out_seg_sz)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+
+	uint32_t i, iter, num_iter;
+	int res = 0;
+	uint16_t ops_needed;
+
+	num_iter = test_data->num_iter;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t remaining_ops = mem->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+					test_data->burst_sz);
+			ops_needed = num_ops;
+
+			/* Allocate compression operations */
+			if (ops_needed && rte_mempool_get_bulk(
+						mem->op_pool,
+						(void **)ops,
+						ops_needed) != 0) {
+				RTE_LOG(ERR, USER1,
+					"Cyclecount: could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+
+			for (i = 0; i < ops_needed; i++) {
+
+				/* Calculate next buffer to attach
+				 * to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i;
+				uint16_t op_id = i;
+
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = out_seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			/* E N Q U E U I N G */
+			/* assuming that all ops are enqueued */
+			/* instead of the real enqueue operation */
+			num_enq = num_ops;
+
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			/* D E Q U E U I N G */
+			/* assuming that all ops dequeued */
+			/* instead of the real dequeue operation */
+			num_deq = num_ops;
+
+			total_deq_ops += num_deq;
+			rte_mempool_put_bulk(mem->op_pool,
+					(void **)ops, num_deq);
+		}
+	}
+	return res;
+end:
+	rte_mempool_put_bulk(mem->op_pool, (void **)ops, ops_needed);
+	rte_free(ops);
+
+	return res;
+}
+
+static int
+main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+	uint8_t dev_id = mem->dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int ret, res = 0;
+	int allocated = 0;
+	uint32_t out_seg_sz;
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Unknown burst size\n");
+		return -1;
+	}
+	ctx->duration_enq = 0;
+	ctx->duration_deq = 0;
+	ctx->ops_enq_retries = 0;
+	ctx->ops_deq_retries = 0;
+
+	/* one array for both enqueue and dequeue */
+	ops = rte_zmalloc_socket(NULL,
+		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops structures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[mem->total_bufs];
+
+	if (type ==
RTE_COMP_COMPRESS) { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_COMPRESS, + .compress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .deflate.huffman = test_data->huffman_enc, + .level = test_data->level, + .window_size = test_data->window_sz, + .chksum = RTE_COMP_CHECKSUM_NONE, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->decomp_bufs; + output_bufs = mem->comp_bufs; + out_seg_sz = test_data->out_seg_sz; + } else { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_DECOMPRESS, + .decompress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .chksum = RTE_COMP_CHECKSUM_NONE, + .window_size = test_data->window_sz, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->comp_bufs; + output_bufs = mem->decomp_bufs; + out_seg_sz = test_data->seg_sz; + } + + /* Create private xform */ + if (rte_compressdev_private_xform_create(dev_id, &xform, + &priv_xform) < 0) { + RTE_LOG(ERR, USER1, "Private xform could not be created\n"); + res = -1; + goto end; + } + + tsc_start = rte_rdtsc_precise(); + ret = cperf_cyclecount_op_setup(ops, + ctx, + input_bufs, + output_bufs, + priv_xform, + out_seg_sz); + + tsc_end = rte_rdtsc_precise(); + + /* ret value check postponed a bit to cancel extra 'if' bias */ + if (ret < 0) { + RTE_LOG(ERR, USER1, "Setup function failed\n"); + res = -1; + goto end; + } + + tsc_duration = tsc_end - tsc_start; + ctx->duration_op = tsc_duration; + + num_iter = test_data->num_iter; + for (iter = 0; iter < num_iter; iter++) { + uint32_t total_ops = mem->total_bufs; + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t ops_unused = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + uint16_t ops_needed = num_ops - ops_unused; + + /* + * Move the unused operations from the previous + * enqueue_burst call to the front, to maintain order + */ + if ((ops_unused > 0) && (num_enq > 0)) { + size_t nb_b_to_mov = + ops_unused * sizeof(struct rte_comp_op *); + + memmove(ops, &ops[num_enq], nb_b_to_mov); + } + + /* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, + (void **)ops, + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Could not allocate enough operations\n"); + res = -1; + goto end; + } + allocated += ops_needed; + + for (i = 0; i < ops_needed; i++) { + /* + * Calculate next buffer to attach to operation + */ + uint32_t buf_id = total_enq_ops + i + + ops_unused; + uint16_t op_id = ops_unused + i; + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, + num_ops); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_enq += tsc_duration; + + if (num_enq < num_ops) + ctx->ops_enq_retries++; + + if (test_data->cyclecount_delay) + rte_delay_us_block(test_data->cyclecount_delay); 
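+			/* Descriptive note (not in the original diff): num_enq == 0 here usually means the device queue is full; the stats check below distinguishes transient backpressure (retried on the next loop pass) from a hard enqueue error (abort). */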
+ + if (num_enq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.enqueue_err_count) { + res = -1; + goto end; + } + } + + ops_unused = num_ops - num_enq; + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + allocated); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + + if (num_deq < allocated) + ctx->ops_deq_retries++; + + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + + /* Dequeue the last operations */ + while (total_deq_ops < total_ops) { + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + ctx->ops_deq_retries++; + + if (num_deq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.dequeue_err_count) { + res = -1; + goto end; + } + } + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + } + allocated = 0; + +end: + if (allocated) + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); + rte_compressdev_private_xform_free(dev_id, priv_xform); + rte_free(ops); + + if (test_data->perf_comp_force_stop) { + RTE_LOG(ERR, USER1, + "lcore: %d Perf. 
test has been aborted by user\n",
+			mem->lcore_id);
+		res = -1;
+	}
+	return res;
+}
+
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+	struct cperf_cyclecount_ctx *ctx = test_ctx;
+	struct comp_test_data *test_data = ctx->ver.options;
+	uint32_t lcore = rte_lcore_id();
+	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static rte_spinlock_t print_spinlock;
+	int i;
+
+	uint32_t ops_enq_retries_comp;
+	uint32_t ops_deq_retries_comp;
+
+	uint32_t ops_enq_retries_decomp;
+	uint32_t ops_deq_retries_decomp;
+
+	uint32_t duration_setup_per_op;
+
+	uint32_t duration_enq_per_op_comp;
+	uint32_t duration_deq_per_op_comp;
+
+	uint32_t duration_enq_per_op_decomp;
+	uint32_t duration_deq_per_op_decomp;
+
+	ctx->ver.mem.lcore_id = lcore;
+
+	/*
+	 * printing information about current compression thread
+	 */
+	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+		printf(" lcore: %u,"
+			" driver name: %s,"
+			" device name: %s,"
+			" device id: %u,"
+			" socket id: %u,"
+			" queue pair id: %u\n",
+			lcore,
+			ctx->ver.options->driver_name,
+			rte_compressdev_name_get(ctx->ver.mem.dev_id),
+			ctx->ver.mem.dev_id,
+			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+			ctx->ver.mem.qp_id);
+
+	/*
+	 * First the verification part is needed
+	 */
+	if (cperf_verify_test_runner(&ctx->ver))
+		return EXIT_FAILURE;
+
+	/*
+	 * Run the tests twice, discarding the first performance
+	 * results, before the cache is warmed up
+	 */
+
+	/* C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_comp = ctx->ops_enq_retries;
+	ops_deq_retries_comp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_comp = ctx->duration_enq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+	duration_deq_per_op_comp = ctx->duration_deq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	/* D E C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_decomp = ctx->ops_enq_retries;
+	ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_decomp = ctx->duration_enq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+	duration_deq_per_op_decomp = ctx->duration_deq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	duration_setup_per_op = ctx->duration_op /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	/* R E P O R T processing */
+	if (rte_atomic16_test_and_set(&display_once)) {
+
+		rte_spinlock_lock(&print_spinlock);
+
+		printf("\nLegend for the table\n"
+			" - Retries section: number of retries for the following operations:\n"
+			" [C-e] - compression enqueue\n"
+			" [C-d] - compression dequeue\n"
+			" [D-e] - decompression enqueue\n"
+			" [D-d] - decompression dequeue\n"
+			" - Cycles section: number of cycles per 'op' for the following operations:\n"
+			" setup/op - memory allocation, op configuration and memory deallocation\n"
+			" [C-e] - compression enqueue\n"
+			" [C-d] - compression dequeue\n"
+			" [D-e] - decompression enqueue\n"
+			" [D-d] - decompression dequeue\n\n");
+
+		printf("\n%12s%6s%12s%17s",
+			"lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+		printf(" |%10s %6s %8s %6s %8s",
+			" Retries:",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+
+		printf(" |%9s %9s %9s %9s %9s %9s\n",
+			" Cycles:",
+			"setup/op",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+
+		rte_spinlock_unlock(&print_spinlock);
+	}
+
+	rte_spinlock_lock(&print_spinlock);
+
+	printf("%12u"
+		"%6u"
+		"%12zu"
+		"%17.2f",
+		ctx->ver.mem.lcore_id,
+
test_data->level, + ctx->ver.comp_data_sz, + ctx->ver.ratio); + + printf(" |%10s %6u %8u %6u %8u", + " ", + ops_enq_retries_comp, + ops_deq_retries_comp, + ops_enq_retries_decomp, + ops_deq_retries_decomp); + + printf(" |%9s %9u %9u %9u %9u %9u\n", + " ", + duration_setup_per_op, + duration_enq_per_op_comp, + duration_deq_per_op_comp, + duration_enq_per_op_decomp, + duration_deq_per_op_decomp); + + rte_spinlock_unlock(&print_spinlock); + + return EXIT_SUCCESS; +} diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.h new file mode 100644 index 000000000..8e1b4d9e9 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_CYCLECOUNT_ +#define _COMP_PERF_TEST_CYCLECOUNT_ + +#include <stdint.h> + +#include "comp_perf_options.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_verify.h" + +void +cperf_cyclecount_test_destructor(void *arg); + +int +cperf_cyclecount_test_runner(void *test_ctx); + +void * +cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options); + +#endif diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.c b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.c new file mode 100644 index 000000000..13922b658 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.c @@ -0,0 +1,408 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_cycles.h> +#include <rte_compressdev.h> + +#include "comp_perf_test_throughput.h" + +void +cperf_throughput_test_destructor(void *arg) +{ + if (arg) { + comp_perf_free_memory( + ((struct cperf_benchmark_ctx *)arg)->ver.options, + &((struct cperf_benchmark_ctx *)arg)->ver.mem); + rte_free(arg); + } +} + +void * +cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_benchmark_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->ver.mem.dev_id = dev_id; + ctx->ver.mem.qp_id = qp_id; + ctx->ver.options = options; + ctx->ver.silent = 1; /* ver. 
part will be silent */
+
+	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+		return ctx;
+
+	cperf_throughput_test_destructor(ctx);
+	return NULL;
+}
+
+static int
+main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+	uint8_t dev_id = mem->dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+	uint32_t out_seg_sz;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops structures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[mem->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = test_data->level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->decomp_bufs;
+		output_bufs = mem->comp_bufs;
+		out_seg_sz = test_data->out_seg_sz;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->comp_bufs;
+		output_bufs = mem->decomp_bufs;
+		out_seg_sz = test_data->seg_sz;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	num_iter = test_data->num_iter;
+	tsc_start = tsc_end = tsc_duration = 0;
+	tsc_start = rte_rdtsc_precise();
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = mem->total_bufs;
+		uint32_t remaining_ops = mem->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+					test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+					ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						mem->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = out_seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m =
m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, + num_ops); + if (num_enq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.enqueue_err_count) { + res = -1; + goto end; + } + } + + ops_unused = num_ops - num_enq; + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, + "Some operations were not successful\n"); + res = -1; + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + + /* Dequeue the last operations */ + while (total_deq_ops < total_ops) { + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + if (num_deq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.dequeue_err_count) { + res = -1; + goto end; + } + } + + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, + "Some operations were not successful\n"); + res = -1; + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + } + + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + + if (type == RTE_COMP_COMPRESS) + ctx->comp_tsc_duration[test_data->level] = + tsc_duration / num_iter; + else + ctx->decomp_tsc_duration[test_data->level] = + tsc_duration / num_iter; + +end: + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); + rte_compressdev_private_xform_free(dev_id, priv_xform); + rte_free(ops); + + if (test_data->perf_comp_force_stop) { + RTE_LOG(ERR, USER1, + "lcore: %d Perf. test has been aborted by user\n", + mem->lcore_id); + res = -1; + } + return res; +} +
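+/* + * Note on the Gbps figures computed in the runner below: comp_tsc_byte + * holds the average cost in TSC cycles per input byte, so dividing + * rte_get_tsc_hz() [cycles/s] by comp_tsc_byte [cycles/byte] yields + * bytes per second; multiplying by 8 and dividing by 10^9 converts + * that to Gbps. + */ +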
+int +cperf_throughput_test_runner(void *test_ctx) +{ + struct cperf_benchmark_ctx *ctx = test_ctx; + struct comp_test_data *test_data = ctx->ver.options; + uint32_t lcore = rte_lcore_id(); + static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0); + int i, ret = EXIT_SUCCESS; + + ctx->ver.mem.lcore_id = lcore; + + /* + * Print information about the current compression thread + */ + if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once)) + printf(" lcore: %u," + " driver name: %s," + " device name: %s," + " device id: %u," + " socket id: %u," + " queue pair id: %u\n", + lcore, + ctx->ver.options->driver_name, + rte_compressdev_name_get(ctx->ver.mem.dev_id), + ctx->ver.mem.dev_id, + rte_compressdev_socket_id(ctx->ver.mem.dev_id), + ctx->ver.mem.qp_id); + + /* + * The verification part needs to run first + */ + if (cperf_verify_test_runner(&ctx->ver)) { + ret = EXIT_FAILURE; + goto end; + } + + /* + * Run the tests twice, discarding the first performance + * results, which are measured before the cache is warmed up + */ + for (i = 0; i < 2; i++) { + if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { + ret = EXIT_FAILURE; + goto end; + } + } + + for (i = 0; i < 2; i++) { + if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { + ret = EXIT_FAILURE; + goto end; + } + } + + ctx->comp_tsc_byte = + (double)(ctx->comp_tsc_duration[test_data->level]) / + test_data->input_data_sz; + + ctx->decomp_tsc_byte = + (double)(ctx->decomp_tsc_duration[test_data->level]) / + test_data->input_data_sz; + + ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 / + 1000000000; + + ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 / + 1000000000; + + if (rte_atomic16_test_and_set(&display_once)) { + printf("\n%12s%6s%12s%17s%15s%16s\n", + "lcore id", "Level", "Comp size", "Comp ratio [%]", + "Comp [Gbps]", "Decomp [Gbps]"); + } + + printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n", + ctx->ver.mem.lcore_id, + test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio, + ctx->comp_gbps, + ctx->decomp_gbps); + +end: + return ret; +} diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.h new file mode 100644 index 000000000..467e3aa78 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_throughput.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_BENCHMARK_ +#define _COMP_PERF_TEST_BENCHMARK_ + +#include <stdint.h> + +#include "comp_perf_options.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_verify.h" + +struct cperf_benchmark_ctx { + struct cperf_verify_ctx ver; + + /* Store TSC duration for all levels (including level 0) */ + uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; + uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; + double comp_gbps; + double decomp_gbps; + double comp_tsc_byte; + double decomp_tsc_byte; +}; + +void +cperf_throughput_test_destructor(void *arg); + +int +cperf_throughput_test_runner(void *test_ctx); + +void * +cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options); + +#endif diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.c b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.c new file mode 100644 index 000000000..5e13257b7 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.c @@ -0,0 +1,442 @@ +/* 
SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_compressdev.h> + +#include "comp_perf_test_verify.h" +#include "comp_perf_test_common.h" + +void +cperf_verify_test_destructor(void *arg) +{ + if (arg) { + comp_perf_free_memory( + ((struct cperf_verify_ctx *)arg)->options, + &((struct cperf_verify_ctx *)arg)->mem); + rte_free(arg); + } +} + +void * +cperf_verify_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_verify_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_verify_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->mem.dev_id = dev_id; + ctx->mem.qp_id = qp_id; + ctx->options = options; + + if (!comp_perf_allocate_memory(ctx->options, &ctx->mem) && + !prepare_bufs(ctx->options, &ctx->mem)) + return ctx; + + cperf_verify_test_destructor(ctx); + return NULL; +} + +static int +main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) +{ + struct comp_test_data *test_data = ctx->options; + uint8_t *output_data_ptr = NULL; + size_t *output_data_sz = NULL; + struct cperf_mem_resources *mem = &ctx->mem; + + uint8_t dev_id = mem->dev_id; + uint32_t i, iter, num_iter; + struct rte_comp_op **ops, **deq_ops; + void *priv_xform = NULL; + struct rte_comp_xform xform; + size_t output_size = 0; + struct rte_mbuf **input_bufs, **output_bufs; + int res = 0; + int allocated = 0; + uint32_t out_seg_sz; + + if (test_data == NULL || !test_data->burst_sz) { + RTE_LOG(ERR, USER1, + "Unknown burst size\n"); + return -1; + } + + ops = rte_zmalloc_socket(NULL, + 2 * mem->total_bufs * sizeof(struct rte_comp_op *), + 0, rte_socket_id()); + + if (ops == NULL) { + RTE_LOG(ERR, USER1, + "Can't allocate memory for ops structures\n"); + return -1; + } + + deq_ops = &ops[mem->total_bufs]; + + if (type == RTE_COMP_COMPRESS) { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_COMPRESS, + .compress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .deflate.huffman = test_data->huffman_enc, + .level = test_data->level, + .window_size = test_data->window_sz, + .chksum = RTE_COMP_CHECKSUM_NONE, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + output_data_ptr = ctx->mem.compressed_data; + output_data_sz = &ctx->comp_data_sz; + input_bufs = mem->decomp_bufs; + output_bufs = mem->comp_bufs; + out_seg_sz = test_data->out_seg_sz; + } else { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_DECOMPRESS, + .decompress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .chksum = RTE_COMP_CHECKSUM_NONE, + .window_size = test_data->window_sz, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + output_data_ptr = ctx->mem.decompressed_data; + output_data_sz = &ctx->decomp_data_sz; + input_bufs = mem->comp_bufs; + output_bufs = mem->decomp_bufs; + out_seg_sz = test_data->seg_sz; + } + + /* Create private xform */ + if (rte_compressdev_private_xform_create(dev_id, &xform, + &priv_xform) < 0) { + RTE_LOG(ERR, USER1, "Private xform could not be created\n"); + res = -1; + goto end; + } + + num_iter = 1; + + for (iter = 0; iter < num_iter; iter++) { + uint32_t total_ops = mem->total_bufs; + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t ops_unused = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + output_size = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + uint16_t ops_needed = num_ops - ops_unused; + + /* + * Move the unused 
operations from the previous + * enqueue_burst call to the front, to maintain order + */ + if ((ops_unused > 0) && (num_enq > 0)) { + size_t nb_b_to_mov = + ops_unused * sizeof(struct rte_comp_op *); + + memmove(ops, &ops[num_enq], nb_b_to_mov); + } + + /* Allocate compression operations */ + if (ops_needed && !rte_comp_op_bulk_alloc( + mem->op_pool, + &ops[ops_unused], + ops_needed)) { + RTE_LOG(ERR, USER1, + "Could not allocate enough operations\n"); + res = -1; + goto end; + } + allocated += ops_needed; + + for (i = 0; i < ops_needed; i++) { + /* + * Calculate next buffer to attach to operation + */ + uint32_t buf_id = total_enq_ops + i + + ops_unused; + uint16_t op_id = ops_unused + i; + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, + num_ops); + if (num_enq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.enqueue_err_count) { + res = -1; + goto end; + } + } + + ops_unused = num_ops - num_enq; + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + total_deq_ops += num_deq; + + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status == + RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED || + op->status == + RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE) { + RTE_LOG(ERR, USER1, +"Out of space error occurred due to uncompressible input data expanding to larger than destination buffer. 
Increase the EXPANSE_RATIO constant to use this data.\n"); + res = -1; + goto end; + } else if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, + "Some operations were not successful\n"); + res = -1; + goto end; + } + + const void *read_data_addr = + rte_pktmbuf_read(op->m_dst, 0, + op->produced, output_data_ptr); + if (read_data_addr == NULL) { + RTE_LOG(ERR, USER1, + "Could not copy buffer to destination\n"); + res = -1; + goto end; + } + + if (read_data_addr != output_data_ptr) + rte_memcpy(output_data_ptr, + rte_pktmbuf_mtod(op->m_dst, + uint8_t *), + op->produced); + output_data_ptr += op->produced; + output_size += op->produced; + + } + + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + + /* Dequeue the last operations */ + while (total_deq_ops < total_ops) { + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + if (num_deq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.dequeue_err_count) { + res = -1; + goto end; + } + } + + total_deq_ops += num_deq; + + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status == + RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED || + op->status == + RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE) { + RTE_LOG(ERR, USER1, +"Out of space error occurred due to uncompressible input data expanding to larger than destination buffer. Increase the EXPANSE_RATIO constant to use this data.\n"); + res = -1; + goto end; + } else if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, + "Some operations were not successful\n"); + res = -1; + goto end; + } + const void *read_data_addr = + rte_pktmbuf_read(op->m_dst, + op->dst.offset, + op->produced, output_data_ptr); + if (read_data_addr == NULL) { + RTE_LOG(ERR, USER1, + "Could not copy buffer to destination\n"); + res = -1; + goto end; + } + + if (read_data_addr != output_data_ptr) + rte_memcpy(output_data_ptr, + rte_pktmbuf_mtod( + op->m_dst, uint8_t *), + op->produced); + output_data_ptr += op->produced; + output_size += op->produced; + + } + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + } + + if (output_data_sz) + *output_data_sz = output_size; +end: + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); + rte_compressdev_private_xform_free(dev_id, priv_xform); + rte_free(ops); + + if (test_data->perf_comp_force_stop) { + RTE_LOG(ERR, USER1, + "lcore: %d Perf. 
test has been aborted by user\n", + mem->lcore_id); + res = -1; + } + + return res; +} + +int +cperf_verify_test_runner(void *test_ctx) +{ + struct cperf_verify_ctx *ctx = test_ctx; + struct comp_test_data *test_data = ctx->options; + int ret = EXIT_SUCCESS; + static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0); + uint32_t lcore = rte_lcore_id(); + + ctx->mem.lcore_id = lcore; + + test_data->ratio = 0; + + if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { + ret = EXIT_FAILURE; + goto end; + } + + if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { + ret = EXIT_FAILURE; + goto end; + } + + if (ctx->decomp_data_sz != test_data->input_data_sz) { + RTE_LOG(ERR, USER1, + "Decompressed data length not equal to input data length\n"); + RTE_LOG(ERR, USER1, + "Decompressed size = %zu, expected = %zu\n", + ctx->decomp_data_sz, test_data->input_data_sz); + ret = EXIT_FAILURE; + goto end; + } else { + if (memcmp(ctx->mem.decompressed_data, + test_data->input_data, + test_data->input_data_sz) != 0) { + RTE_LOG(ERR, USER1, + "Decompressed data is not the same as file data\n"); + ret = EXIT_FAILURE; + goto end; + } + } + + ctx->ratio = (double) ctx->comp_data_sz / + test_data->input_data_sz * 100; + + if (!ctx->silent) { + if (rte_atomic16_test_and_set(&display_once)) { + printf("%12s%6s%12s%17s\n", + "lcore id", "Level", "Comp size", "Comp ratio [%]"); + } + printf("%12u%6u%12zu%17.2f\n", + ctx->mem.lcore_id, + test_data->level, ctx->comp_data_sz, ctx->ratio); + } + +end: + return ret; +} diff --git a/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.h b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.h new file mode 100644 index 000000000..ae8b7429c --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/comp_perf_test_verify.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018-2019 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_VERIFY_ +#define _COMP_PERF_TEST_VERIFY_ + +#include <stdint.h> + +#include "comp_perf_options.h" +#include "comp_perf_test_common.h" + +struct cperf_verify_ctx { + struct cperf_mem_resources mem; + struct comp_test_data *options; + + int silent; + size_t comp_data_sz; + size_t decomp_data_sz; + double ratio; +}; + +void +cperf_verify_test_destructor(void *arg); + +int +cperf_verify_test_runner(void *test_ctx); + +void * +cperf_verify_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options); + +#endif diff --git a/src/spdk/dpdk/app/test-compress-perf/main.c b/src/spdk/dpdk/app/test-compress-perf/main.c new file mode 100644 index 000000000..ed21605d8 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/main.c @@ -0,0 +1,548 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rte_malloc.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_compressdev.h> + +#include "comp_perf.h" +#include "comp_perf_options.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_cyclecount.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_verify.h" + +#define NUM_MAX_XFORMS 16 +#define NUM_MAX_INFLIGHT_OPS 512 + +__extension__ +const char *comp_perf_test_type_strs[] = { + [CPERF_TEST_TYPE_THROUGHPUT] = "throughput", + [CPERF_TEST_TYPE_VERIFY] = "verify", + [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount" +}; + +__extension__ +static const struct cperf_test cperf_testmap[] = { + [CPERF_TEST_TYPE_THROUGHPUT] = { + cperf_throughput_test_constructor, + 
cperf_throughput_test_runner, + cperf_throughput_test_destructor + + }, + [CPERF_TEST_TYPE_VERIFY] = { + cperf_verify_test_constructor, + cperf_verify_test_runner, + cperf_verify_test_destructor + }, + + [CPERF_TEST_TYPE_PMDCC] = { + cperf_cyclecount_test_constructor, + cperf_cyclecount_test_runner, + cperf_cyclecount_test_destructor + } +}; + +static struct comp_test_data *test_data; + +static int +comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id) +{ + const struct rte_compressdev_capabilities *cap; + + cap = rte_compressdev_capability_get(cdev_id, + RTE_COMP_ALGO_DEFLATE); + + if (cap == NULL) { + RTE_LOG(ERR, USER1, + "Compress device does not support DEFLATE\n"); + return -1; + } + + uint64_t comp_flags = cap->comp_feature_flags; + + /* Huffman encoding */ + if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED && + (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) { + RTE_LOG(ERR, USER1, + "Compress device does not support Fixed Huffman\n"); + return -1; + } + + if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC && + (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) { + RTE_LOG(ERR, USER1, + "Compress device does not support Dynamic Huffman\n"); + return -1; + } + + /* Window size */ + if (test_data->window_sz != -1) { + if (param_range_check(test_data->window_sz, &cap->window_size) + < 0) { + RTE_LOG(ERR, USER1, + "Compress device does not support " + "this window size\n"); + return -1; + } + } else + /* Set window size to PMD maximum if none was specified */ + test_data->window_sz = cap->window_size.max; + + /* Check if chained mbufs are supported */ + if (test_data->max_sgl_segs > 1 && + (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) { + RTE_LOG(INFO, USER1, "Compress device does not support " + "chained mbufs. Max SGL segments set to 1\n"); + test_data->max_sgl_segs = 1; + } + + /* Level 0 support */ + if (test_data->level_lst.min == 0 && + (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) { + RTE_LOG(ERR, USER1, "Compress device does not support " + "level 0 (no compression)\n"); + return -1; + } + + return 0; +} + +static int +comp_perf_initialize_compressdev(struct comp_test_data *test_data, + uint8_t *enabled_cdevs) +{ + uint8_t enabled_cdev_count, nb_lcores, cdev_id; + unsigned int i, j; + int ret; + + enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name, + enabled_cdevs, RTE_COMPRESS_MAX_DEVS); + if (enabled_cdev_count == 0) { + RTE_LOG(ERR, USER1, "No compress devices of type %s available," + " please check the list of specified devices in the EAL section\n", + test_data->driver_name); + return -EINVAL; + } + + nb_lcores = rte_lcore_count() - 1; + /* + * Use fewer devices, + * if there are more available than cores. + */ + if (enabled_cdev_count > nb_lcores) { + if (nb_lcores == 0) { + RTE_LOG(ERR, USER1, "Cannot run with 0 cores! Increase the number of cores\n"); + return -EINVAL; + } + enabled_cdev_count = nb_lcores; + RTE_LOG(INFO, USER1, + "There are more available devices than cores!" + " The number of devices has been aligned to %d cores\n", + nb_lcores); + } + + /* + * Calculate the number of needed queue pairs, based on the number + * of available logical cores and compression devices. + * For instance, if there are 4 cores and 2 compression devices, + * 2 queue pairs will be set up per device. + * One queue pair per core. 
+ * If, e.g., there are 3 cores and 2 compression devices, + * 2 queue pairs will be set up per device, but one queue pair + * will be left unused on the last device + */ + test_data->nb_qps = (nb_lcores % enabled_cdev_count) ? + (nb_lcores / enabled_cdev_count) + 1 : + nb_lcores / enabled_cdev_count; + + for (i = 0; i < enabled_cdev_count && + i < RTE_COMPRESS_MAX_DEVS; i++, + nb_lcores -= test_data->nb_qps) { + cdev_id = enabled_cdevs[i]; + + struct rte_compressdev_info cdev_info; + uint8_t socket_id = rte_compressdev_socket_id(cdev_id); + + rte_compressdev_info_get(cdev_id, &cdev_info); + if (cdev_info.max_nb_queue_pairs && + test_data->nb_qps > cdev_info.max_nb_queue_pairs) { + RTE_LOG(ERR, USER1, + "Number of needed queue pairs is higher " + "than the maximum number of queue pairs " + "per device.\n"); + RTE_LOG(ERR, USER1, + "Lower the number of cores or increase " + "the number of compression devices\n"); + return -EINVAL; + } + + if (comp_perf_check_capabilities(test_data, cdev_id) < 0) + return -EINVAL; + + /* Configure compressdev */ + struct rte_compressdev_config config = { + .socket_id = socket_id, + .nb_queue_pairs = nb_lcores > test_data->nb_qps + ? test_data->nb_qps : nb_lcores, + .max_nb_priv_xforms = NUM_MAX_XFORMS, + .max_nb_streams = 0 + }; + + if (rte_compressdev_configure(cdev_id, &config) < 0) { + RTE_LOG(ERR, USER1, "Device configuration failed\n"); + return -EINVAL; + } + + for (j = 0; j < test_data->nb_qps; j++) { + ret = rte_compressdev_queue_pair_setup(cdev_id, j, + NUM_MAX_INFLIGHT_OPS, socket_id); + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Failed to setup queue pair %u on compressdev %u", + j, cdev_id); + return -EINVAL; + } + } + + ret = rte_compressdev_start(cdev_id); + if (ret < 0) { + RTE_LOG(ERR, USER1, + "Failed to start device %u: error %d\n", + cdev_id, ret); + return -EPERM; + } + } + + return enabled_cdev_count; +} + +static int +comp_perf_dump_input_data(struct comp_test_data *test_data) +{ + FILE *f = fopen(test_data->input_file, "r"); + int ret = -1; + + if (f == NULL) { + RTE_LOG(ERR, USER1, "Input file could not be opened\n"); + return -1; + } + + if (fseek(f, 0, SEEK_END) != 0) { + RTE_LOG(ERR, USER1, "Size of input could not be calculated\n"); + goto end; + } + size_t actual_file_sz = ftell(f); + /* If extended input data size has not been set, + * input data size = file size + */ + + if (test_data->input_data_sz == 0) + test_data->input_data_sz = actual_file_sz; + + if (test_data->input_data_sz <= 0 || actual_file_sz <= 0 || + fseek(f, 0, SEEK_SET) != 0) { + RTE_LOG(ERR, USER1, "Size of input could not be calculated\n"); + goto end; + } + + test_data->input_data = rte_zmalloc_socket(NULL, + test_data->input_data_sz, 0, rte_socket_id()); + + if (test_data->input_data == NULL) { + RTE_LOG(ERR, USER1, "Memory to hold the data from the input " + "file could not be allocated\n"); + goto end; + } + + size_t remaining_data = test_data->input_data_sz; + uint8_t *data = test_data->input_data; + + while (remaining_data > 0) { + size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz); + + if (fread(data, data_to_read, 1, f) != 1) { + RTE_LOG(ERR, USER1, "Input file could not be read\n"); + goto end; + } + if (fseek(f, 0, SEEK_SET) != 0) { + RTE_LOG(ERR, USER1, + "Size of input could not be calculated\n"); + goto end; + } + remaining_data -= data_to_read; + data += data_to_read; + } + + printf("\n"); + if (test_data->input_data_sz > actual_file_sz) + RTE_LOG(INFO, USER1, + "%zu bytes read from file %s, extending the file %.2f times\n", + 
test_data->input_data_sz, test_data->input_file, + (double)test_data->input_data_sz/actual_file_sz); + else + RTE_LOG(INFO, USER1, + "%zu bytes read from file %s\n", + test_data->input_data_sz, test_data->input_file); + + ret = 0; + +end: + fclose(f); + return ret; +} + +static void +comp_perf_cleanup_on_signal(int signalNumber __rte_unused) +{ + test_data->perf_comp_force_stop = 1; +} + +static void +comp_perf_register_cleanup_on_signal(void) +{ + signal(SIGTERM, comp_perf_cleanup_on_signal); + signal(SIGINT, comp_perf_cleanup_on_signal); +} + +int +main(int argc, char **argv) +{ + uint8_t level_idx = 0; + int ret, i; + void *ctx[RTE_MAX_LCORE] = {}; + uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS]; + int nb_compressdevs = 0; + uint16_t total_nb_qps = 0; + uint8_t cdev_id; + uint32_t lcore_id; + + /* Initialise DPDK EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n"); + argc -= ret; + argv += ret; + + test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data), + 0, rte_socket_id()); + + if (test_data == NULL) + rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n", + rte_socket_id()); + + comp_perf_register_cleanup_on_signal(); + + ret = EXIT_SUCCESS; + test_data->cleanup = ST_TEST_DATA; + comp_perf_options_default(test_data); + + if (comp_perf_options_parse(test_data, argc, argv) < 0) { + RTE_LOG(ERR, USER1, + "Parsing one or more user options failed\n"); + ret = EXIT_FAILURE; + goto end; + } + + if (comp_perf_options_check(test_data) < 0) { + ret = EXIT_FAILURE; + goto end; + } + + nb_compressdevs = + comp_perf_initialize_compressdev(test_data, enabled_cdevs); + + if (nb_compressdevs < 1) { + ret = EXIT_FAILURE; + goto end; + } + + test_data->cleanup = ST_COMPDEV; + if (comp_perf_dump_input_data(test_data) < 0) { + ret = EXIT_FAILURE; + goto end; + } + + test_data->cleanup = ST_INPUT_DATA; + + if (test_data->level_lst.inc != 0) + test_data->level = test_data->level_lst.min; + else + test_data->level = test_data->level_lst.list[0]; + + printf("\nApp uses socket: %u\n", rte_socket_id()); + printf("Burst size = %u\n", test_data->burst_sz); + printf("Input data size = %zu\n", test_data->input_data_sz); + if (test_data->test == CPERF_TEST_TYPE_PMDCC) + printf("Cycle-count delay = %u [us]\n", + test_data->cyclecount_delay); + + test_data->cleanup = ST_DURING_TEST; + total_nb_qps = nb_compressdevs * test_data->nb_qps; + + i = 0; + uint8_t qp_id = 0, cdev_index = 0; + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + if (i == total_nb_qps) + break; + + cdev_id = enabled_cdevs[cdev_index]; + ctx[i] = cperf_testmap[test_data->test].constructor( + cdev_id, qp_id, + test_data); + if (ctx[i] == NULL) { + RTE_LOG(ERR, USER1, "Test run constructor failed\n"); + goto end; + } + qp_id = (qp_id + 1) % test_data->nb_qps; + if (qp_id == 0) + cdev_index++; + i++; + } + + print_test_dynamics(test_data); + + while (test_data->level <= test_data->level_lst.max) { + + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + if (i == total_nb_qps) + break; + + rte_eal_remote_launch( + cperf_testmap[test_data->test].runner, + ctx[i], lcore_id); + i++; + } + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + if (i == total_nb_qps) + break; + ret |= rte_eal_wait_lcore(lcore_id); + i++; + } + + if (ret != EXIT_SUCCESS) + break; + + if (test_data->level_lst.inc != 0) + test_data->level += test_data->level_lst.inc; + else { + if (++level_idx == test_data->level_lst.count) + break; + test_data->level = test_data->level_lst.list[level_idx]; + } + } + +end: + switch 
(test_data->cleanup) { + + case ST_DURING_TEST: + i = 0; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (i == total_nb_qps) + break; + + if (ctx[i] && cperf_testmap[test_data->test].destructor) + cperf_testmap[test_data->test].destructor( + ctx[i]); + i++; + } + /* fallthrough */ + case ST_INPUT_DATA: + rte_free(test_data->input_data); + /* fallthrough */ + case ST_COMPDEV: + for (i = 0; i < nb_compressdevs && + i < RTE_COMPRESS_MAX_DEVS; i++) { + rte_compressdev_stop(enabled_cdevs[i]); + rte_compressdev_close(enabled_cdevs[i]); + } + /* fallthrough */ + case ST_TEST_DATA: + rte_free(test_data); + /* fallthrough */ + case ST_CLEAR: + default: + i = rte_eal_cleanup(); + if (i) { + RTE_LOG(ERR, USER1, + "Error from rte_eal_cleanup(), %d\n", i); + ret = i; + } + break; + } + return ret; +} + +__rte_weak void * +cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_cyclecount_test_destructor(void *arg __rte_unused) +{ + RTE_LOG(INFO, USER1, "Something went wrong!\n"); +} + +__rte_weak int +cperf_cyclecount_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} + +__rte_weak void * +cperf_throughput_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_throughput_test_destructor(void *arg __rte_unused) +{ + +} + +__rte_weak int +cperf_throughput_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} +__rte_weak void * +cperf_verify_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Verify test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_verify_test_destructor(void *arg __rte_unused) +{ + +} + +__rte_weak int +cperf_verify_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} diff --git a/src/spdk/dpdk/app/test-compress-perf/meson.build b/src/spdk/dpdk/app/test-compress-perf/meson.build new file mode 100644 index 000000000..a1a484da9 --- /dev/null +++ b/src/spdk/dpdk/app/test-compress-perf/meson.build @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +sources = files('comp_perf_options_parse.c', + 'main.c', + 'comp_perf_test_verify.c', + 'comp_perf_test_throughput.c', + 'comp_perf_test_cyclecount.c', + 'comp_perf_test_common.c') +deps = ['compressdev']
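A recurring pattern in both main_loop() implementations above is the burst bookkeeping around rte_compressdev_enqueue_burst(): when the device accepts only part of a burst, the leftover rte_comp_op pointers are shifted to the front of the array with memmove() so operations are always resubmitted in their original order, and only the missing tail of the next burst is freshly allocated. The standalone sketch below isolates just that bookkeeping in plain C with no DPDK dependency; fake_enqueue(), BURST_SZ and TOTAL_OPS are hypothetical stand-ins for the compressdev queue and the test parameters, not DPDK APIs.

#include <stdio.h>
#include <string.h>

#define BURST_SZ 4
#define TOTAL_OPS 10

/* Pretend device queue: accepts at most 3 "ops" per call. */
static unsigned int fake_enqueue(int **ops, unsigned int n)
{
	(void)ops;
	return n < 3 ? n : 3;
}

int main(void)
{
	int storage[TOTAL_OPS];
	int *ops[BURST_SZ];
	unsigned int remaining = TOTAL_OPS, next = 0;
	unsigned int ops_unused = 0, num_enq = 0;

	while (remaining > 0) {
		unsigned int num_ops = remaining < BURST_SZ ? remaining : BURST_SZ;
		unsigned int ops_needed = num_ops - ops_unused;
		unsigned int i;

		/* Keep order: shift the previous call's leftovers to the front. */
		if (ops_unused > 0 && num_enq > 0)
			memmove(ops, &ops[num_enq], ops_unused * sizeof(*ops));

		/* "Allocate" only the ops still needed to fill this burst. */
		for (i = 0; i < ops_needed; i++) {
			storage[next] = (int)next;
			ops[ops_unused + i] = &storage[next];
			next++;
		}

		num_enq = fake_enqueue(ops, num_ops);
		for (i = 0; i < num_enq; i++)
			printf("enqueued op %d\n", *ops[i]);

		ops_unused = num_ops - num_enq;
		remaining -= num_enq;
	}
	return 0;
}

Running the sketch prints ops 0 through 9 in order even though the fake device never accepts a full burst. Because leftovers are moved rather than reallocated, the real loop's buf_id = total_enq_ops + i + ops_unused keeps each retried operation attached to the same source and destination mbufs, so no buffer is skipped or submitted twice.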