author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/scripts/perf
parent | Initial commit. (diff)
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz, ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/scripts/perf')
-rw-r--r-- | src/spdk/scripts/perf/nvme/README | 12
-rw-r--r-- | src/spdk/scripts/perf/nvme/fio_test.conf | 20
-rwxr-xr-x | src/spdk/scripts/perf/nvme/run_fio_test.py | 166
-rwxr-xr-x | src/spdk/scripts/perf/nvme/run_fio_test.sh | 19
-rw-r--r-- | src/spdk/scripts/perf/nvmf/README.md | 159
-rw-r--r-- | src/spdk/scripts/perf/nvmf/common.py | 42
-rw-r--r-- | src/spdk/scripts/perf/nvmf/config.json | 37
-rwxr-xr-x | src/spdk/scripts/perf/nvmf/run_nvmf.py | 941
-rw-r--r-- | src/spdk/scripts/perf/vhost/fio_test.conf | 20
-rw-r--r-- | src/spdk/scripts/perf/vhost/run_vhost_test.py | 219
10 files changed, 1635 insertions, 0 deletions
diff --git a/src/spdk/scripts/perf/nvme/README b/src/spdk/scripts/perf/nvme/README new file mode 100644 index 000000000..6468399a7 --- /dev/null +++ b/src/spdk/scripts/perf/nvme/README @@ -0,0 +1,12 @@ +These scripts are used to perform benchmark testing with fio. +The run_fio_test.py is the main script that runs the performance test and parses the test results. +Users can populate test parameters for different fio workloads in the lists (q_depth, io_size, workload_type, mix, core_mask and run_time) at the top of the run_fio_test.py script. +The run_fio_test.py puts the test results in a csv file named <hostname>_<num ssds>_perf_output.csv. +The run_fio_test.sh script demonstrates how to invoke the run_fio_test.py script with the +input parameters: path_to_fio_conf, path_to_ioengine and num_ssds. The run_fio_test.sh script will +call the SPDK setup.sh script to unbind NVMe devices from the kernel driver and bind them to the uio driver. +We include a sample fio configuration file that includes the parameters we use in our test environment. +The run_fio_test.py will append the NVMe devices to the end of the configuration file. The number of +NVMe devices used is specified using the num_ssds parameter. + +Usage: ./run_fio_test.sh diff --git a/src/spdk/scripts/perf/nvme/fio_test.conf b/src/spdk/scripts/perf/nvme/fio_test.conf new file mode 100644 index 000000000..a03c6a1e9 --- /dev/null +++ b/src/spdk/scripts/perf/nvme/fio_test.conf @@ -0,0 +1,20 @@ +[global] +ioengine=${IOENGINE} +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 +cpumask=1 +percentile_list=50:90:99:99.5:99.9:99.99:99.999 + +[perf_test] +stonewall +description="Run NVMe driver performance test for a given workload" +bs=${BLK_SIZE} +rw=${RW} +rwmixread=${MIX} +iodepth=${IODEPTH} +time_based=1 +ramp_time=10s +runtime=${RUNTIME} diff --git a/src/spdk/scripts/perf/nvme/run_fio_test.py b/src/spdk/scripts/perf/nvme/run_fio_test.py new file mode 100755 index 000000000..79d9e566d --- /dev/null +++ b/src/spdk/scripts/perf/nvme/run_fio_test.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + +# This script runs fio benchmark test on the local nvme device using the SPDK NVMe driver. +# Prework: Run script/setup.sh to bind SSDs to SPDK driver. +# Prework: Change any fio configurations in the template fio config file fio_test.conf +# Output: A csv file <hostname>_<num ssds>_perf_output.csv + +import subprocess +from subprocess import check_call, call, check_output, Popen, PIPE +import random +import os +import sys +import re +import signal +import getopt +from datetime import datetime +from itertools import * +import csv +import itertools +from shutil import copyfile +import json + +# Populate test parameters into these lists to run different workloads +# The configuration below runs QD 1 & 128. To add QD 32 set q_depth=['1', '32', '128'] +q_depth = ['1', '128'] +# io_size specifies the size in bytes of the IO workload. +# To add 64K IOs set io_size = ['4096', '65536'] +io_size = ['4096'] +workload_type = ['randrw'] +mix = ['100'] +core_mask = ['0x1'] +# run_time parameter specifies how long to run each test. +# Set run_time = ['600'] to run the test for 10 minutes +run_time = ['60'] +# iter_num parameter is used to run the test multiple times. 
+# set iter_num = ['1', '2', '3'] to repeat each test 3 times +iter_num = ['1'] + + +def run_fio(io_size_bytes, qd, rw_mix, cpu_mask, run_num, workload, run_time_sec): + print("Running Test: IO Size={} QD={} Mix={} CPU Mask={}".format(io_size_bytes, qd, rw_mix, cpu_mask)) + string = "s_" + str(io_size_bytes) + "_q_" + str(qd) + "_m_" + str(rw_mix) + "_c_" + str(cpu_mask) + "_run_" + str(run_num) + + # Call fio + path_to_fio_conf = config_file_for_test + path_to_ioengine = sys.argv[2] + command = "BLK_SIZE=" + str(io_size_bytes) + " RW=" + str(workload) + " MIX=" + str(rw_mix) \ + + " IODEPTH=" + str(qd) + " RUNTIME=" + str(run_time_sec) + " IOENGINE=" + path_to_ioengine \ + + " fio " + str(path_to_fio_conf) + " -output=" + string + " -output-format=json" + output = subprocess.check_output(command, shell=True) + + print("Finished Test: IO Size={} QD={} Mix={} CPU Mask={}".format(io_size_bytes, qd, rw_mix, cpu_mask)) + return + + +def parse_results(io_size_bytes, qd, rw_mix, cpu_mask, run_num, workload, run_time_sec): + results_array = [] + + # If json file has results for multiple fio jobs pick the results from the right job + job_pos = 0 + + # generate the next result line that will be added to the output csv file + results = str(io_size_bytes) + "," + str(qd) + "," + str(rw_mix) + "," \ + + str(workload) + "," + str(cpu_mask) + "," + str(run_time_sec) + "," + str(run_num) + + # Read the results of this run from the test result file + string = "s_" + str(io_size_bytes) + "_q_" + str(qd) + "_m_" + str(rw_mix) + "_c_" + str(cpu_mask) + "_run_" + str(run_num) + with open(string) as json_file: + data = json.load(json_file) + job_name = data['jobs'][job_pos]['jobname'] + # print "FIO job name: ", job_name + if 'lat_ns' in data['jobs'][job_pos]['read']: + lat = 'lat_ns' + lat_units = 'ns' + else: + lat = 'lat' + lat_units = 'us' + read_iops = float(data['jobs'][job_pos]['read']['iops']) + read_bw = float(data['jobs'][job_pos]['read']['bw']) + read_avg_lat = float(data['jobs'][job_pos]['read'][lat]['mean']) + read_min_lat = float(data['jobs'][job_pos]['read'][lat]['min']) + read_max_lat = float(data['jobs'][job_pos]['read'][lat]['max']) + write_iops = float(data['jobs'][job_pos]['write']['iops']) + write_bw = float(data['jobs'][job_pos]['write']['bw']) + write_avg_lat = float(data['jobs'][job_pos]['write'][lat]['mean']) + write_min_lat = float(data['jobs'][job_pos]['write'][lat]['min']) + write_max_lat = float(data['jobs'][job_pos]['write'][lat]['max']) + print("%-10s" % "IO Size", "%-10s" % "QD", "%-10s" % "Mix", + "%-10s" % "Workload Type", "%-10s" % "CPU Mask", + "%-10s" % "Run Time", "%-10s" % "Run Num", + "%-15s" % "Read IOps", + "%-10s" % "Read MBps", "%-15s" % "Read Avg. Lat(" + lat_units + ")", + "%-15s" % "Read Min. Lat(" + lat_units + ")", "%-15s" % "Read Max. Lat(" + lat_units + ")", + "%-15s" % "Write IOps", + "%-10s" % "Write MBps", "%-15s" % "Write Avg. Lat(" + lat_units + ")", + "%-15s" % "Write Min. Lat(" + lat_units + ")", "%-15s" % "Write Max. 
Lat(" + lat_units + ")") + print("%-10s" % io_size_bytes, "%-10s" % qd, "%-10s" % rw_mix, + "%-10s" % workload, "%-10s" % cpu_mask, "%-10s" % run_time_sec, + "%-10s" % run_num, "%-15s" % read_iops, "%-10s" % read_bw, + "%-15s" % read_avg_lat, "%-15s" % read_min_lat, "%-15s" % read_max_lat, + "%-15s" % write_iops, "%-10s" % write_bw, "%-15s" % write_avg_lat, + "%-15s" % write_min_lat, "%-15s" % write_max_lat) + results = results + "," + str(read_iops) + "," + str(read_bw) + "," \ + + str(read_avg_lat) + "," + str(read_min_lat) + "," + str(read_max_lat) \ + + "," + str(write_iops) + "," + str(write_bw) + "," + str(write_avg_lat) \ + + "," + str(write_min_lat) + "," + str(write_max_lat) + with open(result_file_name, "a") as result_file: + result_file.write(results + "\n") + results_array = [] + return + + +def get_nvme_devices_count(): + output = check_output('lspci | grep -i Non | wc -l', shell=True) + return int(output) + + +def get_nvme_devices_bdf(): + output = check_output('lspci | grep -i Non | awk \'{print $1}\'', shell=True).decode("utf-8") + output = output.split() + return output + + +def add_filename_to_conf(conf_file_name, bdf): + filestring = "filename=trtype=PCIe traddr=0000." + bdf.replace(":", ".") + " ns=1" + with open(conf_file_name, "a") as conf_file: + conf_file.write(filestring + "\n") + + +if len(sys.argv) != 4: + print("usage: " % sys.argv[0] % " path_to_fio_conf path_to_ioengine num_ssds") + sys.exit() + +num_ssds = int(sys.argv[3]) +if num_ssds > get_nvme_devices_count(): + print("System does not have {} NVMe SSDs.".format(num_ssds)) + sys.exit() + +host_name = os.uname()[1] +result_file_name = host_name + "_" + sys.argv[3] + "ssds_perf_output.csv" + +bdf = get_nvme_devices_bdf() +config_file_for_test = sys.argv[1] + "_" + sys.argv[3] + "ssds" +copyfile(sys.argv[1], config_file_for_test) + +# Add the number of threads to the fio config file +with open(config_file_for_test, "a") as conf_file: + conf_file.write("numjobs=" + str(1) + "\n") + +# Add the NVMe bdf to the fio config file +for i in range(0, num_ssds): + add_filename_to_conf(config_file_for_test, bdf[i]) + +# Set up for output +columns = "IO_Size,Q_Depth,Workload_Mix,Workload_Type,Core_Mask,Run_Time,Run,Read_IOPS,Read_bw(KiB/s), \ + Read_Avg_lat(us),Read_Min_Lat(us),Read_Max_Lat(us),Write_IOPS,Write_bw(KiB/s),Write_Avg_lat(us), \ + Write_Min_Lat(us),Write_Max_Lat(us)" + +with open(result_file_name, "w+") as result_file: + result_file.write(columns + "\n") + +for i, (s, q, m, w, c, t) in enumerate(itertools.product(io_size, q_depth, mix, workload_type, core_mask, run_time)): + run_fio(s, q, m, c, i, w, t) + parse_results(s, q, m, c, i, w, t) + +result_file.close() diff --git a/src/spdk/scripts/perf/nvme/run_fio_test.sh b/src/spdk/scripts/perf/nvme/run_fio_test.sh new file mode 100755 index 000000000..bc888d0f4 --- /dev/null +++ b/src/spdk/scripts/perf/nvme/run_fio_test.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +testdir=$(readlink -f $(dirname $0)) +rootdir=$(readlink -f $testdir/../../..) 
+ +# Bind devices to NVMe driver +$rootdir/scripts/setup.sh + +# Run Performance Test with 1 SSD +$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/build/fio/spdk_nvme 1 + +# 2 SSDs test run +$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/build/fio/spdk_nvme 2 + +# 4 SSDs test run +$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/build/fio/spdk_nvme 4 + +# 8 SSDs test run +$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/build/fio/spdk_nvme 8 diff --git a/src/spdk/scripts/perf/nvmf/README.md b/src/spdk/scripts/perf/nvmf/README.md new file mode 100644 index 000000000..6cd65790e --- /dev/null +++ b/src/spdk/scripts/perf/nvmf/README.md @@ -0,0 +1,159 @@ +## Running NVMe-OF Performace Testcases + +In order to reproduce test cases described in [SPDK NVMe-OF Performance Test Cases](https://ci.spdk.io/download/performance-reports/SPDK_nvmeof_perf_report_18.04.pdf) follow the following instructions. + +Currently RDMA NIC IP address assignment must be done manually before running the tests. + +# Prepare the configuration file + +Configure the target, initiators, and FIO workload in the json configuration file. + +## General + +Options which apply to both target and all initiator servers such as "password" and "username" fields. +All servers are required to have the same user credentials for running the test. +Test results can be found in /tmp/results directory. + +### transport + +Transport layer to use between Target and Initiator servers - rdma or tcp. + +## Target + +Configure the target server information. + +### nic_ips + +List of IP addresses othat will be used in this test.. +NVMe namespaces will be split between provided IP addresses. +So for example providing 2 IP's with 16 NVMe drives present will result in each IP managing +8 NVMe subystems. + +### mode + +"spdk" or "kernel" values allowed. + +### use_null_block + +Use null block device instead of present NVMe drives. Used for latency measurements as described +in Test Case 3 of performance report. + +### num_cores + +List of CPU cores to assign for running SPDK NVMe-OF Target process. Can specify exact core numbers or ranges, eg: +[0, 1, 10-15]. + +### nvmet_bin + +Path to nvmetcli application executable. If not provided then system-wide package will be used +by default. Not used if "mode" is set to "spdk". + +### num_shared_buffers + +Number of shared buffers to use when creating transport layer. + +## Initiator + +Describes initiator arguments. There can be more than one initiator section in the configuration file. +For the sake of easier results parsing from multiple initiators please use only digits and letters +in initiator section name. + +### ip + +Management IP address used for SSH communication with initiator server. + +### nic_ips + +List of target IP addresses to which the initiator should try to connect. + +### mode + +"spdk" or "kernel" values allowed. + +### cpus_allowed + +List of CPU cores to assign for running SPDK NVMe-OF initiator process. +Can specify exact core numbers: 0,5 +or ranges: 10-15 +or binding to CPUs 0, 5, and 8 to 15: `cpus_allowed=0,5,8-15`. +If not specified then will use num_cores option. +If specified with num_cores then cpu_allowed parameter has higher priority than num_cores. + +### num_cores + +Applies only to SPDK initiator. Number of CPUs core to use for running FIO job. +If not specified then by default each connected subsystem gets its own CPU core. + +### nvmecli_dir + +Path to directory with nvme-cli application. 
If not provided then system-wide package will be used +by default. Not used if "mode" is set to "spdk". + +### fio_bin + +Path to the fio binary that will be used to compile SPDK and run the test. +If not specified, then the script will use /usr/src/fio/fio as the default. + +### extra_params + +Space separated string with additional settings for "nvme connect" command +other than -t, -s, -n and -a. + +## fio + +Fio job parameters. + +- bs: block size +- qd: io depth - Per connected fio filename target +- rw: workload mode +- rwmixread: percentage of reads in readwrite workloads +- run_time: time (in seconds) to run workload +- ramp_time: time (in seconds) to run workload before statistics are gathered +- run_num: how many times to run given workload in loop + +# Running Test + +Before running the test script use the setup.sh script to bind the devices you want to +use in the test to the VFIO/UIO driver. +Run the script on the NVMe-oF target system: + + cd spdk + sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py +The script uses the config.json configuration file in the scripts/perf/nvmf directory by default. You can +specify a different configuration file at runtime as shown below: +sudo PYTHONPATH=$PYTHONPATH:$PWD/scripts scripts/perf/nvmf/run_nvmf.py /path/to/config file/json config file + +The script uses another spdk script (scripts/rpc.py) so we pass the path to rpc.py by setting the Python path +as a runtime environment parameter. + +# Test Results + +When the test completes, you will find a csv file (nvmf_results.csv) containing the results in the target node +directory /tmp/results. + +# Processor Counter Monitor (PCM) +PCM Tools provides a number of command-line utilities for real-time monitoring. +Before using PCM Tools in nvmf perf scripts it needs to be installed on Target machine. +PCM source and instructions are available on https://github.com/opcm/pcm. +To enable PCM in perf test you need to add Target setting in config.json file: +``` +"pcm_settings": ["pcm_directory", "measure_cpu", "measure_memory", delay_time, measure_interval, sample_count] +``` +example: +``` +"pcm_settings": ["/tmp/pcm", true, true, 10, 1, 30] +``` +Example above will run PCM measure for cpu and memory, with start delay 10s, sample every 1 second, +and 30 samples for cpu measure. PCM memory do not support sample count. + +# Bandwidth monitor (bwm-ng) +PCM Tools provides a number of command-line utilities for real-time monitoring. +Before using bwm-ng in nvmf perf scripts it needs to be installed on Target machine. 
+To enable bandwidth monitor in perf test you need to add Target setting in config.json file: +``` +"bandwidth_settings": [bool, sample_count] +``` +example: +``` +"bandwidth_settings": [true, 30] +``` diff --git a/src/spdk/scripts/perf/nvmf/common.py b/src/spdk/scripts/perf/nvmf/common.py new file mode 100644 index 000000000..8c0d435f3 --- /dev/null +++ b/src/spdk/scripts/perf/nvmf/common.py @@ -0,0 +1,42 @@ +import os +import re +import json +from itertools import product, chain +from subprocess import check_output, Popen + + +def get_used_numa_nodes(): + used_numa_nodes = set() + for bdf in get_nvme_devices_bdf(): + with open("/sys/bus/pci/devices/%s/numa_node" % bdf, "r") as numa_file: + output = numa_file.read() + used_numa_nodes.add(int(output)) + return used_numa_nodes + + +def get_nvme_devices_count(): + output = get_nvme_devices_bdf() + return len(output) + + +def get_nvme_devices_bdf(): + print("Getting BDFs for NVMe section") + output = check_output("rootdir=$PWD; \ + source test/common/autotest_common.sh; \ + get_nvme_bdfs 01 08 02", + executable="/bin/bash", shell=True) + output = [str(x, encoding="utf-8") for x in output.split()] + print("Done getting BDFs") + return output + + +def get_nvme_devices(): + print("Getting kernel NVMe names") + output = check_output("lsblk -o NAME -nlp", shell=True).decode(encoding="utf-8") + output = [x for x in output.split("\n") if "nvme" in x] + print("Done getting kernel NVMe names") + return output + + +def nvmet_command(nvmet_bin, command): + return check_output("%s %s" % (nvmet_bin, command), shell=True).decode(encoding="utf-8") diff --git a/src/spdk/scripts/perf/nvmf/config.json b/src/spdk/scripts/perf/nvmf/config.json new file mode 100644 index 000000000..d8b16be93 --- /dev/null +++ b/src/spdk/scripts/perf/nvmf/config.json @@ -0,0 +1,37 @@ +{ + "general": { + "username": "uname", + "password": "pass", + "transport": "transport_type" + }, + "target": { + "nic_ips": ["192.0.1.1", "192.0.2.1"], + "mode": "spdk", + "use_null_block": false, + "nvmet_dir": "/path/to/nvmetcli", + "num_cores": "1", + "num_shared_buffers": 4096 + }, + "initiator1": { + "ip": "10.0.0.1", + "nic_ips": ["192.0.1.1"], + "mode": "spdk", + "nvmecli_dir": "/path/to/nvmecli", + "fio_dir": "/path/to/fio binary", + "extra_params": "Extra nvme connect params" + }, + "initiator2": { + "ip": "10.0.0.2", + "nic_ips": ["192.0.2.1"], + "mode": "spdk" + }, + "fio": { + "bs": ["4k"], + "qd": [128], + "rw": ["randrw"], + "rwmixread": 100, + "run_time": 5, + "ramp_time": 1, + "run_num": 3 + } +} diff --git a/src/spdk/scripts/perf/nvmf/run_nvmf.py b/src/spdk/scripts/perf/nvmf/run_nvmf.py new file mode 100755 index 000000000..05b94ec09 --- /dev/null +++ b/src/spdk/scripts/perf/nvmf/run_nvmf.py @@ -0,0 +1,941 @@ +#!/usr/bin/env python3 + +import os +import re +import sys +import json +import paramiko +import zipfile +import threading +import subprocess +import itertools +import time +import uuid +import rpc +import rpc.client +import pandas as pd +from collections import OrderedDict +from common import * + + +class Server: + def __init__(self, name, username, password, mode, nic_ips, transport): + self.name = name + self.mode = mode + self.username = username + self.password = password + self.nic_ips = nic_ips + self.transport = transport.lower() + + if not re.match("^[A-Za-z0-9]*$", name): + self.log_print("Please use a name which contains only letters or numbers") + sys.exit(1) + + def log_print(self, msg): + print("[%s] %s" % (self.name, msg), flush=True) + + +class Target(Server): 
+ def __init__(self, name, username, password, mode, nic_ips, transport="rdma", + use_null_block=False, sar_settings=None, pcm_settings=None, + bandwidth_settings=None): + + super(Target, self).__init__(name, username, password, mode, nic_ips, transport) + self.null_block = bool(use_null_block) + self.enable_sar = False + self.enable_pcm_memory = False + self.enable_pcm = False + self.enable_bandwidth = False + + if sar_settings: + self.enable_sar, self.sar_delay, self.sar_interval, self.sar_count = sar_settings + + if pcm_settings: + self.pcm_dir, self.enable_pcm, self.enable_pcm_memory, self.pcm_delay, self.pcm_interval, self.pcm_count = pcm_settings + + if bandwidth_settings: + self.enable_bandwidth, self.bandwidth_count = bandwidth_settings + + self.script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) + self.spdk_dir = os.path.abspath(os.path.join(self.script_dir, "../../../")) + + def zip_spdk_sources(self, spdk_dir, dest_file): + self.log_print("Zipping SPDK source directory") + fh = zipfile.ZipFile(dest_file, "w", zipfile.ZIP_DEFLATED) + for root, directories, files in os.walk(spdk_dir, followlinks=True): + for file in files: + fh.write(os.path.relpath(os.path.join(root, file))) + fh.close() + self.log_print("Done zipping") + + def read_json_stats(self, file): + with open(file, "r") as json_data: + data = json.load(json_data) + job_pos = 0 # job_post = 0 because using aggregated results + + # Check if latency is in nano or microseconds to choose correct dict key + def get_lat_unit(key_prefix, dict_section): + # key prefix - lat, clat or slat. + # dict section - portion of json containing latency bucket in question + # Return dict key to access the bucket and unit as string + for k, v in dict_section.items(): + if k.startswith(key_prefix): + return k, k.split("_")[1] + + read_iops = float(data["jobs"][job_pos]["read"]["iops"]) + read_bw = float(data["jobs"][job_pos]["read"]["bw"]) + lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["read"]) + read_avg_lat = float(data["jobs"][job_pos]["read"][lat_key]["mean"]) + read_min_lat = float(data["jobs"][job_pos]["read"][lat_key]["min"]) + read_max_lat = float(data["jobs"][job_pos]["read"][lat_key]["max"]) + clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["read"]) + read_p99_lat = float(data["jobs"][job_pos]["read"][clat_key]["percentile"]["99.000000"]) + read_p99_9_lat = float(data["jobs"][job_pos]["read"][clat_key]["percentile"]["99.900000"]) + read_p99_99_lat = float(data["jobs"][job_pos]["read"][clat_key]["percentile"]["99.990000"]) + read_p99_999_lat = float(data["jobs"][job_pos]["read"][clat_key]["percentile"]["99.999000"]) + + if "ns" in lat_unit: + read_avg_lat, read_min_lat, read_max_lat = [x / 1000 for x in [read_avg_lat, read_min_lat, read_max_lat]] + if "ns" in clat_unit: + read_p99_lat = read_p99_lat / 1000 + read_p99_9_lat = read_p99_9_lat / 1000 + read_p99_99_lat = read_p99_99_lat / 1000 + read_p99_999_lat = read_p99_999_lat / 1000 + + write_iops = float(data["jobs"][job_pos]["write"]["iops"]) + write_bw = float(data["jobs"][job_pos]["write"]["bw"]) + lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["write"]) + write_avg_lat = float(data["jobs"][job_pos]["write"][lat_key]["mean"]) + write_min_lat = float(data["jobs"][job_pos]["write"][lat_key]["min"]) + write_max_lat = float(data["jobs"][job_pos]["write"][lat_key]["max"]) + clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["write"]) + write_p99_lat = 
float(data["jobs"][job_pos]["write"][clat_key]["percentile"]["99.000000"]) + write_p99_9_lat = float(data["jobs"][job_pos]["write"][clat_key]["percentile"]["99.900000"]) + write_p99_99_lat = float(data["jobs"][job_pos]["write"][clat_key]["percentile"]["99.990000"]) + write_p99_999_lat = float(data["jobs"][job_pos]["write"][clat_key]["percentile"]["99.999000"]) + + if "ns" in lat_unit: + write_avg_lat, write_min_lat, write_max_lat = [x / 1000 for x in [write_avg_lat, write_min_lat, write_max_lat]] + if "ns" in clat_unit: + write_p99_lat = write_p99_lat / 1000 + write_p99_9_lat = write_p99_9_lat / 1000 + write_p99_99_lat = write_p99_99_lat / 1000 + write_p99_999_lat = write_p99_999_lat / 1000 + + return [read_iops, read_bw, read_avg_lat, read_min_lat, read_max_lat, + read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat, + write_iops, write_bw, write_avg_lat, write_min_lat, write_max_lat, + write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat] + + def parse_results(self, results_dir, initiator_count=None, run_num=None): + files = os.listdir(results_dir) + fio_files = filter(lambda x: ".fio" in x, files) + json_files = [x for x in files if ".json" in x] + + headers = ["read_iops", "read_bw", "read_avg_lat_us", "read_min_lat_us", "read_max_lat_us", + "read_p99_lat_us", "read_p99.9_lat_us", "read_p99.99_lat_us", "read_p99.999_lat_us", + "write_iops", "write_bw", "write_avg_lat_us", "write_min_lat_us", "write_max_lat_us", + "write_p99_lat_us", "write_p99.9_lat_us", "write_p99.99_lat_us", "write_p99.999_lat_us"] + + aggr_headers = ["iops", "bw", "avg_lat_us", "min_lat_us", "max_lat_us", + "p99_lat_us", "p99.9_lat_us", "p99.99_lat_us", "p99.999_lat_us"] + + header_line = ",".join(["Name", *headers]) + aggr_header_line = ",".join(["Name", *aggr_headers]) + + # Create empty results file + csv_file = "nvmf_results.csv" + with open(os.path.join(results_dir, csv_file), "w") as fh: + fh.write(aggr_header_line + "\n") + rows = set() + + for fio_config in fio_files: + self.log_print("Getting FIO stats for %s" % fio_config) + job_name, _ = os.path.splitext(fio_config) + + # Look in the filename for rwmixread value. Function arguments do + # not have that information. + # TODO: Improve this function by directly using workload params instead + # of regexing through filenames. 
+ if "read" in job_name: + rw_mixread = 1 + elif "write" in job_name: + rw_mixread = 0 + else: + rw_mixread = float(re.search(r"m_(\d+)", job_name).group(1)) / 100 + + # If "_CPU" exists in name - ignore it + # Initiators for the same job could have diffrent num_cores parameter + job_name = re.sub(r"_\d+CPU", "", job_name) + job_result_files = [x for x in json_files if job_name in x] + self.log_print("Matching result files for current fio config:") + for j in job_result_files: + self.log_print("\t %s" % j) + + # There may have been more than 1 initiator used in test, need to check that + # Result files are created so that string after last "_" separator is server name + inits_names = set([os.path.splitext(x)[0].split("_")[-1] for x in job_result_files]) + inits_avg_results = [] + for i in inits_names: + self.log_print("\tGetting stats for initiator %s" % i) + # There may have been more than 1 test run for this job, calculate average results for initiator + i_results = [x for x in job_result_files if i in x] + i_results_filename = re.sub(r"run_\d+_", "", i_results[0].replace("json", "csv")) + + separate_stats = [] + for r in i_results: + stats = self.read_json_stats(os.path.join(results_dir, r)) + separate_stats.append(stats) + self.log_print(stats) + + init_results = [sum(x) for x in zip(*separate_stats)] + init_results = [x / len(separate_stats) for x in init_results] + inits_avg_results.append(init_results) + + self.log_print("\tAverage results for initiator %s" % i) + self.log_print(init_results) + with open(os.path.join(results_dir, i_results_filename), "w") as fh: + fh.write(header_line + "\n") + fh.write(",".join([job_name, *["{0:.3f}".format(x) for x in init_results]]) + "\n") + + # Sum results of all initiators running this FIO job. + # Latency results are an average of latencies from accros all initiators. + inits_avg_results = [sum(x) for x in zip(*inits_avg_results)] + inits_avg_results = OrderedDict(zip(headers, inits_avg_results)) + for key in inits_avg_results: + if "lat" in key: + inits_avg_results[key] /= len(inits_names) + + # Aggregate separate read/write values into common labels + # Take rw_mixread into consideration for mixed read/write workloads. 
+ aggregate_results = OrderedDict() + for h in aggr_headers: + read_stat, write_stat = [float(value) for key, value in inits_avg_results.items() if h in key] + if "lat" in h: + _ = rw_mixread * read_stat + (1 - rw_mixread) * write_stat + else: + _ = read_stat + write_stat + aggregate_results[h] = "{0:.3f}".format(_) + + rows.add(",".join([job_name, *aggregate_results.values()])) + + # Save results to file + for row in rows: + with open(os.path.join(results_dir, csv_file), "a") as fh: + fh.write(row + "\n") + self.log_print("You can find the test results in the file %s" % os.path.join(results_dir, csv_file)) + + def measure_sar(self, results_dir, sar_file_name): + self.log_print("Waiting %d delay before measuring SAR stats" % self.sar_delay) + time.sleep(self.sar_delay) + out = subprocess.check_output("sar -P ALL %s %s" % (self.sar_interval, self.sar_count), shell=True).decode(encoding="utf-8") + with open(os.path.join(results_dir, sar_file_name), "w") as fh: + for line in out.split("\n"): + if "Average" in line and "CPU" in line: + self.log_print("Summary CPU utilization from SAR:") + self.log_print(line) + if "Average" in line and "all" in line: + self.log_print(line) + fh.write(out) + + def measure_pcm_memory(self, results_dir, pcm_file_name): + time.sleep(self.pcm_delay) + pcm_memory = subprocess.Popen("%s/pcm-memory.x %s -csv=%s/%s" % (self.pcm_dir, self.pcm_interval, + results_dir, pcm_file_name), shell=True) + time.sleep(self.pcm_count) + pcm_memory.kill() + + def measure_pcm(self, results_dir, pcm_file_name): + time.sleep(self.pcm_delay) + subprocess.run("%s/pcm.x %s -i=%s -csv=%s/%s" % (self.pcm_dir, self.pcm_interval, self.pcm_count, + results_dir, pcm_file_name), shell=True, check=True) + df = pd.read_csv(os.path.join(results_dir, pcm_file_name), header=[0, 1]) + df = df.rename(columns=lambda x: re.sub(r'Unnamed:[\w\s]*$', '', x)) + skt = df.loc[:, df.columns.get_level_values(1).isin({'UPI0', 'UPI1', 'UPI2'})] + skt_pcm_file_name = "_".join(["skt", pcm_file_name]) + skt.to_csv(os.path.join(results_dir, skt_pcm_file_name), index=False) + + def measure_bandwidth(self, results_dir, bandwidth_file_name): + bwm = subprocess.run("bwm-ng -o csv -F %s/%s -a 1 -t 1000 -c %s" % (results_dir, bandwidth_file_name, + self.bandwidth_count), shell=True, check=True) + + +class Initiator(Server): + def __init__(self, name, username, password, mode, nic_ips, ip, transport="rdma", cpu_frequency=None, + nvmecli_bin="nvme", workspace="/tmp/spdk", cpus_allowed=None, + cpus_allowed_policy="shared", fio_bin="/usr/src/fio/fio"): + + super(Initiator, self).__init__(name, username, password, mode, nic_ips, transport) + + self.ip = ip + self.spdk_dir = workspace + if os.getenv('SPDK_WORKSPACE'): + self.spdk_dir = os.getenv('SPDK_WORKSPACE') + self.fio_bin = fio_bin + self.cpus_allowed = cpus_allowed + self.cpus_allowed_policy = cpus_allowed_policy + self.cpu_frequency = cpu_frequency + self.nvmecli_bin = nvmecli_bin + self.ssh_connection = paramiko.SSHClient() + self.ssh_connection.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + self.ssh_connection.connect(self.ip, username=self.username, password=self.password) + self.remote_call("sudo rm -rf %s/nvmf_perf" % self.spdk_dir) + self.remote_call("mkdir -p %s" % self.spdk_dir) + self.set_cpu_frequency() + + def __del__(self): + self.ssh_connection.close() + + def put_file(self, local, remote_dest): + ftp = self.ssh_connection.open_sftp() + ftp.put(local, remote_dest) + ftp.close() + + def get_file(self, remote, local_dest): + ftp = 
self.ssh_connection.open_sftp() + ftp.get(remote, local_dest) + ftp.close() + + def remote_call(self, cmd): + stdin, stdout, stderr = self.ssh_connection.exec_command(cmd) + out = stdout.read().decode(encoding="utf-8") + err = stderr.read().decode(encoding="utf-8") + return out, err + + def copy_result_files(self, dest_dir): + self.log_print("Copying results") + + if not os.path.exists(dest_dir): + os.mkdir(dest_dir) + + # Get list of result files from initiator and copy them back to target + stdout, stderr = self.remote_call("ls %s/nvmf_perf" % self.spdk_dir) + file_list = stdout.strip().split("\n") + + for file in file_list: + self.get_file(os.path.join(self.spdk_dir, "nvmf_perf", file), + os.path.join(dest_dir, file)) + self.log_print("Done copying results") + + def discover_subsystems(self, address_list, subsys_no): + num_nvmes = range(0, subsys_no) + nvme_discover_output = "" + for ip, subsys_no in itertools.product(address_list, num_nvmes): + self.log_print("Trying to discover: %s:%s" % (ip, 4420 + subsys_no)) + nvme_discover_cmd = ["sudo", + "%s" % self.nvmecli_bin, + "discover", "-t %s" % self.transport, + "-s %s" % (4420 + subsys_no), + "-a %s" % ip] + nvme_discover_cmd = " ".join(nvme_discover_cmd) + + stdout, stderr = self.remote_call(nvme_discover_cmd) + if stdout: + nvme_discover_output = nvme_discover_output + stdout + + subsystems = re.findall(r'trsvcid:\s(\d+)\s+' # get svcid number + r'subnqn:\s+([a-zA-Z0-9\.\-\:]+)\s+' # get NQN id + r'traddr:\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', # get IP address + nvme_discover_output) # from nvme discovery output + subsystems = filter(lambda x: x[-1] in address_list, subsystems) + subsystems = list(set(subsystems)) + subsystems.sort(key=lambda x: x[1]) + self.log_print("Found matching subsystems on target side:") + for s in subsystems: + self.log_print(s) + + return subsystems + + def gen_fio_config(self, rw, rwmixread, block_size, io_depth, subsys_no, num_jobs=None, ramp_time=0, run_time=10): + fio_conf_template = """ +[global] +ioengine={ioengine} +{spdk_conf} +thread=1 +group_reporting=1 +direct=1 +percentile_list=50:90:99:99.5:99.9:99.99:99.999 + +norandommap=1 +rw={rw} +rwmixread={rwmixread} +bs={block_size} +time_based=1 +ramp_time={ramp_time} +runtime={run_time} +""" + if "spdk" in self.mode: + subsystems = self.discover_subsystems(self.nic_ips, subsys_no) + bdev_conf = self.gen_spdk_bdev_conf(subsystems) + self.remote_call("echo '%s' > %s/bdev.conf" % (bdev_conf, self.spdk_dir)) + ioengine = "%s/build/fio/spdk_bdev" % self.spdk_dir + spdk_conf = "spdk_conf=%s/bdev.conf" % self.spdk_dir + else: + ioengine = "libaio" + spdk_conf = "" + out, err = self.remote_call("sudo nvme list | grep -E 'SPDK|Linux' | awk '{print $1}'") + subsystems = [x for x in out.split("\n") if "nvme" in x] + + if self.cpus_allowed is not None: + self.log_print("Limiting FIO workload execution on specific cores %s" % self.cpus_allowed) + cpus_num = 0 + cpus = self.cpus_allowed.split(",") + for cpu in cpus: + if "-" in cpu: + a, b = cpu.split("-") + a = int(a) + b = int(b) + cpus_num += len(range(a, b)) + else: + cpus_num += 1 + threads = range(0, cpus_num) + elif hasattr(self, 'num_cores'): + self.log_print("Limiting FIO workload execution to %s cores" % self.num_cores) + threads = range(0, int(self.num_cores)) + else: + threads = range(0, len(subsystems)) + + if "spdk" in self.mode: + filename_section = self.gen_fio_filename_conf(subsystems, threads, io_depth, num_jobs) + else: + filename_section = self.gen_fio_filename_conf(threads, io_depth, num_jobs) 
+ + fio_config = fio_conf_template.format(ioengine=ioengine, spdk_conf=spdk_conf, + rw=rw, rwmixread=rwmixread, block_size=block_size, + ramp_time=ramp_time, run_time=run_time) + if num_jobs: + fio_config = fio_config + "numjobs=%s \n" % num_jobs + if self.cpus_allowed is not None: + fio_config = fio_config + "cpus_allowed=%s \n" % self.cpus_allowed + fio_config = fio_config + "cpus_allowed_policy=%s \n" % self.cpus_allowed_policy + fio_config = fio_config + filename_section + + fio_config_filename = "%s_%s_%s_m_%s" % (block_size, io_depth, rw, rwmixread) + if hasattr(self, "num_cores"): + fio_config_filename += "_%sCPU" % self.num_cores + fio_config_filename += ".fio" + + self.remote_call("mkdir -p %s/nvmf_perf" % self.spdk_dir) + self.remote_call("echo '%s' > %s/nvmf_perf/%s" % (fio_config, self.spdk_dir, fio_config_filename)) + self.log_print("Created FIO Config:") + self.log_print(fio_config) + + return os.path.join(self.spdk_dir, "nvmf_perf", fio_config_filename) + + def set_cpu_frequency(self): + if self.cpu_frequency is not None: + try: + self.remote_call('sudo cpupower frequency-set -g userspace') + self.remote_call('sudo cpupower frequency-set -f %s' % self.cpu_frequency) + cmd = "sudo cpupower frequency-info" + output, error = self.remote_call(cmd) + self.log_print(output) + self.log_print(error) + except Exception: + self.log_print("ERROR: cpu_frequency will not work when intel_pstate is enabled!") + sys.exit() + else: + self.log_print("WARNING: you have disabled intel_pstate and using default cpu governance.") + + def run_fio(self, fio_config_file, run_num=None): + job_name, _ = os.path.splitext(fio_config_file) + self.log_print("Starting FIO run for job: %s" % job_name) + self.log_print("Using FIO: %s" % self.fio_bin) + + if run_num: + for i in range(1, run_num + 1): + output_filename = job_name + "_run_" + str(i) + "_" + self.name + ".json" + cmd = "sudo %s %s --output-format=json --output=%s" % (self.fio_bin, fio_config_file, output_filename) + output, error = self.remote_call(cmd) + self.log_print(output) + self.log_print(error) + else: + output_filename = job_name + "_" + self.name + ".json" + cmd = "sudo %s %s --output-format=json --output=%s" % (self.fio_bin, fio_config_file, output_filename) + output, error = self.remote_call(cmd) + self.log_print(output) + self.log_print(error) + self.log_print("FIO run finished. 
Results in: %s" % output_filename) + + +class KernelTarget(Target): + def __init__(self, name, username, password, mode, nic_ips, transport="rdma", + use_null_block=False, sar_settings=None, pcm_settings=None, + bandwidth_settings=None, nvmet_bin="nvmetcli", **kwargs): + + super(KernelTarget, self).__init__(name, username, password, mode, nic_ips, transport, + use_null_block, sar_settings, pcm_settings, bandwidth_settings) + self.nvmet_bin = nvmet_bin + + def __del__(self): + nvmet_command(self.nvmet_bin, "clear") + + def kernel_tgt_gen_nullblock_conf(self, address): + nvmet_cfg = { + "ports": [], + "hosts": [], + "subsystems": [], + } + + nvmet_cfg["subsystems"].append({ + "allowed_hosts": [], + "attr": { + "allow_any_host": "1", + "serial": "SPDK0001", + "version": "1.3" + }, + "namespaces": [ + { + "device": { + "path": "/dev/nullb0", + "uuid": "%s" % uuid.uuid4() + }, + "enable": 1, + "nsid": 1 + } + ], + "nqn": "nqn.2018-09.io.spdk:cnode1" + }) + + nvmet_cfg["ports"].append({ + "addr": { + "adrfam": "ipv4", + "traddr": address, + "trsvcid": "4420", + "trtype": "%s" % self.transport, + }, + "portid": 1, + "referrals": [], + "subsystems": ["nqn.2018-09.io.spdk:cnode1"] + }) + with open("kernel.conf", 'w') as fh: + fh.write(json.dumps(nvmet_cfg, indent=2)) + + def kernel_tgt_gen_subsystem_conf(self, nvme_list, address_list): + + nvmet_cfg = { + "ports": [], + "hosts": [], + "subsystems": [], + } + + # Split disks between NIC IP's + disks_per_ip = int(len(nvme_list) / len(address_list)) + disk_chunks = [nvme_list[i * disks_per_ip:disks_per_ip + disks_per_ip * i] for i in range(0, len(address_list))] + + subsys_no = 1 + port_no = 0 + for ip, chunk in zip(address_list, disk_chunks): + for disk in chunk: + nvmet_cfg["subsystems"].append({ + "allowed_hosts": [], + "attr": { + "allow_any_host": "1", + "serial": "SPDK00%s" % subsys_no, + "version": "1.3" + }, + "namespaces": [ + { + "device": { + "path": disk, + "uuid": "%s" % uuid.uuid4() + }, + "enable": 1, + "nsid": subsys_no + } + ], + "nqn": "nqn.2018-09.io.spdk:cnode%s" % subsys_no + }) + + nvmet_cfg["ports"].append({ + "addr": { + "adrfam": "ipv4", + "traddr": ip, + "trsvcid": "%s" % (4420 + port_no), + "trtype": "%s" % self.transport + }, + "portid": subsys_no, + "referrals": [], + "subsystems": ["nqn.2018-09.io.spdk:cnode%s" % subsys_no] + }) + subsys_no += 1 + port_no += 1 + + with open("kernel.conf", "w") as fh: + fh.write(json.dumps(nvmet_cfg, indent=2)) + pass + + def tgt_start(self): + self.log_print("Configuring kernel NVMeOF Target") + + if self.null_block: + print("Configuring with null block device.") + if len(self.nic_ips) > 1: + print("Testing with null block limited to single RDMA NIC.") + print("Please specify only 1 IP address.") + exit(1) + self.subsys_no = 1 + self.kernel_tgt_gen_nullblock_conf(self.nic_ips[0]) + else: + print("Configuring with NVMe drives.") + nvme_list = get_nvme_devices() + self.kernel_tgt_gen_subsystem_conf(nvme_list, self.nic_ips) + self.subsys_no = len(nvme_list) + + nvmet_command(self.nvmet_bin, "clear") + nvmet_command(self.nvmet_bin, "restore kernel.conf") + self.log_print("Done configuring kernel NVMeOF Target") + + +class SPDKTarget(Target): + + def __init__(self, name, username, password, mode, nic_ips, transport="rdma", + use_null_block=False, sar_settings=None, pcm_settings=None, + bandwidth_settings=None, num_shared_buffers=4096, num_cores=1, **kwargs): + + super(SPDKTarget, self).__init__(name, username, password, mode, nic_ips, transport, + use_null_block, sar_settings, pcm_settings, 
bandwidth_settings) + self.num_cores = num_cores + self.num_shared_buffers = num_shared_buffers + + def spdk_tgt_configure(self): + self.log_print("Configuring SPDK NVMeOF target via RPC") + numa_list = get_used_numa_nodes() + + # Create RDMA transport layer + rpc.nvmf.nvmf_create_transport(self.client, trtype=self.transport, num_shared_buffers=self.num_shared_buffers) + self.log_print("SPDK NVMeOF transport layer:") + rpc.client.print_dict(rpc.nvmf.nvmf_get_transports(self.client)) + + if self.null_block: + nvme_section = self.spdk_tgt_add_nullblock() + subsystems_section = self.spdk_tgt_add_subsystem_conf(self.nic_ips, req_num_disks=1) + else: + nvme_section = self.spdk_tgt_add_nvme_conf() + subsystems_section = self.spdk_tgt_add_subsystem_conf(self.nic_ips) + self.log_print("Done configuring SPDK NVMeOF Target") + + def spdk_tgt_add_nullblock(self): + self.log_print("Adding null block bdev to config via RPC") + rpc.bdev.bdev_null_create(self.client, 102400, 4096, "Nvme0n1") + self.log_print("SPDK Bdevs configuration:") + rpc.client.print_dict(rpc.bdev.bdev_get_bdevs(self.client)) + + def spdk_tgt_add_nvme_conf(self, req_num_disks=None): + self.log_print("Adding NVMe bdevs to config via RPC") + + bdfs = get_nvme_devices_bdf() + bdfs = [b.replace(":", ".") for b in bdfs] + + if req_num_disks: + if req_num_disks > len(bdfs): + self.log_print("ERROR: Requested number of disks is more than available %s" % len(bdfs)) + sys.exit(1) + else: + bdfs = bdfs[0:req_num_disks] + + for i, bdf in enumerate(bdfs): + rpc.bdev.bdev_nvme_attach_controller(self.client, name="Nvme%s" % i, trtype="PCIe", traddr=bdf) + + self.log_print("SPDK Bdevs configuration:") + rpc.client.print_dict(rpc.bdev.bdev_get_bdevs(self.client)) + + def spdk_tgt_add_subsystem_conf(self, ips=None, req_num_disks=None): + self.log_print("Adding subsystems to config") + if not req_num_disks: + req_num_disks = get_nvme_devices_count() + + # Distribute bdevs between provided NICs + num_disks = range(0, req_num_disks) + if len(num_disks) == 1: + disks_per_ip = 1 + else: + disks_per_ip = int(len(num_disks) / len(ips)) + disk_chunks = [num_disks[i * disks_per_ip:disks_per_ip + disks_per_ip * i] for i in range(0, len(ips))] + + # Create subsystems, add bdevs to namespaces, add listeners + for ip, chunk in zip(ips, disk_chunks): + for c in chunk: + nqn = "nqn.2018-09.io.spdk:cnode%s" % c + serial = "SPDK00%s" % c + bdev_name = "Nvme%sn1" % c + rpc.nvmf.nvmf_create_subsystem(self.client, nqn, serial, + allow_any_host=True, max_namespaces=8) + rpc.nvmf.nvmf_subsystem_add_ns(self.client, nqn, bdev_name) + + rpc.nvmf.nvmf_subsystem_add_listener(self.client, nqn, + trtype=self.transport, + traddr=ip, + trsvcid="4420", + adrfam="ipv4") + + self.log_print("SPDK NVMeOF subsystem configuration:") + rpc.client.print_dict(rpc.nvmf.nvmf_get_subsystems(self.client)) + + def tgt_start(self): + if self.null_block: + self.subsys_no = 1 + else: + self.subsys_no = get_nvme_devices_count() + self.log_print("Starting SPDK NVMeOF Target process") + nvmf_app_path = os.path.join(self.spdk_dir, "build/bin/nvmf_tgt") + command = " ".join([nvmf_app_path, "-m", self.num_cores]) + proc = subprocess.Popen(command, shell=True) + self.pid = os.path.join(self.spdk_dir, "nvmf.pid") + + with open(self.pid, "w") as fh: + fh.write(str(proc.pid)) + self.nvmf_proc = proc + self.log_print("SPDK NVMeOF Target PID=%s" % self.pid) + self.log_print("Waiting for spdk to initilize...") + while True: + if os.path.exists("/var/tmp/spdk.sock"): + break + time.sleep(1) + self.client = 
rpc.client.JSONRPCClient("/var/tmp/spdk.sock") + + self.spdk_tgt_configure() + + def __del__(self): + if hasattr(self, "nvmf_proc"): + try: + self.nvmf_proc.terminate() + self.nvmf_proc.wait() + except Exception as e: + self.log_print(e) + self.nvmf_proc.kill() + self.nvmf_proc.communicate() + + +class KernelInitiator(Initiator): + def __init__(self, name, username, password, mode, nic_ips, ip, transport, + cpus_allowed=None, cpus_allowed_policy="shared", + cpu_frequency=None, fio_bin="/usr/src/fio/fio", **kwargs): + + super(KernelInitiator, self).__init__(name, username, password, mode, nic_ips, ip, transport, + cpus_allowed=cpus_allowed, cpus_allowed_policy=cpus_allowed_policy, + cpu_frequency=cpu_frequency, fio_bin=fio_bin) + + self.extra_params = "" + if kwargs["extra_params"]: + self.extra_params = kwargs["extra_params"] + + def __del__(self): + self.ssh_connection.close() + + def kernel_init_connect(self, address_list, subsys_no): + subsystems = self.discover_subsystems(address_list, subsys_no) + self.log_print("Below connection attempts may result in error messages, this is expected!") + for subsystem in subsystems: + self.log_print("Trying to connect %s %s %s" % subsystem) + self.remote_call("sudo %s connect -t %s -s %s -n %s -a %s %s" % (self.nvmecli_bin, + self.transport, + *subsystem, + self.extra_params)) + time.sleep(2) + + def kernel_init_disconnect(self, address_list, subsys_no): + subsystems = self.discover_subsystems(address_list, subsys_no) + for subsystem in subsystems: + self.remote_call("sudo %s disconnect -n %s" % (self.nvmecli_bin, subsystem[1])) + time.sleep(1) + + def gen_fio_filename_conf(self, threads, io_depth, num_jobs=1): + out, err = self.remote_call("sudo nvme list | grep -E 'SPDK|Linux' | awk '{print $1}'") + nvme_list = [x for x in out.split("\n") if "nvme" in x] + + filename_section = "" + nvme_per_split = int(len(nvme_list) / len(threads)) + remainder = len(nvme_list) % len(threads) + iterator = iter(nvme_list) + result = [] + for i in range(len(threads)): + result.append([]) + for j in range(nvme_per_split): + result[i].append(next(iterator)) + if remainder: + result[i].append(next(iterator)) + remainder -= 1 + for i, r in enumerate(result): + header = "[filename%s]" % i + disks = "\n".join(["filename=%s" % x for x in r]) + job_section_qd = round((io_depth * len(r)) / num_jobs) + if job_section_qd == 0: + job_section_qd = 1 + iodepth = "iodepth=%s" % job_section_qd + filename_section = "\n".join([filename_section, header, disks, iodepth]) + + return filename_section + + +class SPDKInitiator(Initiator): + def __init__(self, name, username, password, mode, nic_ips, ip, transport="rdma", + num_cores=1, cpus_allowed=None, cpus_allowed_policy="shared", + cpu_frequency=None, fio_bin="/usr/src/fio/fio", **kwargs): + super(SPDKInitiator, self).__init__(name, username, password, mode, nic_ips, ip, transport, + cpus_allowed=cpus_allowed, cpus_allowed_policy=cpus_allowed_policy, + cpu_frequency=cpu_frequency, fio_bin=fio_bin) + + self.num_cores = num_cores + + def install_spdk(self, local_spdk_zip): + self.put_file(local_spdk_zip, "/tmp/spdk_drop.zip") + self.log_print("Copied sources zip from target") + self.remote_call("unzip -qo /tmp/spdk_drop.zip -d %s" % self.spdk_dir) + + self.log_print("Sources unpacked") + self.log_print("Using fio binary %s" % self.fio_bin) + self.remote_call("cd %s; git submodule update --init; make clean; ./configure --with-rdma --with-fio=%s;" + "make -j$(($(nproc)*2))" % (self.spdk_dir, os.path.dirname(self.fio_bin))) + + 
self.log_print("SPDK built") + self.remote_call("sudo %s/scripts/setup.sh" % self.spdk_dir) + + def gen_spdk_bdev_conf(self, remote_subsystem_list): + header = "[Nvme]" + row_template = """ TransportId "trtype:{transport} adrfam:IPv4 traddr:{ip} trsvcid:{svc} subnqn:{nqn}" Nvme{i}""" + + bdev_rows = [row_template.format(transport=self.transport, + svc=x[0], + nqn=x[1], + ip=x[2], + i=i) for i, x in enumerate(remote_subsystem_list)] + bdev_rows = "\n".join(bdev_rows) + bdev_section = "\n".join([header, bdev_rows]) + return bdev_section + + def gen_fio_filename_conf(self, subsystems, threads, io_depth, num_jobs=1): + filename_section = "" + if len(threads) >= len(subsystems): + threads = range(0, len(subsystems)) + filenames = ["Nvme%sn1" % x for x in range(0, len(subsystems))] + nvme_per_split = int(len(subsystems) / len(threads)) + remainder = len(subsystems) % len(threads) + iterator = iter(filenames) + result = [] + for i in range(len(threads)): + result.append([]) + for j in range(nvme_per_split): + result[i].append(next(iterator)) + if remainder: + result[i].append(next(iterator)) + remainder -= 1 + for i, r in enumerate(result): + header = "[filename%s]" % i + disks = "\n".join(["filename=%s" % x for x in r]) + job_section_qd = round((io_depth * len(r)) / num_jobs) + if job_section_qd == 0: + job_section_qd = 1 + iodepth = "iodepth=%s" % job_section_qd + filename_section = "\n".join([filename_section, header, disks, iodepth]) + + return filename_section + + +if __name__ == "__main__": + spdk_zip_path = "/tmp/spdk.zip" + target_results_dir = "/tmp/results" + + if (len(sys.argv) > 1): + config_file_path = sys.argv[1] + else: + script_full_dir = os.path.dirname(os.path.realpath(__file__)) + config_file_path = os.path.join(script_full_dir, "config.json") + + print("Using config file: %s" % config_file_path) + with open(config_file_path, "r") as config: + data = json.load(config) + + initiators = [] + fio_cases = [] + + for k, v in data.items(): + if "target" in k: + if data[k]["mode"] == "spdk": + target_obj = SPDKTarget(name=k, **data["general"], **v) + elif data[k]["mode"] == "kernel": + target_obj = KernelTarget(name=k, **data["general"], **v) + elif "initiator" in k: + if data[k]["mode"] == "spdk": + init_obj = SPDKInitiator(name=k, **data["general"], **v) + elif data[k]["mode"] == "kernel": + init_obj = KernelInitiator(name=k, **data["general"], **v) + initiators.append(init_obj) + elif "fio" in k: + fio_workloads = itertools.product(data[k]["bs"], + data[k]["qd"], + data[k]["rw"]) + + fio_run_time = data[k]["run_time"] + fio_ramp_time = data[k]["ramp_time"] + fio_rw_mix_read = data[k]["rwmixread"] + fio_run_num = data[k]["run_num"] if "run_num" in data[k].keys() else None + fio_num_jobs = data[k]["num_jobs"] if "num_jobs" in data[k].keys() else None + else: + continue + + # Copy and install SPDK on remote initiators + if "skip_spdk_install" not in data["general"]: + target_obj.zip_spdk_sources(target_obj.spdk_dir, spdk_zip_path) + threads = [] + for i in initiators: + if i.mode == "spdk": + t = threading.Thread(target=i.install_spdk, args=(spdk_zip_path,)) + threads.append(t) + t.start() + for t in threads: + t.join() + + target_obj.tgt_start() + + # Poor mans threading + # Run FIO tests + for block_size, io_depth, rw in fio_workloads: + threads = [] + configs = [] + for i in initiators: + if i.mode == "kernel": + i.kernel_init_connect(i.nic_ips, target_obj.subsys_no) + + cfg = i.gen_fio_config(rw, fio_rw_mix_read, block_size, io_depth, target_obj.subsys_no, + fio_num_jobs, 
fio_ramp_time, fio_run_time) + configs.append(cfg) + + for i, cfg in zip(initiators, configs): + t = threading.Thread(target=i.run_fio, args=(cfg, fio_run_num)) + threads.append(t) + if target_obj.enable_sar: + sar_file_name = "_".join([str(block_size), str(rw), str(io_depth), "sar"]) + sar_file_name = ".".join([sar_file_name, "txt"]) + t = threading.Thread(target=target_obj.measure_sar, args=(target_results_dir, sar_file_name)) + threads.append(t) + + if target_obj.enable_pcm: + pcm_file_name = "_".join(["pcm_cpu", str(block_size), str(rw), str(io_depth)]) + pcm_file_name = ".".join([pcm_file_name, "csv"]) + t = threading.Thread(target=target_obj.measure_pcm, args=(target_results_dir, pcm_file_name,)) + threads.append(t) + + if target_obj.enable_pcm_memory: + pcm_file_name = "_".join(["pcm_memory", str(block_size), str(rw), str(io_depth)]) + pcm_file_name = ".".join([pcm_file_name, "csv"]) + t = threading.Thread(target=target_obj.measure_pcm_memory, args=(target_results_dir, pcm_file_name,)) + threads.append(t) + + if target_obj.enable_bandwidth: + bandwidth_file_name = "_".join(["bandwidth", str(block_size), str(rw), str(io_depth)]) + bandwidth_file_name = ".".join([bandwidth_file_name, "csv"]) + t = threading.Thread(target=target_obj.measure_bandwidth, args=(target_results_dir, bandwidth_file_name,)) + threads.append(t) + + for t in threads: + t.start() + for t in threads: + t.join() + + for i in initiators: + if i.mode == "kernel": + i.kernel_init_disconnect(i.nic_ips, target_obj.subsys_no) + i.copy_result_files(target_results_dir) + + target_obj.parse_results(target_results_dir) diff --git a/src/spdk/scripts/perf/vhost/fio_test.conf b/src/spdk/scripts/perf/vhost/fio_test.conf new file mode 100644 index 000000000..c480f1966 --- /dev/null +++ b/src/spdk/scripts/perf/vhost/fio_test.conf @@ -0,0 +1,20 @@ +[global] +ioengine=libaio +thread=1 +group_reporting=1 +direct=1 +verify=0 +norandommap=1 + +[perf_test] +stonewall +description="Run NVMe driver performance test for a given workload" +bs={blksize} +rw={rw} +rwmixread={rwmixread} +iodepth={iodepth} +time_based=1 +ramp_time={ramptime} +runtime={runtime} +numjobs={numjobs} +filename= diff --git a/src/spdk/scripts/perf/vhost/run_vhost_test.py b/src/spdk/scripts/perf/vhost/run_vhost_test.py new file mode 100644 index 000000000..e6d86161f --- /dev/null +++ b/src/spdk/scripts/perf/vhost/run_vhost_test.py @@ -0,0 +1,219 @@ +import os +import sys +import argparse +import multiprocessing +import subprocess +from subprocess import check_call, call, check_output, Popen, PIPE + + +def range_incl(a, b): + return list(range(a, b + 1)) + + +def list_spdk_used_cpus(cpus): + cpu_list = [] + for chunk in cpus.split(","): + if "-" in chunk: + _ = chunk.split("-") + _ = list(map(int, _)) + cpu_list.extend(list(range_incl(*_))) + else: + cpu_list.append(int(chunk)) + return cpu_list + + +def gen_cpu_mask_config(output_dir, spdk_cpu_list, vm_count, vm_cpu_num): + spdk = gen_spdk_cpu_mask_config(spdk_cpu_list) + qemu = gen_qemu_cpu_mask_config(spdk_cpu_list, vm_count, vm_cpu_num) + file_path = os.path.join(output_dir, "mask_config") + with open(file_path, "w") as fh: + fh.write("".join([spdk, qemu])) + return file_path + + +def gen_spdk_cpu_mask_config(spdk_cpu_list): + cpus = "vhost_0_reactor_mask=[%s]" % (spdk_cpu_list) + + # Go through assigned CPUs and use the lowest CPU index as + # default primary core + cpu_indexes = list_spdk_used_cpus(spdk_cpu_list) + cpu_indexes.sort() + print(cpu_indexes) + + pr_core = "vhost_0_master_core=%s" % (cpu_indexes[0]) + 
return "\n".join([cpus, pr_core, "\n"]) + + +def get_host_cpus(): + cpu_num = multiprocessing.cpu_count() + cpu_list = list(range(0, cpu_num)) + output = check_output("lscpu | grep 'per core'", shell=True) + + # Assuming 2-socket server + if "2" in str(output): + ht_enabled = True + cpu_chunk = int(cpu_num/4) + numa0_cpus = cpu_list[0:cpu_chunk] + numa0_cpus.extend(cpu_list[2*cpu_chunk:3*cpu_chunk]) + numa1_cpus = cpu_list[cpu_chunk:2*cpu_chunk] + numa1_cpus.extend(cpu_list[3*cpu_chunk:4*cpu_chunk]) + else: + ht_enabled = False + cpu_chunk = int(cpu_num/2) + numa0_cpus = cpu_list[:cpu_chunk] + numa1_cpus = cpu_list[cpu_chunk:] + return [numa0_cpus, numa1_cpus] + + +def gen_qemu_cpu_mask_config(spdk_cpu_list, vm_count, vm_cpu_num): + print("Creating masks for QEMU") + ret = "" + + # Exclude SPDK cores from available CPU list + numa0_cpus, numa1_cpus = get_host_cpus() + spdk_cpus = list_spdk_used_cpus(spdk_cpu_list) + spdk_cpus.sort() + + numa0_cpus = sorted(list(set(numa0_cpus) - set(spdk_cpus))) + numa1_cpus = sorted(list(set(numa1_cpus) - set(spdk_cpus))) + + # Generate qemu cpu mask and numa param for VMs out of + # remaining free CPU cores. + # All CPUs assigned to a VM will come from the same NUMA node. + # Assuming 2 socket server. + used_numa = 0 + available = numa0_cpus + for i in range(0, vm_count): + cpus = [str(x) for x in available[0:vm_cpu_num]] + + # If there is not enough cores on first numa node for a VM + # then switch to next numa node + if len(cpus) < vm_cpu_num and used_numa == 0: + available = numa1_cpus + used_numa = 1 + cpus = [str(x) for x in available[0:vm_cpu_num]] + + # If not enough cores on second numa node - break and exit + if len(cpus) < vm_cpu_num and used_numa == 1: + print("There is not enough CPU Cores available on \ + Numa node1 to create VM %s" % i) + break + + cpus = ",".join(cpus) + cpus = "VM_%s_qemu_mask=%s" % (i, cpus) + numa = "VM_%s_qemu_numa_node=%s\n" % (i, used_numa) + + # Remove used CPU cores from available list + available = available[vm_cpu_num:] + ret = "\n".join([ret, cpus, numa]) + + return ret + + +def create_fio_cfg(template_dir, output_dir, **kwargs): + fio_tempalte = os.path.join(template_dir, "fio_test.conf") + with open("scripts/perf/vhost/fio_test.conf", "r") as fh: + cfg = fh.read() + cfg = cfg.format(**kwargs) + + file_path = os.path.join(output_dir, "fio_job.cfg") + with open(file_path, "w") as fh: + fh.write(cfg) + return file_path + + +script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) +parser = argparse.ArgumentParser() + +parser.add_argument('blksize', default="4k", type=str, + help="Block size param for FIO. Default: 4k") +parser.add_argument('iodepth', default="128", type=str, + help="Iodepth param for FIO. Default: 128") +parser.add_argument('rw', default="randread", type=str, + help="RW param for FIO. Default: randread") +parser.add_argument('-m', '--rwmixread', default="70", type=str, + help="Percentage of reads in read-write mode. Default: 70") +parser.add_argument('-n', '--numjobs', default="1", type=str, + help="Numjobs value for FIO job. Default: 1") +parser.add_argument('-r', '--runtime', default="10", type=str, + help="Run time param for FIO (in seconds). Default: 10") +parser.add_argument('-R', '--ramptime', default="10", type=str, + help="Ramp time param for FIO (in seconds). 
Default: 10") +parser.add_argument('-c', '--ctrl-type', default="spdk_vhost_scsi", type=str, + help="Type of vhost controller to use in test.\ + Possible options: spdk_vhost_scsi, spdk_vhost_blk\ + Default: spdk_vhost_scsi") +parser.add_argument('-s', '--split', default=False, type=bool, + help="Use split vbdevs instead of logical volumes. Default: false") +parser.add_argument('-d', '--max-disks', default=0, type=int, + help="How many physical disks to use in test. Default: all disks.\ + Depending on the number of --vm-count disks may be split into\ + smaller logical bdevs (splits or logical volumes) so that\ + each virtual machine gets it's own bdev to work on.") +parser.add_argument('-v', '--vm-count', default=1, type=int, + help="How many VMs to run in test. Default: 1") +parser.add_argument('-i', '--vm-image', default="$HOME/vhost_vm_image.qcow2", + type=str, help="VM image to use for running VMs.") + +subparsers = parser.add_subparsers() +cpu_cfg_create = subparsers.add_parser('create_cpu_cfg', + help="Generate a CPU config file for test.\ + This option will attempt to automatically\ + generate config file with SPDK/QEMU cpu lists.\ + CPU cores on NUMA Node 0 will be used first\ + (including logical cores when HT is enabled)\ + and NUMA Node 1 will be used last.") +cpu_cfg_create.add_argument('spdk_cpu_list', default=None, + help="List of CPU cores to be used by SPDK vhost app.\ + Accepted format examples:\ + single cpus: 0,2,4\ + ranges (inclusive!): 0-2\ + mixed: 0,2-5,9") +cpu_cfg_create.add_argument('vm_cpu_num', default=None, type=int) + +cpu_cfg_load = subparsers.add_parser('load_cpu_cfg', + help="Load and use a CPU config file for test\ + Example configuration files can be found in:\ + test/vhost/common/autotest.config") +cpu_cfg_load.add_argument('custom_mask_file', default=None, + help="Path to file with custom values for vhost's\ + reactor mask and master core, and each VM's qemu mask\ + and qemu numa node") + +args = parser.parse_args() +fio_cfg_path = create_fio_cfg(script_dir, script_dir, **vars(args)) + +cpu_cfg_arg = "" +disk_arg = "" +split_arg = "" +if "spdk_cpu_list" in args: + cfg_path = gen_cpu_mask_config(script_dir, args.spdk_cpu_list, args.vm_count, args.vm_cpu_num) + cpu_cfg_arg = "--custom-cpu-cfg=%s" % cfg_path +if "custom_mask_file" in args: + cpu_cfg_arg = "--custom-cpu-cfg=%s" % args.custom_mask_file +if args.split is True: + split_arg = "--use-split" +if args.max_disks > 0: + disk_arg = "--max-disks=%s" % args.max_disks + + +command = " ".join(["test/vhost/perf_bench/vhost_perf.sh", + "--vm-image=%s" % args.vm_image, + "--vm-count=%s" % args.vm_count, + "--ctrl-type=%s" % args.ctrl_type, + "%s" % split_arg, + "%s" % disk_arg, + "--fio-job=%s" % fio_cfg_path, + "%s" % cpu_cfg_arg]) +# TODO: Disabled for now. +# Reason: initially this script was supposed to be a wrapper for .sh script and would +# - generate FIO config +# - generate SPDK/QEMU CPU mask configuration file +# - run test script +# Auto-generating CPU masks configuration needs some more work to be done +# and increasing number of params makes .py script hard to use. +# Will cleanup here soon. + +# print("INFO: Running perf test with command:") +# print(command) +# pr = check_output(command, shell=True) |