author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-27 18:24:20 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-27 18:24:20 +0000
commit     483eb2f56657e8e7f419ab1a4fab8dce9ade8609
tree       e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf   /src/spdk/scripts/perf
parent     Initial commit.
Adding upstream version 14.2.21.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/scripts/perf')
-rw-r--r--   src/spdk/scripts/perf/nvme/README              |  12
-rw-r--r--   src/spdk/scripts/perf/nvme/fio_test.conf       |  20
-rwxr-xr-x   src/spdk/scripts/perf/nvme/run_fio_test.py     | 166
-rwxr-xr-x   src/spdk/scripts/perf/nvme/run_fio_test.sh     |  19
-rw-r--r--   src/spdk/scripts/perf/vhost/fio_test.conf      |  21
-rw-r--r--   src/spdk/scripts/perf/vhost/run_vhost_test.py  | 208
6 files changed, 446 insertions(+), 0 deletions(-)
diff --git a/src/spdk/scripts/perf/nvme/README b/src/spdk/scripts/perf/nvme/README
new file mode 100644
index 00000000..6468399a
--- /dev/null
+++ b/src/spdk/scripts/perf/nvme/README
@@ -0,0 +1,12 @@
+These scripts are used to perform benchmark testing with fio.
+run_fio_test.py is the main script: it runs the performance test and parses the test results.
+Users can populate test parameters for different fio workloads in the lists (q_depth, io_size, workload_type, mix, core_mask and run_time) at the top of the run_fio_test.py script.
+run_fio_test.py writes the test results to a csv file named <hostname>_<num ssds>_perf_output.csv.
+The run_fio_test.sh script demonstrates how to invoke run_fio_test.py with the
+input parameters: path_to_fio_conf, path_to_ioengine and num_ssds. The run_fio_test.sh script will
+call the SPDK setup.sh script to unbind the NVMe devices from the kernel driver and bind them to the uio driver.
+We include a sample fio configuration file with the parameters we use in our test environment.
+run_fio_test.py will append the NVMe devices to the end of the configuration file. The number of
+NVMe devices used is specified using the num_ssds parameter.
+
+Usage: ./run_fio_test.sh
diff --git a/src/spdk/scripts/perf/nvme/fio_test.conf b/src/spdk/scripts/perf/nvme/fio_test.conf
new file mode 100644
index 00000000..a03c6a1e
--- /dev/null
+++ b/src/spdk/scripts/perf/nvme/fio_test.conf
@@ -0,0 +1,20 @@
+[global]
+ioengine=${IOENGINE}
+thread=1
+group_reporting=1
+direct=1
+verify=0
+norandommap=1
+cpumask=1
+percentile_list=50:90:99:99.5:99.9:99.99:99.999
+
+[perf_test]
+stonewall
+description="Run NVMe driver performance test for a given workload"
+bs=${BLK_SIZE}
+rw=${RW}
+rwmixread=${MIX}
+iodepth=${IODEPTH}
+time_based=1
+ramp_time=10s
+runtime=${RUNTIME}
diff --git a/src/spdk/scripts/perf/nvme/run_fio_test.py b/src/spdk/scripts/perf/nvme/run_fio_test.py
new file mode 100755
index 00000000..79d9e566
--- /dev/null
+++ b/src/spdk/scripts/perf/nvme/run_fio_test.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# This script runs a fio benchmark test on the local NVMe devices using the SPDK NVMe driver.
+# Prework: Run scripts/setup.sh to bind the SSDs to the SPDK driver.
+# Prework: Change any fio configuration in the template fio config file fio_test.conf.
+# Output: A csv file <hostname>_<num ssds>_perf_output.csv
+
+import subprocess
+from subprocess import check_call, call, check_output, Popen, PIPE
+import random
+import os
+import sys
+import re
+import signal
+import getopt
+from datetime import datetime
+from itertools import *
+import csv
+import itertools
+from shutil import copyfile
+import json
+
+# Populate test parameters into these lists to run different workloads
+# The configuration below runs QD 1 & 128. To add QD 32 set q_depth = ['1', '32', '128']
+q_depth = ['1', '128']
+# io_size specifies the size in bytes of the IO workload.
+# To add 64K IOs set io_size = ['4096', '65536']
+io_size = ['4096']
+workload_type = ['randrw']
+mix = ['100']
+core_mask = ['0x1']
+# run_time specifies how long to run each test.
+# Set run_time = ['600'] to run the test for 10 minutes
+run_time = ['60']
+# iter_num is used to run the test multiple times.
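+# Repeating a test and averaging the results helps smooth out run-to-run variance.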
+# Set iter_num = ['1', '2', '3'] to repeat each test 3 times
+iter_num = ['1']
+
+
+def run_fio(io_size_bytes, qd, rw_mix, cpu_mask, run_num, workload, run_time_sec):
+    print("Running Test: IO Size={} QD={} Mix={} CPU Mask={}".format(io_size_bytes, qd, rw_mix, cpu_mask))
+    string = "s_" + str(io_size_bytes) + "_q_" + str(qd) + "_m_" + str(rw_mix) + "_c_" + str(cpu_mask) + "_run_" + str(run_num)
+
+    # Call fio
+    path_to_fio_conf = config_file_for_test
+    path_to_ioengine = sys.argv[2]
+    command = "BLK_SIZE=" + str(io_size_bytes) + " RW=" + str(workload) + " MIX=" + str(rw_mix) \
+        + " IODEPTH=" + str(qd) + " RUNTIME=" + str(run_time_sec) + " IOENGINE=" + path_to_ioengine \
+        + " fio " + str(path_to_fio_conf) + " -output=" + string + " -output-format=json"
+    output = subprocess.check_output(command, shell=True)
+
+    print("Finished Test: IO Size={} QD={} Mix={} CPU Mask={}".format(io_size_bytes, qd, rw_mix, cpu_mask))
+    return
+
+
+def parse_results(io_size_bytes, qd, rw_mix, cpu_mask, run_num, workload, run_time_sec):
+    results_array = []
+
+    # If the json file has results for multiple fio jobs, pick the results from the right job
+    job_pos = 0
+
+    # Generate the next result line that will be added to the output csv file
+    results = str(io_size_bytes) + "," + str(qd) + "," + str(rw_mix) + "," \
+        + str(workload) + "," + str(cpu_mask) + "," + str(run_time_sec) + "," + str(run_num)
+
+    # Read the results of this run from the test result file
+    string = "s_" + str(io_size_bytes) + "_q_" + str(qd) + "_m_" + str(rw_mix) + "_c_" + str(cpu_mask) + "_run_" + str(run_num)
+    with open(string) as json_file:
+        data = json.load(json_file)
+        job_name = data['jobs'][job_pos]['jobname']
+        # print("FIO job name: ", job_name)
+        if 'lat_ns' in data['jobs'][job_pos]['read']:
+            lat = 'lat_ns'
+            lat_units = 'ns'
+        else:
+            lat = 'lat'
+            lat_units = 'us'
+        read_iops = float(data['jobs'][job_pos]['read']['iops'])
+        read_bw = float(data['jobs'][job_pos]['read']['bw'])
+        read_avg_lat = float(data['jobs'][job_pos]['read'][lat]['mean'])
+        read_min_lat = float(data['jobs'][job_pos]['read'][lat]['min'])
+        read_max_lat = float(data['jobs'][job_pos]['read'][lat]['max'])
+        write_iops = float(data['jobs'][job_pos]['write']['iops'])
+        write_bw = float(data['jobs'][job_pos]['write']['bw'])
+        write_avg_lat = float(data['jobs'][job_pos]['write'][lat]['mean'])
+        write_min_lat = float(data['jobs'][job_pos]['write'][lat]['min'])
+        write_max_lat = float(data['jobs'][job_pos]['write'][lat]['max'])
+        print("%-10s" % "IO Size", "%-10s" % "QD", "%-10s" % "Mix",
+              "%-10s" % "Workload Type", "%-10s" % "CPU Mask",
+              "%-10s" % "Run Time", "%-10s" % "Run Num",
+              "%-15s" % "Read IOps",
+              "%-10s" % "Read MBps", "%-15s" % "Read Avg. Lat(" + lat_units + ")",
+              "%-15s" % "Read Min. Lat(" + lat_units + ")", "%-15s" % "Read Max. Lat(" + lat_units + ")",
+              "%-15s" % "Write IOps",
+              "%-10s" % "Write MBps", "%-15s" % "Write Avg. Lat(" + lat_units + ")",
+              "%-15s" % "Write Min. Lat(" + lat_units + ")", "%-15s" % "Write Max. Lat(" + lat_units + ")")
+        print("%-10s" % io_size_bytes, "%-10s" % qd, "%-10s" % rw_mix,
+              "%-10s" % workload, "%-10s" % cpu_mask, "%-10s" % run_time_sec,
+              "%-10s" % run_num, "%-15s" % read_iops, "%-10s" % read_bw,
+              "%-15s" % read_avg_lat, "%-15s" % read_min_lat, "%-15s" % read_max_lat,
+              "%-15s" % write_iops, "%-10s" % write_bw, "%-15s" % write_avg_lat,
+              "%-15s" % write_min_lat, "%-15s" % write_max_lat)
+        results = results + "," + str(read_iops) + "," + str(read_bw) + "," \
+            + str(read_avg_lat) + "," + str(read_min_lat) + "," + str(read_max_lat) \
+            + "," + str(write_iops) + "," + str(write_bw) + "," + str(write_avg_lat) \
+            + "," + str(write_min_lat) + "," + str(write_max_lat)
+        with open(result_file_name, "a") as result_file:
+            result_file.write(results + "\n")
+        results_array = []
+    return
+
+
+def get_nvme_devices_count():
+    output = check_output('lspci | grep -i Non | wc -l', shell=True)
+    return int(output)
+
+
+def get_nvme_devices_bdf():
+    output = check_output('lspci | grep -i Non | awk \'{print $1}\'', shell=True).decode("utf-8")
+    output = output.split()
+    return output
+
+
+def add_filename_to_conf(conf_file_name, bdf):
+    filestring = "filename=trtype=PCIe traddr=0000." + bdf.replace(":", ".") + " ns=1"
+    with open(conf_file_name, "a") as conf_file:
+        conf_file.write(filestring + "\n")
+
+
+if len(sys.argv) != 4:
+    print("usage: %s path_to_fio_conf path_to_ioengine num_ssds" % sys.argv[0])
+    sys.exit()
+
+num_ssds = int(sys.argv[3])
+if num_ssds > get_nvme_devices_count():
+    print("System does not have {} NVMe SSDs.".format(num_ssds))
+    sys.exit()
+
+host_name = os.uname()[1]
+result_file_name = host_name + "_" + sys.argv[3] + "ssds_perf_output.csv"
+
+bdf = get_nvme_devices_bdf()
+config_file_for_test = sys.argv[1] + "_" + sys.argv[3] + "ssds"
+copyfile(sys.argv[1], config_file_for_test)
+
+# Add the number of threads to the fio config file
+with open(config_file_for_test, "a") as conf_file:
+    conf_file.write("numjobs=" + str(1) + "\n")
+
+# Add the NVMe bdf to the fio config file
+for i in range(0, num_ssds):
+    add_filename_to_conf(config_file_for_test, bdf[i])
+
+# Set up for output
+columns = "IO_Size,Q_Depth,Workload_Mix,Workload_Type,Core_Mask,Run_Time,Run,Read_IOPS,Read_bw(KiB/s), \
+        Read_Avg_lat(us),Read_Min_Lat(us),Read_Max_Lat(us),Write_IOPS,Write_bw(KiB/s),Write_Avg_lat(us), \
+        Write_Min_Lat(us),Write_Max_Lat(us)"
+
+with open(result_file_name, "w+") as result_file:
+    result_file.write(columns + "\n")
+
+for i, (s, q, m, w, c, t) in enumerate(itertools.product(io_size, q_depth, mix, workload_type, core_mask, run_time)):
+    run_fio(s, q, m, c, i, w, t)
+    parse_results(s, q, m, c, i, w, t)
diff --git a/src/spdk/scripts/perf/nvme/run_fio_test.sh b/src/spdk/scripts/perf/nvme/run_fio_test.sh
new file mode 100755
index 00000000..454ea555
--- /dev/null
+++ b/src/spdk/scripts/perf/nvme/run_fio_test.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../../..)
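+# testdir resolves to scripts/perf/nvme inside the SPDK tree; rootdir is the SPDK root directory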
+
+# Bind devices to the SPDK NVMe driver
+$rootdir/scripts/setup.sh
+
+# Run performance test with 1 SSD
+$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/examples/nvme/fio_plugin/fio_plugin 1
+
+# 2 SSDs test run
+$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/examples/nvme/fio_plugin/fio_plugin 2
+
+# 4 SSDs test run
+$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/examples/nvme/fio_plugin/fio_plugin 4
+
+# 8 SSDs test run
+$testdir/run_fio_test.py $testdir/fio_test.conf $rootdir/examples/nvme/fio_plugin/fio_plugin 8
diff --git a/src/spdk/scripts/perf/vhost/fio_test.conf b/src/spdk/scripts/perf/vhost/fio_test.conf
new file mode 100644
index 00000000..e1054e07
--- /dev/null
+++ b/src/spdk/scripts/perf/vhost/fio_test.conf
@@ -0,0 +1,21 @@
+[global]
+ioengine=libaio
+thread=1
+group_reporting=1
+direct=1
+verify=0
+norandommap=1
+cpumask=1
+percentile_list=50:90:99:99.5:99.9:99.99:99.999
+
+[perf_test]
+stonewall
+description="Run NVMe driver performance test for a given workload"
+bs={blksize}
+rw={rw}
+rwmixread={rwmixread}
+iodepth={iodepth}
+time_based=1
+ramp_time={ramptime}
+runtime={runtime}
+filename=
diff --git a/src/spdk/scripts/perf/vhost/run_vhost_test.py b/src/spdk/scripts/perf/vhost/run_vhost_test.py
new file mode 100644
index 00000000..bb1f9985
--- /dev/null
+++ b/src/spdk/scripts/perf/vhost/run_vhost_test.py
@@ -0,0 +1,208 @@
+import os
+import sys
+import argparse
+import multiprocessing
+import subprocess
+from subprocess import check_call, call, check_output, Popen, PIPE
+
+
+def range_incl(a, b):
+    return list(range(a, b + 1))
+
+
+def list_spdk_used_cpus(cpus):
+    cpu_list = []
+    for chunk in cpus.split(","):
+        if "-" in chunk:
+            _ = chunk.split("-")
+            _ = list(map(int, _))
+            cpu_list.extend(list(range_incl(*_)))
+        else:
+            cpu_list.append(int(chunk))
+    return cpu_list
+
+
+def gen_cpu_mask_config(output_dir, spdk_cpu_list, vm_count, vm_cpu_num):
+    spdk = gen_spdk_cpu_mask_config(spdk_cpu_list)
+    qemu = gen_qemu_cpu_mask_config(spdk_cpu_list, vm_count, vm_cpu_num)
+    file_path = os.path.join(output_dir, "mask_config")
+    with open(file_path, "w") as fh:
+        fh.write("".join([spdk, qemu]))
+    return file_path
+
+
+def gen_spdk_cpu_mask_config(spdk_cpu_list):
+    cpus = "vhost_0_reactor_mask=[%s]" % (spdk_cpu_list)
+
+    # Go through the assigned CPUs and use the lowest CPU index as
+    # the default primary core
+    cpu_indexes = list_spdk_used_cpus(spdk_cpu_list)
+    cpu_indexes.sort()
+    print(cpu_indexes)
+
+    pr_core = "vhost_0_master_core=%s" % (cpu_indexes[0])
+    return "\n".join([cpus, pr_core, "\n"])
+
+
+def get_host_cpus():
+    cpu_num = multiprocessing.cpu_count()
+    cpu_list = list(range(0, cpu_num))
+    output = check_output("lscpu | grep 'per core'", shell=True)
+
+    # "Thread(s) per core: 2" in lscpu output means hyperthreading is enabled.
+    # The NUMA split below assumes a 2-socket server.
+    if "2" in str(output):
+        ht_enabled = True
+        cpu_chunk = int(cpu_num / 4)
+        numa0_cpus = cpu_list[0:cpu_chunk]
+        numa0_cpus.extend(cpu_list[2 * cpu_chunk:3 * cpu_chunk])
+        numa1_cpus = cpu_list[cpu_chunk:2 * cpu_chunk]
+        numa1_cpus.extend(cpu_list[3 * cpu_chunk:4 * cpu_chunk])
+    else:
+        ht_enabled = False
+        cpu_chunk = int(cpu_num / 2)
+        numa0_cpus = cpu_list[:cpu_chunk]
+        numa1_cpus = cpu_list[cpu_chunk:]
+    return [numa0_cpus, numa1_cpus]
+
+
+def gen_qemu_cpu_mask_config(spdk_cpu_list, vm_count, vm_cpu_num):
+    print("Creating masks for QEMU")
+    ret = ""
+
+    # Exclude SPDK cores from the available CPU list
+    numa0_cpus, numa1_cpus = get_host_cpus()
+    spdk_cpus = list_spdk_used_cpus(spdk_cpu_list)
+    spdk_cpus.sort()
+
+    numa0_cpus = sorted(list(set(numa0_cpus) - set(spdk_cpus)))
+    numa1_cpus = sorted(list(set(numa1_cpus) - set(spdk_cpus)))
+
+    # Generate the qemu cpu mask and numa param for VMs out of
+    # the remaining free CPU cores.
+    # All CPUs assigned to a VM will come from the same NUMA node.
+    # Assuming a 2-socket server.
+    used_numa = 0
+    available = numa0_cpus
+    for i in range(0, vm_count):
+        cpus = [str(x) for x in available[0:vm_cpu_num]]
+
+        # If there are not enough cores on the first numa node for a VM
+        # then switch to the next numa node
+        if len(cpus) < vm_cpu_num and used_numa == 0:
+            available = numa1_cpus
+            used_numa = 1
+            cpus = [str(x) for x in available[0:vm_cpu_num]]
+
+        # If there are not enough cores on the second numa node - break and exit
+        if len(cpus) < vm_cpu_num and used_numa == 1:
+            print("There are not enough CPU cores available on \
+NUMA node 1 to create VM %s" % i)
+            break
+
+        cpus = ",".join(cpus)
+        cpus = "VM_%s_qemu_mask=%s" % (i, cpus)
+        numa = "VM_%s_qemu_numa_node=%s\n" % (i, used_numa)
+
+        # Remove the used CPU cores from the available list
+        available = available[vm_cpu_num:]
+        ret = "\n".join([ret, cpus, numa])
+
+    return ret
+
+
+def create_fio_cfg(template_dir, output_dir, **kwargs):
+    # Build the template path from template_dir so the script works from any cwd
+    fio_template = os.path.join(template_dir, "fio_test.conf")
+    with open(fio_template, "r") as fh:
+        cfg = fh.read()
+    cfg = cfg.format(**kwargs)
+
+    file_path = os.path.join(output_dir, "fio_job.cfg")
+    with open(file_path, "w") as fh:
+        fh.write(cfg)
+    return file_path
+
+
+script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+parser = argparse.ArgumentParser()
+
+parser.add_argument('blksize', default="4k", type=str,
+                    help="Block size param for FIO. Default: 4k")
+parser.add_argument('iodepth', default="128", type=str,
+                    help="Iodepth param for FIO. Default: 128")
+parser.add_argument('rw', default="randread", type=str,
+                    help="RW param for FIO. Default: randread")
+parser.add_argument('-m', '--rwmixread', default="70", type=str,
+                    help="Percentage of reads in read-write mode. Default: 70")
+parser.add_argument('-r', '--runtime', default="10", type=str,
+                    help="Run time param for FIO (in seconds). Default: 10")
+parser.add_argument('-R', '--ramptime', default="10", type=str,
+                    help="Ramp time param for FIO (in seconds). Default: 10")
+parser.add_argument('-c', '--ctrl-type', default="spdk_vhost_scsi", type=str,
+                    help="Type of vhost controller to use in test.\
+                    Possible options: spdk_vhost_scsi, spdk_vhost_blk.\
+                    Default: spdk_vhost_scsi")
+parser.add_argument('-s', '--split', default=False, action='store_true',
+                    help="Use split vbdevs instead of logical volumes. Default: false")
+parser.add_argument('-d', '--max-disks', default=0, type=int,
+                    help="How many physical disks to use in test. Default: all disks.\
+                    Depending on the number of --vm-count disks may be split into\
+                    smaller logical bdevs (splits or logical volumes) so that\
+                    each virtual machine gets its own bdev to work on.")
+parser.add_argument('-v', '--vm-count', default=1, type=int,
+                    help="How many VMs to run in test. Default: 1")
+parser.add_argument('-i', '--vm-image', default="/home/sys_sgsw/vhost_vm_image.qcow2",
+                    type=str, help="VM image to use for running VMs.")
+
+subparsers = parser.add_subparsers()
+cpu_cfg_create = subparsers.add_parser('create_cpu_cfg',
+                                       help="Generate a CPU config file for test.\
+                                       This option will attempt to automatically\
+                                       generate a config file with SPDK/QEMU cpu lists.\
+                                       CPU cores on NUMA node 0 will be used first\
+                                       (including logical cores when HT is enabled)\
+                                       and NUMA node 1 will be used last.")
+cpu_cfg_create.add_argument('spdk_cpu_list', default=None,
+                            help="List of CPU cores to be used by the SPDK vhost app.\
+                            Accepted format examples:\
+                            single cpus: 0,2,4\
+                            ranges (inclusive!): 0-2\
+                            mixed: 0,2-5,9")
+cpu_cfg_create.add_argument('vm_cpu_num', default=None, type=int)
+
+cpu_cfg_load = subparsers.add_parser('load_cpu_cfg',
+                                     help="Load and use a CPU config file for test.\
+                                     Example configuration files can be found in:\
+                                     test/vhost/common/autotest.config")
+cpu_cfg_load.add_argument('custom_mask_file', default=None,
+                          help="Path to a file with custom values for vhost's\
+                          reactor mask and master core, and each VM's qemu mask\
+                          and qemu numa node")
+
+args = parser.parse_args()
+fio_cfg_path = create_fio_cfg(script_dir, script_dir, **vars(args))
+
+cpu_cfg_arg = ""
+disk_arg = ""
+split_arg = ""
+if "spdk_cpu_list" in args:
+    cfg_path = gen_cpu_mask_config(script_dir, args.spdk_cpu_list, args.vm_count, args.vm_cpu_num)
+    cpu_cfg_arg = "--custom-cpu-cfg=%s" % cfg_path
+if "custom_mask_file" in args:
+    cpu_cfg_arg = "--custom-cpu-cfg=%s" % args.custom_mask_file
+if args.split is True:
+    split_arg = "--use-split"
+if args.max_disks > 0:
+    disk_arg = "--max-disks=%s" % args.max_disks
+
+
+command = " ".join(["test/vhost/perf_bench/vhost_perf.sh",
+                    "--vm-image=%s" % args.vm_image,
+                    "--vm-count=%s" % args.vm_count,
+                    "--ctrl-type=%s" % args.ctrl_type,
+                    "%s" % split_arg,
+                    "%s" % disk_arg,
+                    "--fio-job=%s" % fio_cfg_path,
+                    "%s" % cpu_cfg_arg])
+print("INFO: Running perf test with command:")
+print(command)
+pr = check_output(command, shell=True)
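
For reference, a typical invocation of run_vhost_test.py follows from the argparse definitions above. The values below are illustrative only; the script must be run from the SPDK root directory so that the relative path test/vhost/perf_bench/vhost_perf.sh resolves:

    python3 scripts/perf/vhost/run_vhost_test.py 4k 128 randrw --rwmixread 70 --runtime 60 \
        --ctrl-type spdk_vhost_blk --vm-count 4 --max-disks 4 \
        create_cpu_cfg 0,2-5 2

This would run a 4k, QD 128 randrw workload against 4 VMs, reserving cores 0 and 2-5 for the SPDK vhost app and assigning each VM 2 of the remaining cores.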