diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/test/nvme/perf/run_perf.sh | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/test/nvme/perf/run_perf.sh')
-rwxr-xr-x | src/spdk/test/nvme/perf/run_perf.sh | 374 |
1 files changed, 374 insertions, 0 deletions
#!/usr/bin/env bash
#
# run_perf.sh - run an NVMe PMD/BDEV performance workload and record the results.
#
# The script parses its command line options, prepares a per-run result
# directory under $testdir/results, runs the selected workload $REPEAT_NO times
# with the tool chosen by --driver (SPDK perf tools, SPDK fio plugins or fio
# with a kernel ioengine), and appends one averaged row to a CSV file.
#
# The helper functions called below (create_spdk_bdev_conf, verify_disk_number,
# get_disks, get_numa_node, get_cores, preconditioning, wait_for_nvme_reload,
# run_bdevperf, run_nvmeperf, run_spdk_nvme_fio, run_nvme_fio, create_fio_config,
# get_results, get_bdevperf_results, get_nvmeperf_results, print_backtrace) are
# not defined in this file; presumably they come from the sourced common files.
set -e

# Dir variables and sourcing common files
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../../..")
plugin_dir=$rootdir/build/fio
bdevperf_dir=$rootdir/test/bdev/bdevperf
nvmeperf_dir=$rootdir/build/examples
source "$testdir/common.sh"
source "$rootdir/scripts/common.sh" || exit 1
source "$rootdir/test/common/autotest_common.sh"

# Global & default variables
# Maps each kernel driver name accepted by --driver to the fio ioengine options
# that are passed to run_nvme_fio.
declare -A KERNEL_ENGINES
KERNEL_ENGINES=(
	["kernel-libaio"]="--ioengine=libaio"
	["kernel-classic-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-hybrid-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-io-uring"]="--ioengine=io_uring")

RW=randrw
MIX=100
IODEPTH=256
BLK_SIZE=4096
RUNTIME=600
RAMP_TIME=30
NUMJOBS=1
REPEAT_NO=3
GTOD_REDUCE=false
SAMPLING_INT=0
FIO_BIN=$CONFIG_FIO_SOURCE_DIR/fio
TMP_RESULT_FILE=$testdir/result.json
PLUGIN="nvme"
DISKCFG=""
BDEV_CACHE=""
BDEV_POOL=""
DISKNO="ALL"
CPUS_ALLOWED=1
NOIOSCALING=false
PRECONDITIONING=true
CPUFREQ=""
PERFTOP=false
DPDKMEM=false
DATE="$(date +'%m_%d_%Y_%H%M%S')"

# Print the help text.
#   $1 - script path; only its basename is shown in the "Usage:" line
#   $2 - optional message printed (followed by an empty line) before the help
# Command tracing is switched off while printing and switched on again at the end.
function usage() {
	set +x
	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Run NVMe PMD/BDEV performance test. Change options for easier debug and setup configuration"
	echo "Usage: $(basename "$1") [options]"
	echo "-h, --help                Print help and exit"
	echo
	echo "Workload parameters:"
	echo "    --rw=STR              Type of I/O pattern. Accepted values are randrw,rw. [default=$RW]"
	echo "    --rwmixread=INT       Percentage of a mixed workload that should be reads. [default=$MIX]"
	echo "    --iodepth=INT         Number of I/Os to keep in flight against the file. [default=$IODEPTH]"
	echo "    --block-size=INT      The block size in bytes used for I/O units. [default=$BLK_SIZE]"
	echo "    --run-time=TIME[s]    Tell fio to run the workload for the specified period of time. [default=$RUNTIME]"
	echo "    --ramp-time=TIME[s]   Fio will run the specified workload for this amount of time before"
	echo "                          logging any performance numbers. [default=$RAMP_TIME]. Applicable only for fio-based tests."
	echo "    --numjobs=INT         Create the specified number of clones of this job. [default=$NUMJOBS]"
	echo "                          Applicable only for fio-based tests."
	echo "    --repeat-no=INT       How many times to repeat workload test. [default=$REPEAT_NO]"
	echo "                          Test result will be an average of repeated test runs."
	echo "    --gtod-reduce         Enable fio gtod_reduce option. [default=$GTOD_REDUCE]"
	echo "    --sampling-int=INT    Value for fio log_avg_msec parameters [default=$SAMPLING_INT]"
	echo "    --fio-bin=PATH        Path to fio binary. [default=$FIO_BIN]"
	echo "                          Applicable only for fio-based tests."
	echo
	echo "Test setup parameters:"
	echo "    --driver=STR            Selects tool used for testing. Choices available:"
	echo "                               - spdk-perf-nvme (SPDK nvme perf)"
	echo "                               - spdk-perf-bdev (SPDK bdev perf)"
	echo "                               - spdk-plugin-nvme (SPDK nvme fio plugin)"
	echo "                               - spdk-plugin-bdev (SPDK bdev fio plugin)"
	echo "                               - kernel-classic-polling"
	echo "                               - kernel-hybrid-polling"
	echo "                               - kernel-libaio"
	echo "                               - kernel-io-uring"
	echo "    --disk-config           Configuration file containing PCI BDF addresses of NVMe disks to use in test."
	echo "                            It consists a single column of PCI addresses. SPDK Bdev names will be assigned"
	echo "                            and Kernel block device names detected."
	echo "                            Lines starting with # are ignored as comments."
	echo "    --bdev-io-cache-size    Set IO cache size for for SPDK bdev subsystem."
	echo "    --bdev-io-pool-size     Set IO pool size for for SPDK bdev subsystem."
	echo "    --max-disk=INT,ALL      Number of disks to test on, this will run multiple workloads with increasing number of disk each run."
	echo "                            If =ALL then test on all found disk. [default=$DISKNO]"
	echo "    --cpu-allowed=INT/PATH  Comma-separated list of CPU cores used to run the workload. Ranges allowed."
	echo "                            Can also point to a file containing list of CPUs. [default=$CPUS_ALLOWED]"
	echo "    --no-preconditioning    Skip preconditioning"
	echo "    --no-io-scaling         Do not scale iodepth for each device in SPDK fio plugin. [default=$NOIOSCALING]"
	echo "    --cpu-frequency=INT     Run tests with CPUs set to a desired frequency. 'intel_pstate=disable' must be set in"
	echo "                            GRUB options. You can use 'cpupower frequency-info' and 'cpupower frequency-set' to"
	echo "                            check list of available frequencies. Example: --cpu-frequency=1100000."
	echo
	echo "Other options:"
	echo "    --perftop           Run perftop measurements on the same CPU cores as specified in --cpu-allowed option."
	echo "    --dpdk-mem-stats    Dump DPDK memory stats during the test."
	set -x
}

# Long options are handled through the getopts "-:" trick: "--name=value" is
# delivered as option "-" with OPTARG="name=value".
while getopts 'h-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help)
					usage "$0"
					exit 0
					;;
				rw=*) RW="${OPTARG#*=}" ;;
				rwmixread=*) MIX="${OPTARG#*=}" ;;
				iodepth=*) IODEPTH="${OPTARG#*=}" ;;
				block-size=*) BLK_SIZE="${OPTARG#*=}" ;;
				run-time=*) RUNTIME="${OPTARG#*=}" ;;
				ramp-time=*) RAMP_TIME="${OPTARG#*=}" ;;
				numjobs=*) NUMJOBS="${OPTARG#*=}" ;;
				repeat-no=*) REPEAT_NO="${OPTARG#*=}" ;;
				gtod-reduce) GTOD_REDUCE=true ;;
				sampling-int=*) SAMPLING_INT="${OPTARG#*=}" ;;
				fio-bin=*) FIO_BIN="${OPTARG#*=}" ;;
				driver=*) PLUGIN="${OPTARG#*=}" ;;
				disk-config=*)
					DISKCFG="${OPTARG#*=}"
					if [[ ! -f "$DISKCFG" ]]; then
						echo "Disk confiuration file $DISKCFG does not exist!"
						exit 1
					fi
					;;
				bdev-io-cache-size=*) BDEV_CACHE="${OPTARG#*=}" ;;
				bdev-io-pool-size=*) BDEV_POOL="${OPTARG#*=}" ;;
				max-disk=*) DISKNO="${OPTARG#*=}" ;;
				cpu-allowed=*)
					CPUS_ALLOWED="${OPTARG#*=}"
					# The value may be a path to a file holding the CPU list.
					if [[ -f "$CPUS_ALLOWED" ]]; then
						CPUS_ALLOWED=$(cat "$CPUS_ALLOWED")
					fi
					;;
				no-preconditioning) PRECONDITIONING=false ;;
				no-io-scaling) NOIOSCALING=true ;;
				cpu-frequency=*) CPUFREQ="${OPTARG#*=}" ;;
				perftop) PERFTOP=true ;;
				dpdk-mem-stats) DPDKMEM=true ;;
				*)
					# The message must be the second argument of usage; a stray
					# "echo" word used to be passed here and printed instead.
					usage "$0" "Invalid argument '$OPTARG'"
					exit 1
					;;
			esac
			;;
		h)
			usage "$0"
			exit 0
			;;
		*)
			usage "$0" "Invalid argument '$optchar'"
			exit 1
			;;
	esac
done

# The accumulated results are divided by REPEAT_NO at the end, so reject
# anything that is not a positive integer up front.
if [[ ! "$REPEAT_NO" =~ ^[1-9][0-9]*$ ]]; then
	usage "$0" "Invalid value '$REPEAT_NO' for --repeat-no, expected a positive integer"
	exit 1
fi

result_dir=$testdir/results/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}
result_file=$result_dir/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.csv
mkdir -p "$result_dir"
# Accumulators start unset; bash arithmetic treats an unset variable as 0.
unset iops_disks bw mean_lat_disks_usec p99_lat_disks_usec p99_99_lat_disks_usec stdev_disks_usec

# On failure or termination: remove leftovers, stop the background helpers if
# they were started, and print a backtrace. The PID checks keep the handler from
# failing on "kill" without arguments when perf was never launched.
trap 'rm -f *.state $testdir/bdev.conf; [[ -z "${perf_pid:-}" ]] || kill $perf_pid || true; [[ -z "${dpdk_mem_pid:-}" ]] || wait $dpdk_mem_pid || true; print_backtrace' ERR SIGTERM SIGABRT

if [[ "$PLUGIN" =~ "bdev" ]]; then
	create_spdk_bdev_conf "$BDEV_CACHE" "$BDEV_POOL"
fi
verify_disk_number
DISK_NAMES=$(get_disks "$PLUGIN")
DISKS_NUMA=$(get_numa_node "$PLUGIN" "$DISK_NAMES")
CORES=$(get_cores "$CPUS_ALLOWED")
# Word splitting is intended: one array element per core.
NO_CORES_ARRAY=($CORES)
NO_CORES=${#NO_CORES_ARRAY[@]}

# Write the CSV preamble only now that NO_CORES is known. Writing it earlier
# left the num-cpu-cores field empty and, with unquoted arguments, shifted the
# remaining columns of the parameter row.
echo "run-time,ramp-time,fio-plugin,QD,block-size,num-cpu-cores,workload,workload-mix" > "$result_file"
printf "%s,%s,%s,%s,%s,%s,%s,%s\n" "$RUNTIME" "$RAMP_TIME" "$PLUGIN" "$IODEPTH" "$BLK_SIZE" "$NO_CORES" "$RW" "$MIX" >> "$result_file"
echo "num_of_disks,iops,avg_lat[usec],p99[usec],p99.99[usec],stdev[usec],avg_slat[usec],avg_clat[usec],bw[Kib/s]" >> "$result_file"

if $PRECONDITIONING; then
	preconditioning
fi

if [[ "$PLUGIN" =~ "kernel" ]]; then
	"$rootdir/scripts/setup.sh" reset
	fio_ioengine_opt="${KERNEL_ENGINES[$PLUGIN]}"

	if [[ $PLUGIN = "kernel-classic-polling" ]]; then
		for disk in $DISK_NAMES; do
			echo -1 > /sys/block/$disk/queue/io_poll_delay
		done
	elif [[ $PLUGIN = "kernel-hybrid-polling" ]]; then
		for disk in $DISK_NAMES; do
			echo 0 > /sys/block/$disk/queue/io_poll_delay
		done
	elif [[ $PLUGIN = "kernel-io-uring" ]]; then
		# Reload the nvme module with poll queues enabled.
		modprobe -rv nvme
		modprobe nvme poll_queues=8
		wait_for_nvme_reload $DISK_NAMES

		backup_dir="/tmp/nvme_param_bak"
		mkdir -p $backup_dir

		# Save the current queue settings; they are restored at the end of the script.
		for disk in $DISK_NAMES; do
			echo "INFO: Backing up device parameters for $disk"
			sysfs=/sys/block/$disk/queue
			mkdir -p $backup_dir/$disk
			cat $sysfs/iostats > $backup_dir/$disk/iostats
			cat $sysfs/rq_affinity > $backup_dir/$disk/rq_affinity
			cat $sysfs/nomerges > $backup_dir/$disk/nomerges
			cat $sysfs/io_poll_delay > $backup_dir/$disk/io_poll_delay
		done

		for disk in $DISK_NAMES; do
			echo "INFO: Setting device parameters for $disk"
			sysfs=/sys/block/$disk/queue
			echo 0 > $sysfs/iostats
			echo 0 > $sysfs/rq_affinity
			echo 2 > $sysfs/nomerges
			echo 0 > $sysfs/io_poll_delay
		done
	fi
fi

if [[ -n "$CPUFREQ" ]]; then
	if [[ ! "$(cat /proc/cmdline)" =~ "intel_pstate=disable" ]]; then
		echo "ERROR: Cannot set custom CPU frequency for test. intel_pstate=disable not in boot options."
		# Fail on purpose; with "set -e" this fires the ERR trap and ends the script.
		false
	else
		# Remember the governor of cpu0 so it can be restored after the test.
		cpu_governor="$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)"
		cpupower frequency-set -g userspace
		cpupower frequency-set -f $CPUFREQ
	fi
fi

if $PERFTOP; then
	echo "INFO: starting perf record on cores $CPUS_ALLOWED"
	perf record -C $CPUS_ALLOWED -o "$testdir/perf.data" &
	perf_pid=$!
fi

if $DPDKMEM; then
	echo "INFO: waiting to generate DPDK memory usage"
	# Request the memory stats halfway through the run; for drivers whose name
	# does not contain "perf" the ramp time is added on top.
	wait_time=$((RUNTIME / 2))
	if [[ ! "$PLUGIN" =~ "perf" ]]; then
		wait_time=$((wait_time + RAMP_TIME))
	fi
	(
		sleep $wait_time
		echo "INFO: generating DPDK memory usage"
		$rootdir/scripts/rpc.py env_dpdk_get_mem_stats
	) &
	dpdk_mem_pid=$!
fi

# Run each workload $REPEAT_NO times
# NOTE(review): $k is not assigned anywhere in this script; unless one of the
# sourced files sets it, it expands to an empty string in the file names below.
for ((j = 0; j < REPEAT_NO; j++)); do
	if [[ "$PLUGIN" == "spdk-perf-bdev" ]]; then
		run_bdevperf > $TMP_RESULT_FILE
		iops_disks=$((iops_disks + $(get_bdevperf_results iops)))
		bw=$((bw + $(get_bdevperf_results bw_Kibs)))
		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output
	elif [[ "$PLUGIN" == "spdk-perf-nvme" ]]; then
		run_nvmeperf $DISKNO > $TMP_RESULT_FILE
		read -r iops bandwidth mean_lat min_lat max_lat <<< $(get_nvmeperf_results)

		iops_disks=$((iops_disks + iops))
		bw=$((bw + bandwidth))
		mean_lat_disks_usec=$((mean_lat_disks_usec + mean_lat))
		min_lat_disks_usec=$((min_lat_disks_usec + min_lat))
		max_lat_disks_usec=$((max_lat_disks_usec + max_lat))

		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output
	else
		# fio-based drivers: SPDK fio plugins and kernel ioengines.
		create_fio_config $DISKNO $PLUGIN "$DISK_NAMES" "$DISKS_NUMA" "$CORES"

		if [[ "$PLUGIN" =~ "spdk-plugin" ]]; then
			run_spdk_nvme_fio $PLUGIN "--output=$TMP_RESULT_FILE" \
				"--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}"
		else
			# $fio_ioengine_opt is left unquoted on purpose: it may hold several options.
			run_nvme_fio $fio_ioengine_opt "--output=$TMP_RESULT_FILE" \
				"--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}"
		fi

		#Store values for every number of used disks
		#Use recalculated value for mixread param in case rw mode is not rw.
		rwmixread=$MIX
		if [[ $RW = *"read"* ]]; then
			rwmixread=100
		elif [[ $RW = *"write"* ]]; then
			rwmixread=0
		fi
		iops_disks=$((iops_disks + $(get_results iops $rwmixread)))
		mean_lat_disks_usec=$((mean_lat_disks_usec + $(get_results mean_lat_usec $rwmixread)))
		p99_lat_disks_usec=$((p99_lat_disks_usec + $(get_results p99_lat_usec $rwmixread)))
		p99_99_lat_disks_usec=$((p99_99_lat_disks_usec + $(get_results p99_99_lat_usec $rwmixread)))
		stdev_disks_usec=$((stdev_disks_usec + $(get_results stdev_usec $rwmixread)))

		mean_slat_disks_usec=$((mean_slat_disks_usec + $(get_results mean_slat_usec $rwmixread)))
		mean_clat_disks_usec=$((mean_clat_disks_usec + $(get_results mean_clat_usec $rwmixread)))
		bw=$((bw + $(get_results bw_Kibs $rwmixread)))

		cp $TMP_RESULT_FILE $result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.json
		cp $testdir/config.fio $result_dir/config_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.fio
		rm -f $testdir/config.fio
	fi
done

if $PERFTOP; then
	echo "INFO: Stopping perftop measurements."
	kill $perf_pid
	# perf was just killed, so a non-zero wait status is expected here.
	wait $perf_pid || true
	perf report -i "$testdir/perf.data" > $result_dir/perftop_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt
	rm -f "$testdir/perf.data"
fi

if $DPDKMEM; then
	mv "/tmp/spdk_mem_dump.txt" $result_dir/spdk_mem_dump_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt
	echo "INFO: DPDK memory usage saved in $result_dir"
fi

#Write results to csv file
# Turn the sums collected over all repetitions into averages.
iops_disks=$((iops_disks / REPEAT_NO))
bw=$((bw / REPEAT_NO))
case "$PLUGIN" in
	spdk-perf-bdev)
		# Only iops and bandwidth are collected for this driver.
		mean_lat_disks_usec=0
		p99_lat_disks_usec=0
		p99_99_lat_disks_usec=0
		stdev_disks_usec=0
		mean_slat_disks_usec=0
		mean_clat_disks_usec=0
		;;
	spdk-perf-nvme)
		mean_lat_disks_usec=$((mean_lat_disks_usec / REPEAT_NO))
		p99_lat_disks_usec=0
		p99_99_lat_disks_usec=0
		stdev_disks_usec=0
		mean_slat_disks_usec=0
		mean_clat_disks_usec=0
		;;
	*)
		# Every other driver went through the fio branch of the loop above. This
		# includes the kernel-* drivers, whose latency sums were previously
		# written to the CSV without being averaged.
		mean_lat_disks_usec=$((mean_lat_disks_usec / REPEAT_NO))
		p99_lat_disks_usec=$((p99_lat_disks_usec / REPEAT_NO))
		p99_99_lat_disks_usec=$((p99_99_lat_disks_usec / REPEAT_NO))
		stdev_disks_usec=$((stdev_disks_usec / REPEAT_NO))
		mean_slat_disks_usec=$((mean_slat_disks_usec / REPEAT_NO))
		mean_clat_disks_usec=$((mean_clat_disks_usec / REPEAT_NO))
		;;
esac

# Arguments are quoted so that an empty value keeps its column.
printf "%s,%s,%s,%s,%s,%s,%s,%s,%s\n" "${DISKNO}" "${iops_disks}" "${mean_lat_disks_usec}" "${p99_lat_disks_usec}" \
	"${p99_99_lat_disks_usec}" "${stdev_disks_usec}" "${mean_slat_disks_usec}" "${mean_clat_disks_usec}" "${bw}" >> "$result_file"

if [[ -n "$CPUFREQ" ]]; then
	cpupower frequency-set -g $cpu_governor
fi

if [[ "$PLUGIN" == "kernel-io-uring" ]]; then
	# Reload the nvme driver so that other test runs are not affected
	modprobe -rv nvme
	modprobe nvme
	wait_for_nvme_reload $DISK_NAMES

	for disk in $DISK_NAMES; do
		echo "INFO: Restoring device parameters for $disk"
		sysfs=/sys/block/$disk/queue
		cat $backup_dir/$disk/iostats > $sysfs/iostats
		cat $backup_dir/$disk/rq_affinity > $sysfs/rq_affinity
		cat $backup_dir/$disk/nomerges > $sysfs/nomerges
		cat $backup_dir/$disk/io_poll_delay > $sysfs/io_poll_delay
	done
fi
rm -f "$testdir/bdev.conf" "$testdir/config.fio"