diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/dmclock/benchmark | |
parent | Initial commit. (diff) | |
download | ceph-6d07fdb6bb33b1af39833b850bb6cf8af79fe293.tar.xz ceph-6d07fdb6bb33b1af39833b850bb6cf8af79fe293.zip |
Adding upstream version 16.2.11+ds. (refs: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/dmclock/benchmark')
-rw-r--r-- | src/dmclock/benchmark/README.md | 42 | ||||
-rw-r--r-- | src/dmclock/benchmark/configs/dmc_sim_100_100.conf | 31 | ||||
-rw-r--r-- | src/dmclock/benchmark/configs/dmc_sim_8_6.conf | 43 | ||||
-rwxr-xr-x | src/dmclock/benchmark/data_gen.sh | 73 | ||||
-rwxr-xr-x | src/dmclock/benchmark/data_parser.py | 191 | ||||
-rwxr-xr-x | src/dmclock/benchmark/plot_gen.sh | 60 | ||||
-rwxr-xr-x | src/dmclock/benchmark/run.sh | 24 |
7 files changed, 464 insertions, 0 deletions
diff --git a/src/dmclock/benchmark/README.md b/src/dmclock/benchmark/README.md new file mode 100644 index 000000000..d945e986f --- /dev/null +++ b/src/dmclock/benchmark/README.md @@ -0,0 +1,42 @@ +# dmclock benchmarking + +**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the +value 1, the shell and Python scripts that generate the PDFs no longer +work exactly correctly. Some effort to debug is necessary. + +This directory contains scripts to evaluate the effects of different +branching-factors (k=1 to k=11) in the IndirectIntrusiveHeap +data-structure. IndirectIntrusiveHeap is now a k-way heap, so finding +an ideal value for k (e.g., k=2 or k=3) for a particular work-load is +important. Also, it is well-documented that the right choice of +k-value improves the caching behaviour [Syed -- citation needed +here]. As a result, the overall performance of an application using +a k-way heap increases significantly [Syed -- citation needed here]. + +A rule of thumb is the following: + if the number of elements is <= 6, use k=1 + otherwise, use k=3. + +## Prerequisites + +Requires Python 2.7, gnuplot, and awk. + +## Running benchmark + +./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work] + +The "run.sh" script looks for config files in the "configs" directory, +and the final output is generated as +"name_of_the_output.pdf". Internally, "run.sh" calls other scripts +such as data_gen.sh, data_parser.py, and plot_gen.sh. + +## Modifying parameters + +To modify the k-value and/or the number of times each simulation is +repeated, modify the following two variables in the "run.sh" file: + + k_way=[your_value] + repeat=[your_value] + +For example, k_way=3 means that the benchmark will compare simulations +using 1-way, 2-way, and 3-way heaps. 
diff --git a/src/dmclock/benchmark/configs/dmc_sim_100_100.conf b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf new file mode 100644 index 000000000..c93d4c71f --- /dev/null +++ b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf @@ -0,0 +1,31 @@ +[global] +server_groups = 1 +client_groups = 2 +server_random_selection = true +server_soft_limit = true + +[server.0] +server_count = 100 +server_iops = 160 + +[client.0] +client_count = 99 +client_wait = 0 +client_total_ops = 10000 +client_server_select_range = 100 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 100.0 +client_limit = 0.0 +client_weight = 1.0 + +[client.1] +client_count = 1 +client_wait = 10 +client_total_ops = 10000 +client_server_select_range = 100 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 100.0 +client_limit = 0.0 +client_weight = 1.0 diff --git a/src/dmclock/benchmark/configs/dmc_sim_8_6.conf b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf new file mode 100644 index 000000000..28aeb401d --- /dev/null +++ b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf @@ -0,0 +1,43 @@ +[global] +server_groups = 1 +client_groups = 3 +server_random_selection = true +server_soft_limit = true + +[client.0] +client_count = 2 +client_wait = 0 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 0.0 +client_weight = 1.0 + +[client.1] +client_count = 2 +client_wait = 5 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 20.0 +client_limit = 40.0 +client_weight = 1.0 + +[client.2] +client_count = 2 +client_wait = 10 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 50.0 +client_weight = 2.0 + + +[server.0] +server_count = 8 +server_iops = 160 diff --git 
a/src/dmclock/benchmark/data_gen.sh b/src/dmclock/benchmark/data_gen.sh new file mode 100755 index 000000000..80a77bd9a --- /dev/null +++ b/src/dmclock/benchmark/data_gen.sh @@ -0,0 +1,73 @@ +#!/bin/bash +config_dir="configs" +repeat=2 #5 + +# parameter check -- output_file name +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +# parameter check -- k-value +if [ "$2" != "" ]; then + k_way="$2" +else + echo "Please provide the maximum K_WAY value" + exit +fi + +# parameter check --repeat +if [ "$3" != "" ]; then + repeat="$3" +fi + +echo "k-way:$k_way, num_repeat:$repeat" + +# create simulators in different directories +k=2 +while [ $k -le $k_way ] +do + mkdir "build_$k" + cd "build_$k" + rm -rf * + cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP=$k ../../. + make dmclock-sims + cd .. + + k=$(( $k + 1 )) +done + +# run simulators +echo '' > $output_file +for config in "$config_dir"/*.conf +do + k=2 + while [ $k -le $k_way ] + do + cd "build_$k" + + # repeat same experiment + i=0 + while [ $i -lt $repeat ] + do + i=$(( $i + 1 )) + + # clear cache first + sync + #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches' + #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches' + #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches' + + # run with heap + msg="file_name:$k:$config" + echo $msg >> ../$output_file + echo "running $msg ..." + ./sim/dmc_sim -c ../$config | awk '(/average/)' >> ../$output_file + done # end repeat + cd .. 
+ k=$(( $k + 1 )) + done # end k_way +done # end config + diff --git a/src/dmclock/benchmark/data_parser.py b/src/dmclock/benchmark/data_parser.py new file mode 100755 index 000000000..c90d85fd9 --- /dev/null +++ b/src/dmclock/benchmark/data_parser.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python + +class DataPoint: + def __init__(self): + self.nserver = 0; + self.nclient = 0; + self.heap_type = 0; + self.total_time_to_add_req = 0; + self.total_time_to_complete_req = 0; + self.config = '' + + def set_name(self, config, heap_type): + self.config = config; + self.heap_type = heap_type + + def get_conig(self): + import re + return re.split(r"/|\.", self.config)[1] + + def __str__(self): + return "s:%d, c:%d,h:%d,config:%s"%(self.nserver, self.nclient, self.heap_type, self.config); +# end DataPoint + + +def isFloat(elem): + try: + float(elem) + return True + except ValueError: + return False +#end isFloat + + +def parse_config_params(fname): + nclient = 0; + nserver = 0; + # read config file property + with open(fname, 'r') as f: + for line in f: + line = line.strip('\n \t') + if not line: continue; + if line.startswith("client_count"): + nclient += int(line.split('=')[-1]); + if line.startswith("server_count"): + nserver += int(line.split('=')[-1]); + # end of file + return [nserver, nclient]; +# parse_config_params + +def make_aggregate_data_point(dps, config, heap_type): + # create new aggregate point + dp = DataPoint(); + # set set and k_way_heap property + dp.set_name(config, heap_type); + + num_run = 0 + for _dp in dps: + if _dp.config == config and _dp.heap_type == heap_type: + # print _dp, config, heap_type + dp.nserver =_dp.nserver + dp.nclient = _dp.nclient + num_run += 1 + dp.total_time_to_add_req += _dp.total_time_to_add_req + dp.total_time_to_complete_req += _dp.total_time_to_complete_req + + # average + dp.total_time_to_add_req /= num_run; + dp.total_time_to_complete_req /= num_run + #print dp + return dp; + +def parse_data_points(filename): + dps = []; 
#data-points + dp = None; + state = 0; + configs = {} + k_ways = {} + + with open(filename, 'r') as f: + for line in f: + line = line.strip('\n \t') + if not line: continue; + + # file_name:1:configs/dmc_sim_8_6.conf + if line.startswith("file_name"): + if dp: + dps.append(dp); + state = 0; + + # new data-point + dp = DataPoint(); + parts = line.split(':') + fname = parts[-1]; + dp.heap_type = int(parts[1]); + if dp.heap_type not in k_ways: + k_ways[dp.heap_type] = 1; + + # add to the dictionary + configs[fname] = 1; + + dp.config = fname; + params = parse_config_params(fname) + dp.nserver = params[0]; + dp.nclient = params[-1]; + + elif line.startswith("average"): # take last 2 averages + r = [float(s) for s in line.split(' ') if isFloat(s)] + state +=1; + #print r, dp #if isFloat(s) + if state == 3: + dp.total_time_to_add_req = r[0] + elif state == 4: + dp.total_time_to_complete_req = r[0] + else: pass + + else: + pass; + # final entry + dps.append(dp) + + # compute average of multiple runs + dps_avg = [] + for config in configs: + data_per_config = [] + for k in k_ways: + aggr_dp = make_aggregate_data_point(dps, config , k); + data_per_config.append(aggr_dp); + dps_avg.append(data_per_config); + # end for + return dps_avg; +# end parse_data_points + + +def create_header(num_cols): + fields = ['nserver_nclient(config_file)','add_req', 'complete_req']; + header = fields[0] + #write add_req_{1, ...} + for i in range(num_cols): + header = '%s %s_%i'%(header, fields[1], i+2) + #write complete_req_{1, ...} + for i in range(num_cols): + header = '%s %s_%i'%(header, fields[2], i+2) + # new-line + header = '%s\n'%(header) + return header +# end create_header + + +def create_data_line(aggr_dp): + # get common info + dp = aggr_dp[0] + data_line = "s:%d_c:%d "%(dp.nserver, dp.nclient); + # get the point-count + num_cols = len(aggr_dp); + # write add_req_{1, ...} + for i in range(num_cols): + data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_add_req) + # write 
complete_req_{1, ...} + for i in range(num_cols): + data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_complete_req) + # new-line + data_line = '%s\n'%(data_line) + return data_line +# end create_data_line + + +def make_data(filename): + # write the aggregated point in space separated file + dps = parse_data_points(filename); + if not len(dps) : return + print "total points: ", len(dps) + # open file + with open('%s.dat'%(filename), 'w+') as f: + # write header + f.write(create_header(len(dps[0]))); + # write data-line + for aggr_dp in dps: + f.write(create_data_line(aggr_dp)); + + +def main(output_file): + print output_file + make_data(output_file); + +import sys +if __name__ == "__main__": + file_name="result" + if len(sys.argv) > 1: + file_name=sys.argv[1].strip() + main(file_name) + diff --git a/src/dmclock/benchmark/plot_gen.sh b/src/dmclock/benchmark/plot_gen.sh new file mode 100755 index 000000000..d90bde192 --- /dev/null +++ b/src/dmclock/benchmark/plot_gen.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +# parameter check -- k-value +if [ "$2" != "" ]; then + k_way="$2" +else + echo "Please provide the maximum K_WAY value" + exit +fi +#echo "k-way: $k_way" +#exit + +gnuplot << EOF + +# Note you need gnuplot 4.4 for the pdfcairo terminal. 
+clear +reset + +set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced +set output "${output_file}.pdf" + +# starts multiplot +set multiplot layout 2,1 + +# Line style for axes +set style line 80 lt rgb "#808080" + +# Line style for grid +set style line 81 lt 0 # dashed +set style line 81 lt rgb "#808080" # grey + +set grid back linestyle 81 +set border 3 back linestyle 80 + +#set xtics rotate out +set style data histogram +set style histogram clustered + +set style fill solid border +set xlabel 'Heap Timing for different K values' +set ylabel 'Time (nanosec)' +set key top right + +set yrange [0:*] + +# plot 1 +set title 'Request Addition Time' +plot for [COL=2:($k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader + +# plot 2 +set title 'Request Completion Time' +plot for [COL=($k_way + 2):(2 * $k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader +EOF diff --git a/src/dmclock/benchmark/run.sh b/src/dmclock/benchmark/run.sh new file mode 100755 index 000000000..11432b530 --- /dev/null +++ b/src/dmclock/benchmark/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# default value +k_way=3 #11 +repeat=2 #5 + +output_file="" +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +echo "generating file ${output_file}" +sh data_gen.sh ${output_file} ${k_way} ${repeat} + +echo "converting ${output_file} to ${output_file}.dat" +python data_parser.py ${output_file} + +echo "now generating bar-chart" +#gnuplot -e 'output_file=value' plot_gen.gnuplot +sh plot_gen.sh ${output_file} ${k_way} +echo "done! check ${output_file}.pdf" |