summaryrefslogtreecommitdiffstats
path: root/src/dmclock/benchmark
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/dmclock/benchmark
parentInitial commit. (diff)
downloadceph-6d07fdb6bb33b1af39833b850bb6cf8af79fe293.tar.xz
ceph-6d07fdb6bb33b1af39833b850bb6cf8af79fe293.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/dmclock/benchmark')
-rw-r--r--src/dmclock/benchmark/README.md42
-rw-r--r--src/dmclock/benchmark/configs/dmc_sim_100_100.conf31
-rw-r--r--src/dmclock/benchmark/configs/dmc_sim_8_6.conf43
-rwxr-xr-xsrc/dmclock/benchmark/data_gen.sh73
-rwxr-xr-xsrc/dmclock/benchmark/data_parser.py191
-rwxr-xr-xsrc/dmclock/benchmark/plot_gen.sh60
-rwxr-xr-xsrc/dmclock/benchmark/run.sh24
7 files changed, 464 insertions, 0 deletions
diff --git a/src/dmclock/benchmark/README.md b/src/dmclock/benchmark/README.md
new file mode 100644
index 000000000..d945e986f
--- /dev/null
+++ b/src/dmclock/benchmark/README.md
@@ -0,0 +1,42 @@
+# dmclock benchmarking
+
+**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the
+value 1, the shell and Python scripts that generate the PDFs no longer
+work exactly correctly. Some effort to debug is necessary.
+
+This directory contains scripts to evaluate effects of different
+branching-factors (k=1 to k=11) in the IndirectIntrusiveHeap
+data-structure. IndirectIntrusiveHeap is now a k-way heap, so finding
+an ideal value for k (i.e., k=2 or k=3) for a particular work-load is
+important. Also, it is well-documented that the right choice of
+k-value improves the caching behaviour [Syed -- citation needed
+here]. As a result, the overall performance of an application using
+k-way heap increases significantly [Syed -- citation needed here].
+
+A rule of thumb is the following:
+ if number of elements are <= 6, use k=1
+ otherwise, use k=3.
+
+## Prerequisites
+
+requires python 2.7, gnuplot, and awk.
+
+## Running benchmark
+
+./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work]
+
+The "run.sh" script looks for config files in the "configs" directory,
+and the final output is generated as
+"name_of_the_output.pdf". Internally, "run.sh" calls other scripts
+such as data_gen.sh, data_parser.py, and plot_gen.sh.
+
+## Modifying parameters
+
+To modify k-value and/or the amount of times each simulation is
+repeated, modify the following two variables in "run.sh" file:
+
+ k_way=[your_value]
+ repeat=[your_value]
+
+For example, k_way=3 means, the benchmark will compare simulations
+using 1-way, 2-way, and 3-way heaps.
diff --git a/src/dmclock/benchmark/configs/dmc_sim_100_100.conf b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf
new file mode 100644
index 000000000..c93d4c71f
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf
@@ -0,0 +1,31 @@
+[global]
+server_groups = 1
+client_groups = 2
+server_random_selection = true
+server_soft_limit = true
+
+[server.0]
+server_count = 100
+server_iops = 160
+
+[client.0]
+client_count = 99
+client_wait = 0
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 1
+client_wait = 10
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
diff --git a/src/dmclock/benchmark/configs/dmc_sim_8_6.conf b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf
new file mode 100644
index 000000000..28aeb401d
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf
@@ -0,0 +1,43 @@
+[global]
+server_groups = 1
+client_groups = 3
+server_random_selection = true
+server_soft_limit = true
+
+[client.0]
+client_count = 2
+client_wait = 0
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 2
+client_wait = 5
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 20.0
+client_limit = 40.0
+client_weight = 1.0
+
+[client.2]
+client_count = 2
+client_wait = 10
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 50.0
+client_weight = 2.0
+
+
+[server.0]
+server_count = 8
+server_iops = 160
diff --git a/src/dmclock/benchmark/data_gen.sh b/src/dmclock/benchmark/data_gen.sh
new file mode 100755
index 000000000..80a77bd9a
--- /dev/null
+++ b/src/dmclock/benchmark/data_gen.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+config_dir="configs"
+repeat=2 #5
+
+# parameter check -- output_file name
+if [ "$1" != "" ]; then
+ output_file="$1"
+else
+ echo "Please provide the name of the output file"
+ exit
+fi
+
+# parameter check -- k-value
+if [ "$2" != "" ]; then
+ k_way="$2"
+else
+ echo "Please provide the maximum K_WAY value"
+ exit
+fi
+
+# parameter check --repeat
+if [ "$3" != "" ]; then
+ repeat="$3"
+fi
+
+echo "k-way:$k_way, num_repeat:$repeat"
+
+# create simulators in different directories
+k=2
+while [ $k -le $k_way ]
+do
+ mkdir "build_$k"
+ cd "build_$k"
+ rm -rf *
+ cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP=$k ../../.
+ make dmclock-sims
+ cd ..
+
+ k=$(( $k + 1 ))
+done
+
+# run simulators
+echo '' > $output_file
+for config in "$config_dir"/*.conf
+do
+ k=2
+ while [ $k -le $k_way ]
+ do
+ cd "build_$k"
+
+ # repeat same experiment
+ i=0
+ while [ $i -lt $repeat ]
+ do
+ i=$(( $i + 1 ))
+
+ # clear cache first
+ sync
+ #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches'
+ #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches'
+ #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'
+
+ # run with heap
+ msg="file_name:$k:$config"
+ echo $msg >> ../$output_file
+ echo "running $msg ..."
+ ./sim/dmc_sim -c ../$config | awk '(/average/)' >> ../$output_file
+ done # end repeat
+ cd ..
+ k=$(( $k + 1 ))
+ done # end k_way
+done # end config
+
diff --git a/src/dmclock/benchmark/data_parser.py b/src/dmclock/benchmark/data_parser.py
new file mode 100755
index 000000000..c90d85fd9
--- /dev/null
+++ b/src/dmclock/benchmark/data_parser.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+
+class DataPoint:
+ def __init__(self):
+ self.nserver = 0;
+ self.nclient = 0;
+ self.heap_type = 0;
+ self.total_time_to_add_req = 0;
+ self.total_time_to_complete_req = 0;
+ self.config = ''
+
+ def set_name(self, config, heap_type):
+ self.config = config;
+ self.heap_type = heap_type
+
+ def get_conig(self):
+ import re
+ return re.split(r"/|\.", self.config)[1]
+
+ def __str__(self):
+ return "s:%d, c:%d,h:%d,config:%s"%(self.nserver, self.nclient, self.heap_type, self.config);
+# end DataPoint
+
+
+def isFloat(elem):
+ try:
+ float(elem)
+ return True
+ except ValueError:
+ return False
+#end isFloat
+
+
+def parse_config_params(fname):
+ nclient = 0;
+ nserver = 0;
+ # read config file property
+ with open(fname, 'r') as f:
+ for line in f:
+ line = line.strip('\n \t')
+ if not line: continue;
+ if line.startswith("client_count"):
+ nclient += int(line.split('=')[-1]);
+ if line.startswith("server_count"):
+ nserver += int(line.split('=')[-1]);
+ # end of file
+ return [nserver, nclient];
+# parse_config_params
+
+def make_aggregate_data_point(dps, config, heap_type):
+ # create new aggregate point
+ dp = DataPoint();
+ # set set and k_way_heap property
+ dp.set_name(config, heap_type);
+
+ num_run = 0
+ for _dp in dps:
+ if _dp.config == config and _dp.heap_type == heap_type:
+ # print _dp, config, heap_type
+ dp.nserver =_dp.nserver
+ dp.nclient = _dp.nclient
+ num_run += 1
+ dp.total_time_to_add_req += _dp.total_time_to_add_req
+ dp.total_time_to_complete_req += _dp.total_time_to_complete_req
+
+ # average
+ dp.total_time_to_add_req /= num_run;
+ dp.total_time_to_complete_req /= num_run
+ #print dp
+ return dp;
+
+def parse_data_points(filename):
+ dps = []; #data-points
+ dp = None;
+ state = 0;
+ configs = {}
+ k_ways = {}
+
+ with open(filename, 'r') as f:
+ for line in f:
+ line = line.strip('\n \t')
+ if not line: continue;
+
+ # file_name:1:configs/dmc_sim_8_6.conf
+ if line.startswith("file_name"):
+ if dp:
+ dps.append(dp);
+ state = 0;
+
+ # new data-point
+ dp = DataPoint();
+ parts = line.split(':')
+ fname = parts[-1];
+ dp.heap_type = int(parts[1]);
+ if dp.heap_type not in k_ways:
+ k_ways[dp.heap_type] = 1;
+
+ # add to the dictionary
+ configs[fname] = 1;
+
+ dp.config = fname;
+ params = parse_config_params(fname)
+ dp.nserver = params[0];
+ dp.nclient = params[-1];
+
+ elif line.startswith("average"): # take last 2 averages
+ r = [float(s) for s in line.split(' ') if isFloat(s)]
+ state +=1;
+ #print r, dp #if isFloat(s)
+ if state == 3:
+ dp.total_time_to_add_req = r[0]
+ elif state == 4:
+ dp.total_time_to_complete_req = r[0]
+ else: pass
+
+ else:
+ pass;
+ # final entry
+ dps.append(dp)
+
+ # compute average of multiple runs
+ dps_avg = []
+ for config in configs:
+ data_per_config = []
+ for k in k_ways:
+ aggr_dp = make_aggregate_data_point(dps, config , k);
+ data_per_config.append(aggr_dp);
+ dps_avg.append(data_per_config);
+ # end for
+ return dps_avg;
+# end parse_data_points
+
+
+def create_header(num_cols):
+ fields = ['nserver_nclient(config_file)','add_req', 'complete_req'];
+ header = fields[0]
+ #write add_req_{1, ...}
+ for i in range(num_cols):
+ header = '%s %s_%i'%(header, fields[1], i+2)
+ #write complete_req_{1, ...}
+ for i in range(num_cols):
+ header = '%s %s_%i'%(header, fields[2], i+2)
+ # new-line
+ header = '%s\n'%(header)
+ return header
+# end create_header
+
+
+def create_data_line(aggr_dp):
+ # get common info
+ dp = aggr_dp[0]
+ data_line = "s:%d_c:%d "%(dp.nserver, dp.nclient);
+ # get the point-count
+ num_cols = len(aggr_dp);
+ # write add_req_{1, ...}
+ for i in range(num_cols):
+ data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_add_req)
+ # write complete_req_{1, ...}
+ for i in range(num_cols):
+ data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_complete_req)
+ # new-line
+ data_line = '%s\n'%(data_line)
+ return data_line
+# end create_data_line
+
+
+def make_data(filename):
+ # write the aggregated point in space separated file
+ dps = parse_data_points(filename);
+ if not len(dps) : return
+ print "total points: ", len(dps)
+ # open file
+ with open('%s.dat'%(filename), 'w+') as f:
+ # write header
+ f.write(create_header(len(dps[0])));
+ # write data-line
+ for aggr_dp in dps:
+ f.write(create_data_line(aggr_dp));
+
+
+def main(output_file):
+ print output_file
+ make_data(output_file);
+
+import sys
+if __name__ == "__main__":
+ file_name="result"
+ if len(sys.argv) > 1:
+ file_name=sys.argv[1].strip()
+ main(file_name)
+
diff --git a/src/dmclock/benchmark/plot_gen.sh b/src/dmclock/benchmark/plot_gen.sh
new file mode 100755
index 000000000..d90bde192
--- /dev/null
+++ b/src/dmclock/benchmark/plot_gen.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+if [ "$1" != "" ]; then
+ output_file="$1"
+else
+ echo "Please provide the name of the output file"
+ exit
+fi
+
+# parameter check -- k-value
+if [ "$2" != "" ]; then
+ k_way="$2"
+else
+ echo "Please provide the maximum K_WAY value"
+ exit
+fi
+#echo "k-way: $k_way"
+#exit
+
+gnuplot << EOF
+
+# Note you need gnuplot 4.4 for the pdfcairo terminal.
+clear
+reset
+
+set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced
+set output "${output_file}.pdf"
+
+# starts multiplot
+set multiplot layout 2,1
+
+# Line style for axes
+set style line 80 lt rgb "#808080"
+
+# Line style for grid
+set style line 81 lt 0 # dashed
+set style line 81 lt rgb "#808080" # grey
+
+set grid back linestyle 81
+set border 3 back linestyle 80
+
+#set xtics rotate out
+set style data histogram
+set style histogram clustered
+
+set style fill solid border
+set xlabel 'Heap Timing for different K values'
+set ylabel 'Time (nanosec)'
+set key top right
+
+set yrange [0:*]
+
+# plot 1
+set title 'Request Addition Time'
+plot for [COL=2:($k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+
+# plot 2
+set title 'Request Completion Time'
+plot for [COL=($k_way + 2):(2 * $k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+EOF
diff --git a/src/dmclock/benchmark/run.sh b/src/dmclock/benchmark/run.sh
new file mode 100755
index 000000000..11432b530
--- /dev/null
+++ b/src/dmclock/benchmark/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# default value
+k_way=3 #11
+repeat=2 #5
+
+output_file=""
+if [ "$1" != "" ]; then
+ output_file="$1"
+else
+ echo "Please provide the name of the output file"
+ exit
+fi
+
+echo "generating file ${output_file}"
+sh data_gen.sh ${output_file} ${k_way} ${repeat}
+
+echo "converting ${output_file} to ${output_file}.dat"
+python data_parser.py ${output_file}
+
+echo "now generating bar-chart"
+#gnuplot -e 'output_file=value' plot_gen.gnuplot
+sh plot_gen.sh ${output_file} ${k_way}
+echo "done! check ${output_file}.pdf"