Adding upstream version 16.2.11+ds.upstream/16.2.11+ds upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit: 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree: 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/dmclock/benchmark
parent: Initial commit. (diff)
download: ceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
7 files changed, 464 insertions, 0 deletions
diff --git a/src/dmclock/benchmark/README.md b/src/dmclock/benchmark/README.md
new file mode 100644
index 000000000..d945e986f
--- /dev/null
+++ b/src/dmclock/benchmark/README.md
@@ -0,0 +1,42 @@
+# dmclock benchmarking
+
+**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the
+value 1, the shell and Python scripts that generate the PDFs no longer
+work exactly correctly. Some effort to debug is necessary.
+
+This directory contains scripts to evaluate the effects of different
+branching factors (k=1 to k=11) in the IndirectIntrusiveHeap
+data structure. IndirectIntrusiveHeap is now a k-way heap, so finding
+an ideal value for k (e.g., k=2 or k=3) for a particular workload is
+important. Also, it is well-documented that the right choice of
+k-value improves the caching behaviour [Syed -- citation needed
+here]. As a result, the overall performance of an application using
+a k-way heap increases significantly [Syed -- citation needed here].
+
+A rule of thumb is the following:
+	if the number of elements is <= 6, use k=1;
+	otherwise, use k=3.
+
+## Prerequisites
+
+Requires Python 2.7, gnuplot, awk, cmake, and make.
+  
+## Running benchmark
+
+./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work]
+
+The "run.sh" script looks for config files in the "configs" directory,
+and the final output is generated as
+"name_of_the_output.pdf". Internally, "run.sh" calls other scripts
+such as data_gen.sh, data_parser.py, and plot_gen.sh.
+
+## Modifying parameters
+
+To modify the k-value and/or the number of times each simulation is
+repeated, modify the following two variables in the "run.sh" file:
+
+    k_way=[your_value]
+    repeat=[your_value]
+
+For example, k_way=3 means the benchmark will compare simulations
+using 2-way and 3-way heaps (data_gen.sh starts at k=2).
diff --git a/src/dmclock/benchmark/configs/dmc_sim_100_100.conf b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf
new file mode 100644
index 000000000..c93d4c71f
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf
@@ -0,0 +1,31 @@
+[global]
+server_groups = 1
+client_groups = 2
+server_random_selection = true
+server_soft_limit = true
+
+[server.0]
+server_count = 100
+server_iops  = 160
+
+[client.0]
+client_count = 99
+client_wait = 0
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 1
+client_wait = 10
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
diff --git a/src/dmclock/benchmark/configs/dmc_sim_8_6.conf b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf
new file mode 100644
index 000000000..28aeb401d
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf
@@ -0,0 +1,43 @@
+[global]
+server_groups = 1
+client_groups = 3
+server_random_selection = true
+server_soft_limit = true
+
+[client.0]
+client_count = 2
+client_wait = 0
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 2
+client_wait = 5
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 20.0
+client_limit = 40.0
+client_weight = 1.0
+
+[client.2]
+client_count = 2
+client_wait = 10
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 50.0
+client_weight = 2.0
+
+
+[server.0]
+server_count = 8
+server_iops  = 160
diff --git a/src/dmclock/benchmark/data_gen.sh b/src/dmclock/benchmark/data_gen.sh
new file mode 100755
index 000000000..80a77bd9a
--- /dev/null
+++ b/src/dmclock/benchmark/data_gen.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Builds a dmclock simulator per branching factor k (2..k_way) in build_<k>/,
+# runs each one against every configs/*.conf, and appends the "average"
+# output lines to <output_file>.  Usage: data_gen.sh <output_file> <max_k_way> [repeat]
+# run.sh starts this script through "sh", so keep the syntax POSIX-sh only.
+config_dir="configs"
+repeat=2 #5
+
+# parameter check -- output_file name
+if [ "$1" != "" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file" >&2
+  exit 1 # a bare "exit" would report success to the caller
+fi
+
+# parameter check -- k-value
+if [ "$2" != "" ]; then
+  k_way="$2"
+else
+  echo "Please provide the maximum K_WAY value" >&2
+  exit 1
+fi
+
+# parameter check --repeat
+if [ "$3" != "" ]; then
+  repeat="$3"
+fi
+
+echo "k-way:$k_way, num_repeat:$repeat"
+
+# create simulators in different directories
+k=2
+while [ "$k" -le "$k_way" ]; do
+  # mkdir -p tolerates reruns; the cd must succeed before "rm -rf", or
+  # the wipe would hit the benchmark directory itself
+  mkdir -p "build_$k" || exit 1
+  cd "build_$k" || exit 1
+  rm -rf ./*
+  cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP="$k" ../../. || exit 1
+  make dmclock-sims || exit 1
+  cd .. || exit 1
+  k=$(( $k + 1 ))
+done
+
+# run simulators
+echo '' > "$output_file"
+for config in "$config_dir"/*.conf; do
+  k=2
+  while [ "$k" -le "$k_way" ]; do
+    cd "build_$k" || exit 1
+    # repeat same experiment
+    i=0
+    while [ "$i" -lt "$repeat" ]; do
+      i=$(( $i + 1 ))
+
+      # clear cache first
+      sync
+      #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches'
+      #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches'
+      #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'
+
+      # run with heap; the marker line is what data_parser.py keys on
+      msg="file_name:$k:$config"
+      echo "$msg" >> "../$output_file"
+      echo "running $msg ..."
+      ./sim/dmc_sim -c "../$config" | awk '(/average/)' >> "../$output_file"
+    done # end repeat
+    cd .. || exit 1
+    k=$(( $k + 1 ))
+  done # end k_way
+done # end config
+
diff --git a/src/dmclock/benchmark/data_parser.py b/src/dmclock/benchmark/data_parser.py
new file mode 100755
index 000000000..c90d85fd9
--- /dev/null
+++ b/src/dmclock/benchmark/data_parser.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+
+class DataPoint:
+  """One benchmark measurement: a (config file, heap branching factor) pair and its timings."""
+  def __init__(self):
+    self.config = ''       # path of the simulator config file
+    self.heap_type = 0     # k of the k-way heap
+    self.nserver = self.nclient = 0
+    self.total_time_to_add_req = self.total_time_to_complete_req = 0
+
+  def set_name(self, config, heap_type):
+    self.config, self.heap_type = config, heap_type
+
+  def get_conig(self):
+    import re
+    pieces = re.split(r"/|\.", self.config)
+    return pieces[1]  # second field after splitting the path on '/' and '.'
+
+  def __str__(self):
+    fields = (self.nserver, self.nclient, self.heap_type, self.config)
+    return "s:%d, c:%d,h:%d,config:%s" % fields
+# end DataPoint
+
+
+def isFloat(elem):
+  # True when float(elem) succeeds, False when it raises ValueError
+  try:
+    float(elem)
+  except ValueError:
+    return False
+  return True
+
+
+def parse_config_params(fname):
+  """Sum the client_count and server_count entries of config file `fname`.
+
+  Returns [nserver, nclient].
+  """
+  totals = {"server_count": 0, "client_count": 0}
+  with open(fname, 'r') as conf:
+    for raw in conf:
+      entry = raw.strip('\n \t')
+      # blank lines match neither key, so they need no special case
+      for key in totals:
+        if entry.startswith(key):
+          totals[key] += int(entry.split('=')[-1])
+  return [totals["server_count"], totals["client_count"]]
+# parse_config_params
+
+def make_aggregate_data_point(dps, config, heap_type):
+    """Average all runs in `dps` that match `config` and `heap_type`.
+
+    Returns a new DataPoint carrying the matching runs' nserver/nclient and
+    the mean add/complete timings; the timings stay 0 when nothing matches.
+    """
+    dp = DataPoint()
+    dp.set_name(config, heap_type)
+    num_run = 0
+    for _dp in dps:
+      if _dp.config == config and _dp.heap_type == heap_type:
+        dp.nserver = _dp.nserver
+        dp.nclient = _dp.nclient
+        num_run += 1
+        dp.total_time_to_add_req += _dp.total_time_to_add_req
+        dp.total_time_to_complete_req += _dp.total_time_to_complete_req
+
+    if num_run:  # no matching run: skip the division instead of raising ZeroDivisionError
+      dp.total_time_to_add_req /= num_run
+      dp.total_time_to_complete_req /= num_run
+    return dp
+
+def parse_data_points(filename):  # returns one list of averaged DataPoints per config
+  dps = []; # one DataPoint per "file_name" record found in the file
+  dp = None;
+  state = 0; # number of "average" lines seen since the current record began
+  configs = {}
+  k_ways  = {}
+  # configs / k_ways are used as sets: keys are the config paths / heap types seen
+  with open(filename, 'r') as f:
+    for line in f:
+      line = line.strip('\n \t')
+      if not line: continue;
+      
+      # record marker, e.g. file_name:1:configs/dmc_sim_8_6.conf
+      if line.startswith("file_name"):      
+        if dp:
+          dps.append(dp);
+          state = 0;
+         
+        # new data-point: field 1 is the heap type, the last field the config path
+        dp = DataPoint();
+        parts = line.split(':')
+        fname = parts[-1];        
+        dp.heap_type = int(parts[1]);
+        if dp.heap_type not in k_ways:
+          k_ways[dp.heap_type] = 1;
+        
+        # remember the config path
+        configs[fname] = 1;
+        
+        dp.config = fname;
+        params = parse_config_params(fname)      
+        dp.nserver = params[0];
+        dp.nclient = params[-1];
+         
+      elif line.startswith("average"):	# only the 3rd and 4th "average" lines of a record are kept
+        r = [float(s) for s in line.split(' ') if isFloat(s)]
+        state +=1;
+        # r holds the numeric tokens of the line; only its first value is used
+        if state == 3:
+          dp.total_time_to_add_req = r[0]
+        elif state == 4:
+          dp.total_time_to_complete_req = r[0]
+        else: pass
+
+      else: 
+        pass;    
+  # store the last record (None when the file had no "file_name" line)
+  dps.append(dp) 
+  
+  # average the repeated runs of every (config, heap type) pair
+  dps_avg = []
+  for config in configs:
+    data_per_config = []
+    for k in k_ways:
+      aggr_dp = make_aggregate_data_point(dps, config , k);
+      data_per_config.append(aggr_dp);
+    dps_avg.append(data_per_config);
+  # dps_avg now holds one inner list per config, one entry per heap type
+  return dps_avg;
+# end parse_data_points
+
+
+def create_header(num_cols):
+  """Build the header line of the .dat file.
+
+  The first column labels the configuration; it is followed by num_cols
+  add_req_<k> columns and num_cols complete_req_<k> columns, with k
+  running from 2 to num_cols + 1.  Columns are separated by one space.
+  """
+  columns = ['nserver_nclient(config_file)']
+  for metric in ('add_req', 'complete_req'):
+    suffixes = range(2, num_cols + 2)
+    columns.extend('%s_%i' % (metric, k) for k in suffixes)
+  return ' '.join(columns) + '\n'
+# end create_header
+
+
+def create_data_line(aggr_dp):
+  """Format one .dat row for the aggregated points of a single config.
+
+  The row starts with "s:<nserver>_c:<nclient> " taken from the first
+  point, then lists every point's add-request time followed by every
+  point's complete-request time, each rendered with %f.
+  """
+  first = aggr_dp[0]
+  label = "s:%d_c:%d " % (first.nserver, first.nclient)
+  add_times = ['%f' % p.total_time_to_add_req for p in aggr_dp]
+  done_times = ['%f' % p.total_time_to_complete_req for p in aggr_dp]
+
+  # the label already ends in a space, so joining on ' ' reproduces the
+  # double space that separates it from the first value
+  return ' '.join([label] + add_times + done_times) + '\n'
+# end create_data_line
+
+    
+def make_data(filename):
+  """Aggregate the raw results in `filename` and write them, one row per
+  config, to the space-separated file `<filename>.dat`."""
+  dps = parse_data_points(filename)
+  if not dps:
+    return
+  # single-argument print(...) behaves the same on Python 2 and Python 3
+  print("total points:  %d" % len(dps))
+  with open('%s.dat' % (filename), 'w+') as f:
+    f.write(create_header(len(dps[0])))
+    for aggr_dp in dps:
+      f.write(create_data_line(aggr_dp))
+
+
+def main(output_file):
+  print(output_file)  # echo the input name; this form is valid on Python 2 and 3
+  make_data(output_file)
+
+import sys
+if __name__ == "__main__":
+  # first CLI argument names the raw result file; fall back to "result"
+  cli_args = sys.argv[1:]
+  file_name = cli_args[0].strip() if cli_args else "result"
+  main(file_name)
+
diff --git a/src/dmclock/benchmark/plot_gen.sh b/src/dmclock/benchmark/plot_gen.sh
new file mode 100755
index 000000000..d90bde192
--- /dev/null
+++ b/src/dmclock/benchmark/plot_gen.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Usage: plot_gen.sh <output_file> <max_k_way> -- plots <output_file>.dat into <output_file>.pdf
+if [ "$1" != "" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file" >&2
+  exit 1
+fi
+
+# parameter check -- k-value
+if [ "$2" != "" ]; then
+  k_way="$2"
+else
+  echo "Please provide the maximum K_WAY value" >&2
+  exit 1
+fi
+# The .dat file has one column per k in 2..k_way for each metric, so
+# columns 2..k_way are add_req_* and k_way+1..2*k_way-1 are complete_req_*.
+
+gnuplot << EOF
+
+# Note you need gnuplot 4.4 for the pdfcairo terminal.
+clear
+reset
+
+set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced
+set output "${output_file}.pdf"
+
+# starts multiplot
+set multiplot layout 2,1
+
+# Line style for axes
+set style line 80 lt rgb "#808080"
+
+# Line style for grid
+set style line 81 lt 0  # dashed
+set style line 81 lt rgb "#808080"  # grey
+
+set grid back linestyle 81
+set border 3 back linestyle 80
+
+#set xtics rotate out
+set style data histogram
+set style histogram clustered
+
+set style fill solid border
+set xlabel 'Heap Timing for different K values'
+set ylabel 'Time (nanosec)'
+set key top right
+
+set yrange [0:*]
+
+# plot 1
+set title 'Request Addition Time'
+plot for [COL=2:$k_way] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+
+# plot 2
+set title 'Request Completion Time'
+plot for [COL=($k_way + 1):(2 * $k_way - 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+EOF
diff --git a/src/dmclock/benchmark/run.sh b/src/dmclock/benchmark/run.sh
new file mode 100755
index 000000000..11432b530
--- /dev/null
+++ b/src/dmclock/benchmark/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Usage: run.sh <output_file> -- generate data, aggregate it, plot <output_file>.pdf
+# default value
+k_way=3 #11
+repeat=2 #5
+
+output_file=""
+if [ "$1" != "" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file" >&2
+  exit 1
+fi
+
+echo "generating file ${output_file}"
+sh data_gen.sh "${output_file}" "${k_way}" "${repeat}" || exit 1
+
+echo "converting ${output_file} to ${output_file}.dat"
+python data_parser.py "${output_file}" || exit 1
+
+echo "now generating bar-chart"
+# stop at the first failing stage so "done!" is only printed after plotting succeeded
+sh plot_gen.sh "${output_file}" "${k_way}" || exit 1
+echo "done! check ${output_file}.pdf"
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:45:59 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:45:59 +0000
commit	19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree	42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/dmclock/benchmark
parent	Initial commit. (diff)
download	ceph-upstream/16.2.11+ds.tar.xz ceph-upstream/16.2.11+ds.zip