src/rocksdb/tools/benchmark_leveldb.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187

#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# REQUIRE: db_bench binary exists in the current directory
#
# This should be used with the LevelDB fork listed here to use additional test options.
# For more details on the changes see the blog post listed below.
#   https://github.com/mdcallag/leveldb-1
#   http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html

if [ $# -ne 1 ]; then
  echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]"
  exit 0
fi

# size constants
K=1024
M=$((1024 * K))
G=$((1024 * M))

if [ -z $DB_DIR ]; then
  echo "DB_DIR is not defined"
  exit 0
fi

output_dir=${OUTPUT_DIR:-/tmp/}
if [ ! -d $output_dir ]; then
  mkdir -p $output_dir
fi

# all multithreaded tests run with sync=1 unless
# $DB_BENCH_NO_SYNC is defined
syncval="1"
if [ ! -z $DB_BENCH_NO_SYNC ]; then
  echo "Turning sync off for all multithreaded tests"
  syncval="0";
fi

num_threads=${NUM_THREADS:-16}
# Only for *whilewriting, *whilemerging
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
cache_size=${CACHE_SIZE:-$((1 * G))}

num_keys=${NUM_KEYS:-$((1 * G))}
key_size=20
value_size=${VALUE_SIZE:-400}
block_size=${BLOCK_SIZE:-4096}

const_params="
  --db=$DB_DIR \
  \
  --num=$num_keys \
  --value_size=$value_size \
  --cache_size=$cache_size \
  --compression_ratio=0.5 \
  \
  --write_buffer_size=$((2 * M)) \
  \
  --histogram=1 \
  \
  --bloom_bits=10 \
  --open_files=$((20 * K))"

params_w="$const_params "

function summarize_result {
  test_out=$1
  test_name=$2
  bench_name=$3
  nthr=$4

  usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
  mb_sec=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $5 }' )
  ops=$( grep "^Count:" $test_out | awk '{ print $2 }' )
  ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" | bc )
  avg=$( grep "^Count:" $test_out | awk '{ printf "%.1f", $4 }' )
  p50=$( grep "^Min:" $test_out | awk '{ printf "%.1f", $4 }' )
  echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \
    >> $output_dir/report.txt
}

function run_fillseq {
  # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
  # client can discover where to restart a load after a crash. I think this is a good way to load.
  echo "Loading $num_keys keys sequentially"
  cmd="./db_bench --benchmarks=fillseq \
       --use_existing_db=0 \
       --sync=0 \
       $params_w \
       --threads=1 \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log"
  echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log
  eval $cmd
  summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1
}

function run_change {
  operation=$1
  echo "Do $num_keys random $operation"
  out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
  cmd="./db_bench --benchmarks=$operation \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  echo $cmd | tee $output_dir/${out_name}
  eval $cmd
  summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads
}

function run_readrandom {
  echo "Reading $num_keys random keys"
  out_name="benchmark_readrandom.t${num_threads}.log"
  cmd="./db_bench --benchmarks=readrandom \
       --use_existing_db=1 \
       $params_w \
       --threads=$num_threads \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  echo $cmd | tee $output_dir/${out_name}
  eval $cmd
  summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads
}

function run_readwhile {
  operation=$1
  echo "Reading $num_keys random keys while $operation"
  out_name="benchmark_readwhile${operation}.t${num_threads}.log"
  cmd="./db_bench --benchmarks=readwhile${operation} \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --writes_per_second=$writes_per_second \
       --seed=$( date +%s ) \
       2>&1 | tee -a $output_dir/${out_name}"
  echo $cmd | tee $output_dir/${out_name}
  eval $cmd
  summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads
}

function now() {
  echo `date +"%s"`
}

report="$output_dir/report.txt"
schedule="$output_dir/schedule.txt"

echo "===== Benchmark ====="

# Run!!!
IFS=',' read -a jobs <<< $1
# shellcheck disable=SC2068
for job in ${jobs[@]}; do

  if [ $job != debug ]; then
    echo "Start $job at `date`" | tee -a $schedule
  fi

  start=$(now)
  if [ $job = fillseq ]; then
    run_fillseq
  elif [ $job = overwrite ]; then
    run_change overwrite
  elif [ $job = readrandom ]; then
    run_readrandom
  elif [ $job = readwhilewriting ]; then
    run_readwhile writing
  elif [ $job = debug ]; then
    num_keys=1000; # debug
    echo "Setting num_keys to $num_keys"
  else
    echo "unknown job $job"
    exit
  fi
  end=$(now)

  if [ $job != debug ]; then
    echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
  fi

  echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest"
  tail -1 $output_dir/report.txt

done