summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/tools/benchmark_leveldb.sh
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/tools/benchmark_leveldb.sh')
-rwxr-xr-xsrc/rocksdb/tools/benchmark_leveldb.sh186
1 files changed, 186 insertions, 0 deletions
diff --git a/src/rocksdb/tools/benchmark_leveldb.sh b/src/rocksdb/tools/benchmark_leveldb.sh
new file mode 100755
index 00000000..40c7733c
--- /dev/null
+++ b/src/rocksdb/tools/benchmark_leveldb.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+# REQUIRE: db_bench binary exists in the current directory
+#
+# This should be used with the LevelDB fork listed here to use additional test options.
+# For more details on the changes see the blog post listed below.
+# https://github.com/mdcallag/leveldb-1
+# http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html
+
+if [ $# -ne 1 ]; then
+ echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]"
+ exit 0
+fi
+
+# size constants
+K=1024
+M=$((1024 * K))
+G=$((1024 * M))
+
+if [ -z $DB_DIR ]; then
+ echo "DB_DIR is not defined"
+ exit 0
+fi
+
+output_dir=${OUTPUT_DIR:-/tmp/}
+if [ ! -d $output_dir ]; then
+ mkdir -p $output_dir
+fi
+
+# all multithreaded tests run with sync=1 unless
+# $DB_BENCH_NO_SYNC is defined
+syncval="1"
+if [ ! -z $DB_BENCH_NO_SYNC ]; then
+ echo "Turning sync off for all multithreaded tests"
+ syncval="0";
+fi
+
+num_threads=${NUM_THREADS:-16}
+# Only for *whilewriting, *whilemerging
+writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
+cache_size=${CACHE_SIZE:-$((1 * G))}
+
+num_keys=${NUM_KEYS:-$((1 * G))}
+key_size=20
+value_size=${VALUE_SIZE:-400}
+block_size=${BLOCK_SIZE:-4096}
+
+const_params="
+ --db=$DB_DIR \
+ \
+ --num=$num_keys \
+ --value_size=$value_size \
+ --cache_size=$cache_size \
+ --compression_ratio=0.5 \
+ \
+ --write_buffer_size=$((2 * M)) \
+ \
+ --histogram=1 \
+ \
+ --bloom_bits=10 \
+ --open_files=$((20 * K))"
+
+params_w="$const_params "
+
+function summarize_result {
+ test_out=$1
+ test_name=$2
+ bench_name=$3
+ nthr=$4
+
+ usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
+ mb_sec=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $5 }' )
+ ops=$( grep "^Count:" $test_out | awk '{ print $2 }' )
+ ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" | bc )
+ avg=$( grep "^Count:" $test_out | awk '{ printf "%.1f", $4 }' )
+ p50=$( grep "^Min:" $test_out | awk '{ printf "%.1f", $4 }' )
+ echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \
+ >> $output_dir/report.txt
+}
+
+function run_fillseq {
+ # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
+ # client can discover where to restart a load after a crash. I think this is a good way to load.
+ echo "Loading $num_keys keys sequentially"
+ cmd="./db_bench --benchmarks=fillseq \
+ --use_existing_db=0 \
+ --sync=0 \
+ $params_w \
+ --threads=1 \
+ --seed=$( date +%s ) \
+ 2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log"
+ echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log
+ eval $cmd
+ summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1
+}
+
+function run_change {
+ operation=$1
+ echo "Do $num_keys random $operation"
+ out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
+ cmd="./db_bench --benchmarks=$operation \
+ --use_existing_db=1 \
+ --sync=$syncval \
+ $params_w \
+ --threads=$num_threads \
+ --seed=$( date +%s ) \
+ 2>&1 | tee -a $output_dir/${out_name}"
+ echo $cmd | tee $output_dir/${out_name}
+ eval $cmd
+ summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads
+}
+
+function run_readrandom {
+ echo "Reading $num_keys random keys"
+ out_name="benchmark_readrandom.t${num_threads}.log"
+ cmd="./db_bench --benchmarks=readrandom \
+ --use_existing_db=1 \
+ $params_w \
+ --threads=$num_threads \
+ --seed=$( date +%s ) \
+ 2>&1 | tee -a $output_dir/${out_name}"
+ echo $cmd | tee $output_dir/${out_name}
+ eval $cmd
+ summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads
+}
+
+function run_readwhile {
+ operation=$1
+ echo "Reading $num_keys random keys while $operation"
+ out_name="benchmark_readwhile${operation}.t${num_threads}.log"
+ cmd="./db_bench --benchmarks=readwhile${operation} \
+ --use_existing_db=1 \
+ --sync=$syncval \
+ $params_w \
+ --threads=$num_threads \
+ --writes_per_second=$writes_per_second \
+ --seed=$( date +%s ) \
+ 2>&1 | tee -a $output_dir/${out_name}"
+ echo $cmd | tee $output_dir/${out_name}
+ eval $cmd
+ summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads
+}
+
+function now() {
+ echo `date +"%s"`
+}
+
+report="$output_dir/report.txt"
+schedule="$output_dir/schedule.txt"
+
+echo "===== Benchmark ====="
+
+# Run!!!
+IFS=',' read -a jobs <<< $1
+# shellcheck disable=SC2068
+for job in ${jobs[@]}; do
+
+ if [ $job != debug ]; then
+ echo "Start $job at `date`" | tee -a $schedule
+ fi
+
+ start=$(now)
+ if [ $job = fillseq ]; then
+ run_fillseq
+ elif [ $job = overwrite ]; then
+ run_change overwrite
+ elif [ $job = readrandom ]; then
+ run_readrandom
+ elif [ $job = readwhilewriting ]; then
+ run_readwhile writing
+ elif [ $job = debug ]; then
+ num_keys=1000; # debug
+ echo "Setting num_keys to $num_keys"
+ else
+ echo "unknown job $job"
+ exit
+ fi
+ end=$(now)
+
+ if [ $job != debug ]; then
+ echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
+ fi
+
+ echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest"
+ tail -1 $output_dir/report.txt
+
+done