Diffstat (limited to 'src/rocksdb/tools/benchmark_leveldb.sh')
-rwxr-xr-x | src/rocksdb/tools/benchmark_leveldb.sh | 187
1 file changed, 187 insertions, 0 deletions
diff --git a/src/rocksdb/tools/benchmark_leveldb.sh b/src/rocksdb/tools/benchmark_leveldb.sh
new file mode 100755
index 000000000..069b53a9f
--- /dev/null
+++ b/src/rocksdb/tools/benchmark_leveldb.sh
@@ -0,0 +1,187 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# REQUIRE: db_bench binary exists in the current directory
+#
+# This should be used with the LevelDB fork listed here to use additional test options.
+# For more details on the changes see the blog post listed below.
+# https://github.com/mdcallag/leveldb-1
+# http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html
+
+if [ $# -ne 1 ]; then
+  echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]"
+  exit 0
+fi
+
+# size constants
+K=1024
+M=$((1024 * K))
+G=$((1024 * M))
+
+if [ -z $DB_DIR ]; then
+  echo "DB_DIR is not defined"
+  exit 0
+fi
+
+output_dir=${OUTPUT_DIR:-/tmp/}
+if [ ! -d $output_dir ]; then
+  mkdir -p $output_dir
+fi
+
+# all multithreaded tests run with sync=1 unless
+# $DB_BENCH_NO_SYNC is defined
+syncval="1"
+if [ ! -z $DB_BENCH_NO_SYNC ]; then
+  echo "Turning sync off for all multithreaded tests"
+  syncval="0";
+fi
+
+num_threads=${NUM_THREADS:-16}
+# Only for *whilewriting, *whilemerging
+writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
+cache_size=${CACHE_SIZE:-$((1 * G))}
+
+num_keys=${NUM_KEYS:-$((1 * G))}
+key_size=20
+value_size=${VALUE_SIZE:-400}
+block_size=${BLOCK_SIZE:-4096}
+
+const_params="
+  --db=$DB_DIR \
+  \
+  --num=$num_keys \
+  --value_size=$value_size \
+  --cache_size=$cache_size \
+  --compression_ratio=0.5 \
+  \
+  --write_buffer_size=$((2 * M)) \
+  \
+  --histogram=1 \
+  \
+  --bloom_bits=10 \
+  --open_files=$((20 * K))"
+
+params_w="$const_params "
+
+function summarize_result {
+  test_out=$1
+  test_name=$2
+  bench_name=$3
+  nthr=$4
+
+  usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
+  mb_sec=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $5 }' )
+  ops=$( grep "^Count:" $test_out | awk '{ print $2 }' )
+  ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" | bc )
+  avg=$( grep "^Count:" $test_out | awk '{ printf "%.1f", $4 }' )
+  p50=$( grep "^Min:" $test_out | awk '{ printf "%.1f", $4 }' )
+  echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \
+    >> $output_dir/report.txt
+}
+
+function run_fillseq {
+  # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
+  # client can discover where to restart a load after a crash. I think this is a good way to load.
+  echo "Loading $num_keys keys sequentially"
+  cmd="./db_bench --benchmarks=fillseq \
+       --use_existing_db=0 \
+       --sync=0 \
+       $params_w \
+       --threads=1 \
+       --seed=$( date +%s ) \
+       2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log"
+  echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log
+  eval $cmd
+  summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1
+}
+
+function run_change {
+  operation=$1
+  echo "Do $num_keys random $operation"
+  out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
+  cmd="./db_bench --benchmarks=$operation \
+       --use_existing_db=1 \
+       --sync=$syncval \
+       $params_w \
+       --threads=$num_threads \
+       --seed=$( date +%s ) \
+       2>&1 | tee -a $output_dir/${out_name}"
+  echo $cmd | tee $output_dir/${out_name}
+  eval $cmd
+  summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads
+}
+
+function run_readrandom {
+  echo "Reading $num_keys random keys"
+  out_name="benchmark_readrandom.t${num_threads}.log"
+  cmd="./db_bench --benchmarks=readrandom \
+       --use_existing_db=1 \
+       $params_w \
+       --threads=$num_threads \
+       --seed=$( date +%s ) \
+       2>&1 | tee -a $output_dir/${out_name}"
+  echo $cmd | tee $output_dir/${out_name}
+  eval $cmd
+  summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads
+}
+
+function run_readwhile {
+  operation=$1
+  echo "Reading $num_keys random keys while $operation"
+  out_name="benchmark_readwhile${operation}.t${num_threads}.log"
+  cmd="./db_bench --benchmarks=readwhile${operation} \
+       --use_existing_db=1 \
+       --sync=$syncval \
+       $params_w \
+       --threads=$num_threads \
+       --writes_per_second=$writes_per_second \
+       --seed=$( date +%s ) \
+       2>&1 | tee -a $output_dir/${out_name}"
+  echo $cmd | tee $output_dir/${out_name}
+  eval $cmd
+  summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads
+}
+
+function now() {
+  echo `date +"%s"`
+}
+
+report="$output_dir/report.txt"
+schedule="$output_dir/schedule.txt"
+
+echo "===== Benchmark ====="
+
+# Run!!!
+IFS=',' read -a jobs <<< $1
+# shellcheck disable=SC2068
+for job in ${jobs[@]}; do
+
+  if [ $job != debug ]; then
+    echo "Start $job at `date`" | tee -a $schedule
+  fi
+
+  start=$(now)
+  if [ $job = fillseq ]; then
+    run_fillseq
+  elif [ $job = overwrite ]; then
+    run_change overwrite
+  elif [ $job = readrandom ]; then
+    run_readrandom
+  elif [ $job = readwhilewriting ]; then
+    run_readwhile writing
+  elif [ $job = debug ]; then
+    num_keys=1000; # debug
+    echo "Setting num_keys to $num_keys"
+  else
+    echo "unknown job $job"
+    exit
+  fi
+  end=$(now)
+
+  if [ $job != debug ]; then
+    echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
+  fi
+
+  echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest"
+  tail -1 $output_dir/report.txt
+
+done