1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
|
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# REQUIRE: db_bench binary exists in the current directory
# Positional arguments: the database directory and the results directory.
dbdir=$1
odir=$2

# Size constants
K=1024
M=$(( K * 1024 ))

# Dynamic loader configuration
ld_library_path=${LD_LIBRARY_PATH:-}

# Benchmark configuration (each value can be overridden from the environment)
duration_rw=${DURATION_RW:-65}
duration_ro=${DURATION_RO:-65}
num_keys=${NUM_KEYS:-1000000}
num_threads=${NUM_THREADS:-16}
key_size=${KEY_SIZE:-20}
value_size=${VALUE_SIZE:-400}
mb_write_per_sec=${MB_WRITE_PER_SEC:-2}
ci_tests_only=${CI_TESTS_ONLY:-false}

# RocksDB configuration
# USE_O_DIRECT and CACHE_SIZE_MB are intentionally left without a default.
compression_type=${COMPRESSION_TYPE:-lz4}
subcompactions=${SUBCOMPACTIONS:-1}
write_buffer_size_mb=${WRITE_BUFFER_SIZE_MB:-32}
target_file_size_base_mb=${TARGET_FILE_SIZE_BASE_MB:-32}
max_bytes_for_level_base_mb=${MAX_BYTES_FOR_LEVEL_BASE_MB:-128}
max_background_jobs=${MAX_BACKGROUND_JOBS:-8}
stats_interval_seconds=${STATS_INTERVAL_SECONDS:-20}
cache_index_and_filter_blocks=${CACHE_INDEX_AND_FILTER_BLOCKS:-0}
bytes_per_sync=${BYTES_PER_SYNC:-$M}
min_level_to_compress=${MIN_LEVEL_TO_COMPRESS:--1}

# Compaction style: announce the accepted values, reject anything else.
compaction_style=${COMPACTION_STYLE:-leveled}
case "$compaction_style" in
  leveled|universal|blob)
    echo Use "$compaction_style" compaction
    ;;
  *)
    echo COMPACTION_STYLE is :: "$COMPACTION_STYLE" :: and must be one of leveled, universal, blob
    exit 1
    ;;
esac

# Leveled compaction configuration
level0_file_num_compaction_trigger=${LEVEL0_FILE_NUM_COMPACTION_TRIGGER:-4}
level0_slowdown_writes_trigger=${LEVEL0_SLOWDOWN_WRITES_TRIGGER:-20}
level0_stop_writes_trigger=${LEVEL0_STOP_WRITES_TRIGGER:-30}
per_level_fanout=${PER_LEVEL_FANOUT:-8}

# Universal compaction configuration
universal_min_merge_width=${UNIVERSAL_MIN_MERGE_WIDTH:-2}
universal_max_merge_width=${UNIVERSAL_MAX_MERGE_WIDTH:-20}
universal_size_ratio=${UNIVERSAL_SIZE_RATIO:-1}
universal_max_size_amp=${UNIVERSAL_MAX_SIZE_AMP:-200}
universal_compression_size_percent=${UNIVERSAL_COMPRESSION_SIZE_PERCENT:--1}

# Integrated BlobDB configuration
# Blob compression falls back to the general compression type.
min_blob_size=${MIN_BLOB_SIZE:-0}
blob_file_size=${BLOB_FILE_SIZE:-$(( 256 * M ))}
blob_compression_type=${BLOB_COMPRESSION_TYPE:-$compression_type}
blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1}
# base_args collects the NAME=value settings shared by every benchmark step.
# The dynamic-loader path comes first, followed by the options used for all tests.
base_args=(
  LD_LIBRARY_PATH="$ld_library_path"
  NUM_KEYS="$num_keys"
  NUM_THREADS="$num_threads"
  KEY_SIZE="$key_size"
  VALUE_SIZE="$value_size"
  SUBCOMPACTIONS="$subcompactions"
  COMPRESSION_TYPE="$compression_type"
  WRITE_BUFFER_SIZE_MB="$write_buffer_size_mb"
  TARGET_FILE_SIZE_BASE_MB="$target_file_size_base_mb"
  MAX_BYTES_FOR_LEVEL_BASE_MB="$max_bytes_for_level_base_mb"
  MAX_BACKGROUND_JOBS="$max_background_jobs"
  STATS_INTERVAL_SECONDS="$stats_interval_seconds"
  CACHE_INDEX_AND_FILTER_BLOCKS="$cache_index_and_filter_blocks"
  COMPACTION_STYLE="$compaction_style"
  BYTES_PER_SYNC="$bytes_per_sync"
)

# Optional switches: only added when the matching environment variable is non-empty.
if [[ -n "$USE_O_DIRECT" ]]; then
  base_args+=( USE_O_DIRECT=1 )
fi
if [[ -n "$NUMA" ]]; then
  base_args+=( NUMACTL=1 )
fi
if [[ -n "$CACHE_SIZE_MB" ]]; then
  # Convert the cache size from MB to bytes.
  cacheb=$(( CACHE_SIZE_MB * M ))
  base_args+=( CACHE_SIZE="$cacheb" )
fi

# The L0 triggers are passed for every compaction style.
base_args+=(
  LEVEL0_FILE_NUM_COMPACTION_TRIGGER="$level0_file_num_compaction_trigger"
  LEVEL0_SLOWDOWN_WRITES_TRIGGER="$level0_slowdown_writes_trigger"
  LEVEL0_STOP_WRITES_TRIGGER="$level0_stop_writes_trigger"
)

case "$compaction_style" in
  leveled)
    base_args+=( PER_LEVEL_FANOUT="$per_level_fanout" )
    ;;
  universal)
    base_args+=(
      UNIVERSAL_MIN_MERGE_WIDTH="$universal_min_merge_width"
      UNIVERSAL_MAX_MERGE_WIDTH="$universal_max_merge_width"
      UNIVERSAL_SIZE_RATIO="$universal_size_ratio"
      UNIVERSAL_MAX_SIZE_AMP="$universal_max_size_amp"
    )
    if [[ -n "$UNIVERSAL_ALLOW_TRIVIAL_MOVE" ]]; then
      base_args+=( UNIVERSAL_ALLOW_TRIVIAL_MOVE=1 )
    fi
    ;;
  *)
    # Inherit the leveled fanout because the index uses a leveled LSM,
    # then add the BlobDB specific settings.
    base_args+=(
      PER_LEVEL_FANOUT="$per_level_fanout"
      MIN_BLOB_SIZE="$min_blob_size"
      BLOB_FILE_SIZE="$blob_file_size"
      BLOB_COMPRESSION_TYPE="$blob_compression_type"
      BLOB_GC_AGE_CUTOFF="$blob_gc_age_cutoff"
      BLOB_GC_FORCE_THRESHOLD="$blob_gc_force_threshold"
    )
    ;;
esac
# Print the command-line synopsis and the list of environment variables that
# tune the benchmark. Takes no arguments and writes everything to stdout.
function usage {
  echo "usage: benchmark_compare.sh db_dir output_dir version+"
  echo -e "\tdb_dir\t\tcreate RocksDB database in this directory"
  echo -e "\toutput_dir\twrite output from performance tests in this directory"
  echo -e "\tversion+\tspace separated sequence of RocksDB versions to test."
  echo -e "\nThis expects that db_bench.\$version exists in \$PWD for each version in the sequence."
  echo -e "An example value for version+ is 6.23.0 6.24.0"
  echo ""
  echo -e "Environment variables for options"
  echo -e "\tNUM_KEYS\t\t\tnumber of keys to load"
  echo -e "\tKEY_SIZE\t\t\tsize of key"
  echo -e "\tVALUE_SIZE\t\t\tsize of value"
  echo -e "\tCACHE_SIZE_MB\t\t\tsize of block cache in MB"
  echo -e "\tDURATION_RW\t\t\tnumber of seconds for which each test runs, except for read-only tests"
  echo -e "\tDURATION_RO\t\t\tnumber of seconds for which each read-only test runs"
  echo -e "\tMB_WRITE_PER_SEC\t\trate limit for writer that runs concurrent with queries for some tests"
  echo -e "\tNUM_THREADS\t\t\tnumber of user threads"
  echo -e "\tCOMPRESSION_TYPE\t\tcompression type (zstd, lz4, none, etc)"
  echo -e "\tMIN_LEVEL_TO_COMPRESS\t\tmin_level_to_compress for leveled"
  echo -e "\tWRITE_BUFFER_SIZE_MB\t\tsize of write buffer in MB"
  echo -e "\tTARGET_FILE_SIZE_BASE_MB\tvalue for target_file_size_base in MB"
  echo -e "\tMAX_BYTES_FOR_LEVEL_BASE_MB\tvalue for max_bytes_for_level_base in MB"
  echo -e "\tMAX_BACKGROUND_JOBS\t\tvalue for max_background_jobs"
  echo -e "\tCACHE_INDEX_AND_FILTER_BLOCKS\tvalue for cache_index_and_filter_blocks"
  echo -e "\tUSE_O_DIRECT\t\t\tUse O_DIRECT for user reads and compaction"
  echo -e "\tBYTES_PER_SYNC\t\t\tValue for bytes_per_sync"
  echo -e "\tSTATS_INTERVAL_SECONDS\t\tvalue for stats_interval_seconds"
  echo -e "\tSUBCOMPACTIONS\t\t\tvalue for subcompactions"
  echo -e "\tCOMPACTION_STYLE\t\tCompaction style to use, one of: leveled, universal, blob"
  echo -e "\tCI_TESTS_ONLY\t\tRun a subset of tests tailored to a CI regression job, one of: true, false (default)"
  echo ""
  echo -e "\tOptions specific to leveled compaction:"
  echo -e "\t\tLEVEL0_FILE_NUM_COMPACTION_TRIGGER\tvalue for level0_file_num_compaction_trigger"
  echo -e "\t\tLEVEL0_SLOWDOWN_WRITES_TRIGGER\t\tvalue for level0_slowdown_writes_trigger"
  echo -e "\t\tLEVEL0_STOP_WRITES_TRIGGER\t\tvalue for level0_stop_writes_trigger"
  echo -e "\t\tPER_LEVEL_FANOUT\t\t\tvalue for max_bytes_for_level_multiplier"
  echo ""
  echo -e "\tOptions specific to universal compaction:"
  echo -e "\t\tSee LEVEL0_*_TRIGGER above"
  echo -e "\t\tUNIVERSAL_MIN_MERGE_WIDTH\t\tvalue of min_merge_width option for universal"
  # Fixed: this entry previously described min_merge_width.
  echo -e "\t\tUNIVERSAL_MAX_MERGE_WIDTH\t\tvalue of max_merge_width option for universal"
  echo -e "\t\tUNIVERSAL_SIZE_RATIO\t\t\tvalue of size_ratio option for universal"
  echo -e "\t\tUNIVERSAL_MAX_SIZE_AMP\t\t\tmax_size_amplification_percent for universal"
  echo -e "\t\tUNIVERSAL_ALLOW_TRIVIAL_MOVE\t\tSet allow_trivial_move to true for universal, default is false"
  echo -e "\t\tUNIVERSAL_COMPRESSION_SIZE_PERCENT\tpercentage of LSM tree that should be compressed"
  echo ""
  echo -e "\tOptions for integrated BlobDB:"
  echo -e "\t\tMIN_BLOB_SIZE\t\t\t\tvalue for min_blob_size"
  echo -e "\t\tBLOB_FILE_SIZE\t\t\t\tvalue for blob_file_size"
  echo -e "\t\tBLOB_COMPRESSION_TYPE\t\t\tvalue for blob_compression_type"
  # Fixed: the option names are blob_garbage_collection_*, not "blog_...".
  echo -e "\t\tBLOB_GC_AGE_CUTOFF\t\t\tvalue for blob_garbage_collection_age_cutoff"
  echo -e "\t\tBLOB_GC_FORCE_THRESHOLD\t\t\tvalue for blob_garbage_collection_force_threshold"
}
# Record the effective benchmark configuration in "$odir"/args, replacing any
# previous contents of that file.
# Globals read: odir, dbdir, duration_rw, duration_ro, per_level_fanout and the
# arrays base_args, args_load, args_nolim, args_lim.
# Each array element is written on its own line. printf is used instead of
# `echo ... | tr ' ' '\n'` so that an element whose value contains a space
# stays on one line, and so that backslashes in values are written verbatim.
function dump_env {
  {
    printf '%s\n' "Base args"
    printf '%s\n' "${base_args[@]}"

    printf '\n%s\n' "Other args"
    printf 'dbdir\t%s\n' "$dbdir"
    printf 'duration_rw\t%s\n' "$duration_rw"
    printf 'duration_ro\t%s\n' "$duration_ro"
    printf 'per_level_fanout\t%s\n' "$per_level_fanout"

    printf '\n%s\n' "args_load:"
    printf '%s\n' "${args_load[@]}"

    printf '\n%s\n' "args_nolim:"
    printf '%s\n' "${args_nolim[@]}"

    printf '\n%s\n' "args_lim:"
    printf '%s\n' "${args_lim[@]}"
  } > "$odir"/args
}
# db_dir, output_dir and at least one version are mandatory.
if (( $# < 3 )); then
  usage
  echo
  echo "Need at least 3 arguments"
  exit 1
fi

# Drop db_dir and output_dir; from here on "$@" is the list of versions.
shift 2

mkdir -p "$odir"

# Announce the versions on stdout and append the same line to "$odir"/args.
# NOTE(review): dump_env later rewrites "$odir"/args with '>', so this line
# does not appear to survive the first benchmark run - confirm if it is wanted.
echo Test versions: "$@" | tee -a "$odir"/args
# Run the whole benchmark sequence once for every requested version. Results
# for a version go to "$odir"/<version>; every version reuses "$dbdir".
for v in "$@"; do
  my_odir="$odir"/"$v"

  # Never overwrite the results of an earlier run.
  if [[ -d "$my_odir" ]]; then
    echo Exiting because the output directory exists: "$my_odir"
    exit 1
  fi

  # Settings shared by every step run for this version.
  args_common=(
    "${base_args[@]}"
    OUTPUT_DIR="$my_odir"
    DB_DIR="$dbdir"
    WAL_DIR="$dbdir"
    DB_BENCH_NO_SYNC=1
  )
  case "$compaction_style" in
    universal)
      args_common+=( UNIVERSAL=1 COMPRESSION_SIZE_PERCENT="$universal_compression_size_percent" )
      ;;
    *)
      # leveled and blob both pass min_level_to_compress
      args_common+=( MIN_LEVEL_TO_COMPRESS="$min_level_to_compress" )
      ;;
  esac

  # args_load: load step; args_nolim: no writer rate limit;
  # args_lim: same as args_nolim plus the writer rate limit.
  args_load=( "${args_common[@]}" )
  args_nolim=( "${args_common[@]}" )
  args_lim=( "${args_nolim[@]}" MB_WRITE_PER_SEC="$mb_write_per_sec" )

  dump_env

  echo Run benchmark for "$v" at "$( date )" with results at "$my_odir"

  # Point ./db_bench at the binary for this version.
  rm -f db_bench
  echo ln -s db_bench."$v" db_bench
  ln -s db_bench."$v" db_bench

  # Remove the files left in the database directory by a previous version.
  find "$dbdir" -type f -exec rm {} \;

  # Load in key order
  echo env "${args_load[@]}" bash ./benchmark.sh fillseq_disable_wal
  env -i "${args_load[@]}" bash ./benchmark.sh fillseq_disable_wal

  # Read-only tests. The LSM tree shape is in a deterministic state if trivial
  # move was used during the load.

  # revrange runs first with a fixed duration and a hardwired number of keys and
  # threads so that compaction debt left over from fillseq has a chance of being
  # removed. waitforcompaction is not used here because older db_bench versions
  # do not support it.
  env -i "${args_nolim[@]}" DURATION=300 NUM_KEYS=100 NUM_THREADS=1 bash ./benchmark.sh revrange
  env -i "${args_nolim[@]}" DURATION="$duration_ro" bash ./benchmark.sh readrandom

  # Skipped for CI - a single essential readrandom is enough to set up for other tests
  if [[ "$ci_tests_only" == "true" ]]; then
    echo "CI_TESTS_ONLY is set, skipping optional read steps."
  else
    env -i "${args_nolim[@]}" DURATION="$duration_ro" bash ./benchmark.sh fwdrange
    env -i "${args_lim[@]}" DURATION="$duration_ro" bash ./benchmark.sh multireadrandom --multiread_batched
  fi

  # Write 10% of the keys. The goal is to randomize keys prior to Lmax
  p10=$( awk -v keys="$num_keys" -v threads="$num_threads" \
    'BEGIN { printf "%.0f", keys / threads / 10.0 }' )
  env -i "${args_nolim[@]}" WRITES="$p10" bash ./benchmark.sh overwritesome

  case "$compaction_style" in
    universal)
      # For universal don't compact L0 as can have too many sorted runs
      # waitforcompaction can hang, see https://github.com/facebook/rocksdb/issues/9275
      # While this is disabled the test that follows will have more variance from compaction debt.
      # env -i "${args_nolim[@]}" bash ./benchmark.sh waitforcompaction
      echo TODO enable when waitforcompaction hang is fixed
      ;;
    *)
      # leveled and blob: these are not supported by older versions
      # Flush memtable & L0 to get LSM tree into deterministic state
      env -i "${args_nolim[@]}" bash ./benchmark.sh flush_mt_l0
      ;;
  esac

  # Read-mostly tests with a rate-limited writer
  for step in revrangewhilewriting fwdrangewhilewriting readwhilewriting; do
    env -i "${args_lim[@]}" DURATION="$duration_rw" bash ./benchmark.sh "$step"
  done

  # Write-only tests
  # This creates much compaction debt which will be a problem for tests added after it.
  # Also, the compaction stats measured at test end can underestimate write-amp depending
  # on how much compaction debt is allowed.
  if [[ "$compaction_style" == "leveled" ]] && ./db_bench --benchmarks=waitforcompaction; then
    # Use waitforcompaction to get more accurate write-amp measurement
    env -i "${args_nolim[@]}" DURATION="$duration_rw" bash ./benchmark.sh overwriteandwait
  else
    # waitforcompaction hangs with universal, see https://github.com/facebook/rocksdb/issues/9275
    env -i "${args_nolim[@]}" DURATION="$duration_rw" bash ./benchmark.sh overwrite
  fi

  # Keep compressed copies of the RocksDB LOG files next to the results.
  cp "$dbdir"/LOG* "$my_odir"
  gzip -9 "$my_odir"/LOG*
done
# Build "$odir"/summary.tsv, which groups the lines for the same test across
# all versions. The first version's report decides where the header line is and
# how many result lines follow it.
basev=$1
base_report="$odir"/"$basev"/report.tsv

# Line number of the header (the line that starts with ops_sec).
hline=$( awk '/^ops_sec/ { print NR }' "$base_report" )
# Number of lines from the header up to a line matching END (or end of file),
# not counting lines that contain ops_sec.
nlines=$( awk '/^ops_sec/,/END/' "$base_report" | grep -vc ops_sec )

sline=$(( hline + 1 ))
eline=$(( sline + nlines - 1 ))

sum_file="$odir"/summary.tsv

# First list the reports that are being compared, followed by a blank line.
{
  for v in "$@"; do
    printf '%s\n' "$odir"/"$v"/report.tsv
  done
  echo
} >> "$sum_file"

# Then, per result line: the header, that line from every version, a blank line.
for (( x = sline; x <= eline; x++ )); do
  {
    awk -v lno="$hline" 'NR == lno' "$base_report"
    for v in "$@"; do
      awk -v lno="$x" 'NR == lno' "$odir"/"$v"/report.tsv
    done
    echo
  } >> "$sum_file"
done
|