Diffstat
-rw-r--r--  src/rocksdb/microbench/CMakeLists.txt    |   17
-rw-r--r--  src/rocksdb/microbench/README.md         |   60
-rw-r--r--  src/rocksdb/microbench/db_basic_bench.cc | 1575
-rw-r--r--  src/rocksdb/microbench/ribbon_bench.cc   |  155
4 files changed, 1807 insertions(+), 0 deletions(-)
diff --git a/src/rocksdb/microbench/CMakeLists.txt b/src/rocksdb/microbench/CMakeLists.txt
new file mode 100644
index 000000000..483e97973
--- /dev/null
+++ b/src/rocksdb/microbench/CMakeLists.txt
@@ -0,0 +1,17 @@
+find_package(benchmark REQUIRED)
+find_package(Threads REQUIRED)
+
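+# build one benchmark binary per source file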
+file(GLOB_RECURSE ALL_BENCH_CPP *.cc)
+foreach(ONE_BENCH_CPP ${ALL_BENCH_CPP})
+ get_filename_component(TARGET_NAME ${ONE_BENCH_CPP} NAME_WE)
+ add_executable(${TARGET_NAME} ${ONE_BENCH_CPP})
+ target_link_libraries(${TARGET_NAME} ${ROCKSDB_LIB} benchmark::benchmark
+ ${CMAKE_THREAD_LIBS_INIT})
+  # run benchmark like a test; if enabled, the benchmark tests can be run by `ctest -R Bench_`
+ # add_test(Bench_${TARGET_NAME} ${TARGET_NAME})
+ list(APPEND ALL_BENCH_TARGETS ${TARGET_NAME})
+endforeach()
+add_custom_target(microbench DEPENDS ${ALL_BENCH_TARGETS})
+add_custom_target(run_microbench
+ COMMAND for t in ${ALL_BENCH_TARGETS}\; do \.\/$$t \|\| exit 1\; done
+ DEPENDS ${ALL_BENCH_TARGETS})
diff --git a/src/rocksdb/microbench/README.md b/src/rocksdb/microbench/README.md
new file mode 100644
index 000000000..290ca58d7
--- /dev/null
+++ b/src/rocksdb/microbench/README.md
@@ -0,0 +1,60 @@
+# RocksDB Micro-Benchmark
+
+## Overview
+
+RocksDB micro-benchmark is a set of tests for benchmarking a single component or simple DB operations. Each test artificially generates input data and repeatedly executes the same operation on it to collect and report performance metrics. Because a test focuses on a single, well-defined operation, its results are more precise and reproducible; the limitation is that they do not represent a real production use case. The test author needs to carefully design the micro-benchmark to reflect its true purpose.
+
+The tests are based on [Google Benchmark](https://github.com/google/benchmark) library, which provides a standard framework for writing benchmarks.
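+
+As a minimal sketch of what such a test looks like (a hypothetical `BM_Example`, not one of the benchmarks in this directory):
+```cpp
+#include "benchmark/benchmark.h"
+
+static void BM_Example(benchmark::State& state) {
+  for (auto _ : state) {
+    // the operation being measured goes here; DoNotOptimize prevents the
+    // compiler from discarding the otherwise-unused result
+    benchmark::DoNotOptimize(state.iterations());
+  }
+}
+BENCHMARK(BM_Example);
+BENCHMARK_MAIN();
+```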
+
+## How to Run
+### Prerequisite
+Install the [Google Benchmark](https://github.com/google/benchmark) version `1.6.0` or above.
+
+*Note: Google Benchmark `1.6.x` is incompatible with previous versions like `1.5.x`; please make sure you're using the newer version.*
+
+### Build and Run
+With `Makefile`:
+```bash
+$ DEBUG_LEVEL=0 make run_microbench
+```
+Or with cmake:
+```bash
+$ mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_BENCHMARK=1
+$ make run_microbench
+```
+
+*Note: Please run the benchmark code in a release build.*
+### Run Single Test
+Example:
+```bash
+$ make db_basic_bench
+$ ./db_basic_bench --benchmark_filter=<TEST_NAME>
+```
+
+## Best Practices
+#### * Use the Same Test Directory Setting as Unittest
+Most of the micro-benchmark tests use the same test directory setup as unittests, so it can be overridden with:
+```bash
+$ TEST_TMPDIR=/mydata/tmp/ ./db_basic_bench --benchmark_filter=<TEST_NAME>
+```
+Please follow the same convention when designing new tests.
+
+#### * Avoid Using Debug API
+Even though a micro-benchmark is a test, avoid using internal debug APIs like `TEST_WaitForRun()`, which are designed for unittests. Benchmark tests are designed for release builds, so don't use any of them.
+
+#### * Pay Attention to Local Optimization
+Because a micro-benchmark focuses on a single component or area, make sure that component is a key contributor to the overall application performance.
+
+The compiler may optimize the isolated code differently than it would as part of the whole application, and if the test input is small and simple, it may fit entirely in the CPU caches, which leads to misleading metrics. Take these into consideration when designing the tests.
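+
+One common mitigation is `benchmark::DoNotOptimize()`, which forces the compiler to materialize a value instead of optimizing it away. A hedged sketch (`db` and `key` here are placeholders):
+```cpp
+for (auto _ : state) {
+  std::string val;
+  Status s = db->Get(ReadOptions(), key, &val);
+  benchmark::DoNotOptimize(val);  // keep the read from being optimized away
+}
+```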
+
+#### * Names of User-defined Counters/Metrics Must Match `[A-Za-z0-9_]`
+This is a restriction of the metrics collection and reporting system RocksDB uses internally, and it also eases integration with other systems.
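+
+For example, within a benchmark body (`db_size` is a counter these tests actually report):
+```cpp
+state.counters["db_size"] = static_cast<double>(size);  // OK: [A-Za-z0-9_]
+// a name like "db-size" or "db size" would violate the restriction
+```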
+
+#### * Minimize the Metrics Variation
+Try to reduce the variation in the test results. One way to check it is to run the test multiple times and check the CV (Coefficient of Variation) reported by gbenchmark.
+```bash
+$ ./db_basic_bench --benchmark_filter=<TEST_NAME> --benchmark_repetitions=10
+...
+<TEST_NAME>_cv 3.2%
+```
+RocksDB has background compaction jobs which may cause the test results to vary a lot. If the micro-benchmark is not purposely testing the operation while compaction is in progress, it should wait for the compaction to finish (`db_impl->WaitForCompact()`) or disable auto-compaction, as shown below.
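+
+A minimal sketch of both approaches, mirroring what the tests in this directory do:
+```cpp
+// wait for background compaction to finish before measuring
+auto db_full = static_cast_with_check<DBImpl>(db.get());
+Status s = db_full->WaitForCompact(true);
+
+// or disable auto-compaction entirely when opening the DB
+Options options;
+options.disable_auto_compactions = true;
+```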
diff --git a/src/rocksdb/microbench/db_basic_bench.cc b/src/rocksdb/microbench/db_basic_bench.cc
new file mode 100644
index 000000000..6c70ad21d
--- /dev/null
+++ b/src/rocksdb/microbench/db_basic_bench.cc
@@ -0,0 +1,1575 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef OS_WIN
+#include <unistd.h>
+#endif // ! OS_WIN
+
+#include "benchmark/benchmark.h"
+#include "db/db_impl/db_impl.h"
+#include "rocksdb/db.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/options.h"
+#include "table/block_based/block.h"
+#include "table/block_based/block_builder.h"
+#include "util/random.h"
+#include "utilities/merge_operators.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class KeyGenerator {
+ public:
+  // Generate the next key.
+  // buff: the caller needs to make sure there's enough space for the
+  //   generated key.
+  // offset: controls the group of the key: 0 means a normal key, 1 means a
+  //   non-existing key, 2 is reserved.
+  // prefix_only: only return a prefix.
+ Slice Next(char* buff, int8_t offset = 0, bool prefix_only = false) {
+ assert(max_key_ < std::numeric_limits<uint32_t>::max() /
+ MULTIPLIER); // TODO: add large key support
+
+ uint32_t k;
+ if (is_sequential_) {
+ assert(next_sequential_key_ < max_key_);
+ k = (next_sequential_key_ % max_key_) * MULTIPLIER + offset;
+ if (next_sequential_key_ + 1 == max_key_) {
+ next_sequential_key_ = 0;
+ } else {
+ next_sequential_key_++;
+ }
+ } else {
+ k = (rnd_->Next() % max_key_) * MULTIPLIER + offset;
+ }
+ // TODO: make sure the buff is large enough
+ memset(buff, 0, key_size_);
+ if (prefix_num_ > 0) {
+ uint32_t prefix = (k % prefix_num_) * MULTIPLIER + offset;
+ Encode(buff, prefix);
+ if (prefix_only) {
+ return {buff, prefix_size_};
+ }
+ }
+ Encode(buff + prefix_size_, k);
+ return {buff, key_size_};
+ }
+
+  // Uses the internal buffer for the generated key; make sure there's only
+  // one caller, in a single thread.
+ Slice Next() { return Next(buff_); }
+
+  // Uses the internal buffer for the generated prefix.
+ Slice NextPrefix() {
+ assert(prefix_num_ > 0);
+ return Next(buff_, 0, true);
+ }
+
+  // Helper function to get a non-existing key.
+ Slice NextNonExist() { return Next(buff_, 1); }
+
+ Slice MaxKey(char* buff) const {
+ memset(buff, 0xff, key_size_);
+ return {buff, key_size_};
+ }
+
+ Slice MinKey(char* buff) const {
+ memset(buff, 0, key_size_);
+ return {buff, key_size_};
+ }
+
+ // max_key: the max key that it could generate
+ // prefix_num: the max prefix number
+ // key_size: in bytes
+ explicit KeyGenerator(Random* rnd, uint64_t max_key = 100 * 1024 * 1024,
+ size_t prefix_num = 0, size_t key_size = 10) {
+ prefix_num_ = prefix_num;
+ key_size_ = key_size;
+ max_key_ = max_key;
+ rnd_ = rnd;
+ if (prefix_num > 0) {
+ prefix_size_ = 4; // TODO: support different prefix_size
+ }
+ }
+
+ // generate sequential keys
+ explicit KeyGenerator(uint64_t max_key = 100 * 1024 * 1024,
+ size_t key_size = 10) {
+ key_size_ = key_size;
+ max_key_ = max_key;
+ rnd_ = nullptr;
+ is_sequential_ = true;
+ }
+
+ private:
+ Random* rnd_;
+ size_t prefix_num_ = 0;
+ size_t prefix_size_ = 0;
+ size_t key_size_;
+ uint64_t max_key_;
+ bool is_sequential_ = false;
+ uint32_t next_sequential_key_ = 0;
+ char buff_[256] = {0};
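+  // keys are spaced MULTIPLIER apart, so keys generated with offset 1 or 2
+  // never collide with normal (offset 0) keys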
+ const int MULTIPLIER = 3;
+
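+  // Encode the value in big-endian order so that the encoded keys sort
+  // lexicographically in numeric order.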
+  static void Encode(char* buf, uint32_t value) {
+ if (port::kLittleEndian) {
+ buf[0] = static_cast<char>((value >> 24) & 0xff);
+ buf[1] = static_cast<char>((value >> 16) & 0xff);
+ buf[2] = static_cast<char>((value >> 8) & 0xff);
+ buf[3] = static_cast<char>(value & 0xff);
+ } else {
+ memcpy(buf, &value, sizeof(value));
+ }
+ }
+};
+
+static void SetupDB(benchmark::State& state, Options& options,
+ std::unique_ptr<DB>* db,
+ const std::string& test_name = "") {
+ options.create_if_missing = true;
+ auto env = Env::Default();
+ std::string db_path;
+ Status s = env->GetTestDirectory(&db_path);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ std::string db_name =
+ db_path + kFilePathSeparator + test_name + std::to_string(getpid());
+ DestroyDB(db_name, options);
+
+ DB* db_ptr = nullptr;
+ s = DB::Open(options, db_name, &db_ptr);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ db->reset(db_ptr);
+}
+
+static void TeardownDB(benchmark::State& state, const std::unique_ptr<DB>& db,
+ const Options& options, KeyGenerator& kg) {
+ char min_buff[256], max_buff[256];
+ const Range r(kg.MinKey(min_buff), kg.MaxKey(max_buff));
+ uint64_t size;
+ Status s = db->GetApproximateSizes(&r, 1, &size);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ state.counters["db_size"] = static_cast<double>(size);
+
+ std::string db_name = db->GetName();
+ s = db->Close();
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ DestroyDB(db_name, options);
+}
+
+static void DBOpen(benchmark::State& state) {
+ // create DB
+ std::unique_ptr<DB> db;
+ Options options;
+ SetupDB(state, options, &db, "DBOpen");
+
+ std::string db_name = db->GetName();
+ db->Close();
+
+ options.create_if_missing = false;
+
+ auto rnd = Random(123);
+
+ for (auto _ : state) {
+ {
+ DB* db_ptr = nullptr;
+ Status s = DB::Open(options, db_name, &db_ptr);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ db.reset(db_ptr);
+ }
+ state.PauseTiming();
+ auto wo = WriteOptions();
+ Status s;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 100; j++) {
+ s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ s = db->Flush(FlushOptions());
+ }
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ s = db->Close();
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ state.ResumeTiming();
+ }
+ DestroyDB(db_name, options);
+}
+
+BENCHMARK(DBOpen)->Iterations(200);  // specify the iteration count, as the
+                                     // db size is impacted by it
+
+static void DBClose(benchmark::State& state) {
+ // create DB
+ std::unique_ptr<DB> db;
+ Options options;
+ SetupDB(state, options, &db, "DBClose");
+
+ std::string db_name = db->GetName();
+ db->Close();
+
+ options.create_if_missing = false;
+
+ auto rnd = Random(12345);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ {
+ DB* db_ptr = nullptr;
+ Status s = DB::Open(options, db_name, &db_ptr);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ db.reset(db_ptr);
+ }
+ auto wo = WriteOptions();
+ Status s;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 100; j++) {
+ s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ s = db->Flush(FlushOptions());
+ }
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ state.ResumeTiming();
+ s = db->Close();
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ DestroyDB(db_name, options);
+}
+
+BENCHMARK(DBClose)->Iterations(200);  // specify the iteration count, as the
+                                      // db size is impacted by it
+
+static void DBPut(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ bool enable_statistics = state.range(3);
+ bool enable_wal = state.range(4);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
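+  // static so all benchmark threads share one DB; thread 0 does setup/teardown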
+ static std::unique_ptr<DB> db = nullptr;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ options.compaction_style = compaction_style;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "DBPut");
+ }
+
+ auto wo = WriteOptions();
+ wo.disableWAL = !enable_wal;
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ Slice key = kg.Next();
+ std::string val = rnd.RandomString(static_cast<int>(per_key_size));
+ state.ResumeTiming();
+ Status s = db->Put(wo, key, val);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ Status s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ if (enable_statistics) {
+ HistogramData histogram_data;
+ options.statistics->histogramData(DB_WRITE, &histogram_data);
+ state.counters["put_mean"] = histogram_data.average * std::milli::den;
+ state.counters["put_p95"] = histogram_data.percentile95 * std::milli::den;
+ state.counters["put_p99"] = histogram_data.percentile99 * std::milli::den;
+ }
+
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void DBPutArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {100l << 30}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ for (bool enable_statistics : {false, true}) {
+ for (bool wal : {false, true}) {
+ b->Args(
+ {comp_style, max_data, per_key_size, enable_statistics, wal});
+ }
+ }
+ }
+ }
+ }
+ b->ArgNames(
+ {"comp_style", "max_data", "per_key_size", "enable_statistics", "wal"});
+}
+
+static const uint64_t DBPutNum = 409600l;
+BENCHMARK(DBPut)->Threads(1)->Iterations(DBPutNum)->Apply(DBPutArguments);
+BENCHMARK(DBPut)->Threads(8)->Iterations(DBPutNum / 8)->Apply(DBPutArguments);
+
+static void ManualCompaction(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ bool enable_statistics = state.range(3);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ options.compaction_style = compaction_style;
+ // No auto compaction
+ options.disable_auto_compactions = true;
+ options.level0_file_num_compaction_trigger = (1 << 30);
+ options.level0_slowdown_writes_trigger = (1 << 30);
+ options.level0_stop_writes_trigger = (1 << 30);
+ options.soft_pending_compaction_bytes_limit = 0;
+ options.hard_pending_compaction_bytes_limit = 0;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "ManualCompaction");
+ }
+
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ uint64_t flush_mod = key_num / 4; // at least generate 4 files for compaction
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+    if ((i + 1) % flush_mod == 0) {
+ s = db->Flush(FlushOptions());
+ }
+ }
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ std::vector<LiveFileMetaData> files_meta;
+ db->GetLiveFilesMetaData(&files_meta);
+ std::vector<std::string> files_before_compact;
+ files_before_compact.reserve(files_meta.size());
+ for (const LiveFileMetaData& file : files_meta) {
+ files_before_compact.emplace_back(file.name);
+ }
+
+ SetPerfLevel(kEnableTime);
+ get_perf_context()->EnablePerLevelPerfContext();
+ get_perf_context()->Reset();
+ CompactionOptions co;
+ for (auto _ : state) {
+ s = db->CompactFiles(co, files_before_compact, 1);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ if (enable_statistics) {
+ HistogramData histogram_data;
+ options.statistics->histogramData(COMPACTION_TIME, &histogram_data);
+ state.counters["comp_time"] = histogram_data.average;
+ options.statistics->histogramData(COMPACTION_CPU_TIME, &histogram_data);
+ state.counters["comp_cpu_time"] = histogram_data.average;
+ options.statistics->histogramData(COMPACTION_OUTFILE_SYNC_MICROS,
+ &histogram_data);
+ state.counters["comp_outfile_sync"] = histogram_data.average;
+
+ state.counters["comp_read"] = static_cast<double>(
+ options.statistics->getTickerCount(COMPACT_READ_BYTES));
+ state.counters["comp_write"] = static_cast<double>(
+ options.statistics->getTickerCount(COMPACT_WRITE_BYTES));
+
+ state.counters["user_key_comparison_count"] =
+ static_cast<double>(get_perf_context()->user_key_comparison_count);
+ state.counters["block_read_count"] =
+ static_cast<double>(get_perf_context()->block_read_count);
+ state.counters["block_read_time"] =
+ static_cast<double>(get_perf_context()->block_read_time);
+ state.counters["block_checksum_time"] =
+ static_cast<double>(get_perf_context()->block_checksum_time);
+ state.counters["new_table_block_iter_nanos"] =
+ static_cast<double>(get_perf_context()->new_table_block_iter_nanos);
+ state.counters["new_table_iterator_nanos"] =
+ static_cast<double>(get_perf_context()->new_table_iterator_nanos);
+ state.counters["find_table_nanos"] =
+ static_cast<double>(get_perf_context()->find_table_nanos);
+ }
+
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void ManualCompactionArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal}) {
+ for (int64_t max_data : {32l << 20, 128l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ for (bool enable_statistics : {false, true}) {
+ b->Args({comp_style, max_data, per_key_size, enable_statistics});
+ }
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics"});
+}
+
+BENCHMARK(ManualCompaction)->Iterations(1)->Apply(ManualCompactionArguments);
+
+static void ManualFlush(benchmark::State& state) {
+ uint64_t key_num = state.range(0);
+ uint64_t per_key_size = state.range(1);
+ bool enable_statistics = true;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ options.disable_auto_compactions = true;
+ options.level0_file_num_compaction_trigger = (1 << 30);
+ options.level0_slowdown_writes_trigger = (1 << 30);
+ options.level0_stop_writes_trigger = (1 << 30);
+ options.soft_pending_compaction_bytes_limit = 0;
+ options.hard_pending_compaction_bytes_limit = 0;
+ options.write_buffer_size = 2l << 30; // 2G to avoid auto flush
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "ManualFlush");
+ }
+
+ auto wo = WriteOptions();
+ for (auto _ : state) {
+ state.PauseTiming();
+    for (uint64_t i = 0; i < key_num; i++) {
+      Status s = db->Put(wo, kg.Next(),
+                         rnd.RandomString(static_cast<int>(per_key_size)));
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+      }
+    }
+ FlushOptions fo;
+ state.ResumeTiming();
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ Status s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ if (enable_statistics) {
+ HistogramData histogram_data;
+ options.statistics->histogramData(FLUSH_TIME, &histogram_data);
+ state.counters["flush_time"] = histogram_data.average;
+ state.counters["flush_write_bytes"] = static_cast<double>(
+ options.statistics->getTickerCount(FLUSH_WRITE_BYTES));
+ }
+
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void ManualFlushArguments(benchmark::internal::Benchmark* b) {
+ for (int64_t key_num : {1l << 10, 8l << 10, 64l << 10}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ b->Args({key_num, per_key_size});
+ }
+ }
+ b->ArgNames({"key_num", "per_key_size"});
+}
+
+BENCHMARK(ManualFlush)->Iterations(1)->Apply(ManualFlushArguments);
+
+static void DBGet(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ bool enable_statistics = state.range(3);
+ bool negative_query = state.range(4);
+ bool enable_filter = state.range(5);
+ bool mmap = state.range(6);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ if (mmap) {
+ options.allow_mmap_reads = true;
+ options.compression = kNoCompression;
+ }
+ options.compaction_style = compaction_style;
+
+ BlockBasedTableOptions table_options;
+ if (enable_filter) {
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+ }
+ if (mmap) {
+ table_options.no_block_cache = true;
+ table_options.block_restart_interval = 1;
+ }
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "DBGet");
+
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ }
+
+ auto ro = ReadOptions();
+ if (mmap) {
+ ro.verify_checksums = false;
+ }
+ size_t not_found = 0;
+ if (negative_query) {
+ for (auto _ : state) {
+ std::string val;
+ Status s = db->Get(ro, kg.NextNonExist(), &val);
+ if (s.IsNotFound()) {
+ not_found++;
+ }
+ }
+ } else {
+ for (auto _ : state) {
+ std::string val;
+ Status s = db->Get(ro, kg.Next(), &val);
+ if (s.IsNotFound()) {
+ not_found++;
+ }
+ }
+ }
+
+ state.counters["neg_qu_pct"] = benchmark::Counter(
+ static_cast<double>(not_found * 100), benchmark::Counter::kAvgIterations);
+
+ if (state.thread_index() == 0) {
+ if (enable_statistics) {
+ HistogramData histogram_data;
+ options.statistics->histogramData(DB_GET, &histogram_data);
+ state.counters["get_mean"] = histogram_data.average * std::milli::den;
+ state.counters["get_p95"] = histogram_data.percentile95 * std::milli::den;
+ state.counters["get_p99"] = histogram_data.percentile99 * std::milli::den;
+ }
+
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void DBGetArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {128l << 20, 512l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ for (bool enable_statistics : {false, true}) {
+ for (bool negative_query : {false, true}) {
+ for (bool enable_filter : {false, true}) {
+ for (bool mmap : {false, true}) {
+ b->Args({comp_style, max_data, per_key_size, enable_statistics,
+ negative_query, enable_filter, mmap});
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
+ "negative_query", "enable_filter", "mmap"});
+}
+
+static constexpr uint64_t kDBGetNum = 1l << 20;
+BENCHMARK(DBGet)->Threads(1)->Iterations(kDBGetNum)->Apply(DBGetArguments);
+BENCHMARK(DBGet)->Threads(8)->Iterations(kDBGetNum / 8)->Apply(DBGetArguments);
+
+static void SimpleGetWithPerfContext(benchmark::State& state) {
+ // setup DB
+ static std::unique_ptr<DB> db;
+ std::string db_name;
+ Options options;
+ options.create_if_missing = true;
+ options.arena_block_size = 8 << 20;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, 1024);
+
+ if (state.thread_index() == 0) {
+ auto env = Env::Default();
+ std::string db_path;
+ Status s = env->GetTestDirectory(&db_path);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ db_name = db_path + "/simple_get_" + std::to_string(getpid());
+ DestroyDB(db_name, options);
+
+ {
+ DB* db_ptr = nullptr;
+ s = DB::Open(options, db_name, &db_ptr);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ db.reset(db_ptr);
+ }
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < 1024; i++) {
+ s = db->Put(wo, kg.Next(), rnd.RandomString(1024));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ FlushOptions fo;
+ s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ auto ro = ReadOptions();
+ size_t not_found = 0;
+ uint64_t user_key_comparison_count = 0;
+ uint64_t block_read_time = 0;
+ uint64_t block_checksum_time = 0;
+ uint64_t get_snapshot_time = 0;
+ uint64_t get_post_process_time = 0;
+ uint64_t get_from_output_files_time = 0;
+ uint64_t new_table_block_iter_nanos = 0;
+ uint64_t block_seek_nanos = 0;
+ uint64_t get_cpu_nanos = 0;
+ uint64_t get_from_table_nanos = 0;
+ SetPerfLevel(kEnableTime);
+ get_perf_context()->EnablePerLevelPerfContext();
+ for (auto _ : state) {
+ std::string val;
+ get_perf_context()->Reset();
+ Status s = db->Get(ro, kg.NextNonExist(), &val);
+ if (s.IsNotFound()) {
+ not_found++;
+ }
+ user_key_comparison_count += get_perf_context()->user_key_comparison_count;
+ block_read_time += get_perf_context()->block_read_time;
+ block_checksum_time += get_perf_context()->block_checksum_time;
+ get_snapshot_time += get_perf_context()->get_snapshot_time;
+ get_post_process_time += get_perf_context()->get_post_process_time;
+ get_from_output_files_time +=
+ get_perf_context()->get_from_output_files_time;
+ new_table_block_iter_nanos +=
+ get_perf_context()->new_table_block_iter_nanos;
+ block_seek_nanos += get_perf_context()->block_seek_nanos;
+ get_cpu_nanos += get_perf_context()->get_cpu_nanos;
+ get_from_table_nanos +=
+ (*(get_perf_context()->level_to_perf_context))[0].get_from_table_nanos;
+ }
+
+ state.counters["neg_qu_pct"] = benchmark::Counter(
+ static_cast<double>(not_found * 100), benchmark::Counter::kAvgIterations);
+ state.counters["user_key_comparison_count"] =
+ benchmark::Counter(static_cast<double>(user_key_comparison_count),
+ benchmark::Counter::kAvgIterations);
+ state.counters["block_read_time"] = benchmark::Counter(
+ static_cast<double>(block_read_time), benchmark::Counter::kAvgIterations);
+ state.counters["block_checksum_time"] =
+ benchmark::Counter(static_cast<double>(block_checksum_time),
+ benchmark::Counter::kAvgIterations);
+ state.counters["get_snapshot_time"] =
+ benchmark::Counter(static_cast<double>(get_snapshot_time),
+ benchmark::Counter::kAvgIterations);
+ state.counters["get_post_process_time"] =
+ benchmark::Counter(static_cast<double>(get_post_process_time),
+ benchmark::Counter::kAvgIterations);
+ state.counters["get_from_output_files_time"] =
+ benchmark::Counter(static_cast<double>(get_from_output_files_time),
+ benchmark::Counter::kAvgIterations);
+ state.counters["new_table_block_iter_nanos"] =
+ benchmark::Counter(static_cast<double>(new_table_block_iter_nanos),
+ benchmark::Counter::kAvgIterations);
+ state.counters["block_seek_nanos"] =
+ benchmark::Counter(static_cast<double>(block_seek_nanos),
+ benchmark::Counter::kAvgIterations);
+ state.counters["get_cpu_nanos"] = benchmark::Counter(
+ static_cast<double>(get_cpu_nanos), benchmark::Counter::kAvgIterations);
+ state.counters["get_from_table_nanos"] =
+ benchmark::Counter(static_cast<double>(get_from_table_nanos),
+ benchmark::Counter::kAvgIterations);
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+BENCHMARK(SimpleGetWithPerfContext)->Iterations(1000000);
+
+static void DBGetMergeOperandsInMemtable(benchmark::State& state) {
+ const uint64_t kDataLen = 16 << 20; // 16MB
+ const uint64_t kValueLen = 64;
+ const uint64_t kNumEntries = kDataLen / kValueLen;
+ const uint64_t kNumEntriesPerKey = state.range(0);
+ const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+
+ Options options;
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
+ // Make memtable large enough that automatic flush will not be triggered.
+ options.write_buffer_size = 2 * kDataLen;
+
+ KeyGenerator sequential_key_gen(kNumKeys);
+ auto rnd = Random(301 + state.thread_index());
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "DBGetMergeOperandsInMemtable");
+
+ // load db
+ auto write_opts = WriteOptions();
+ write_opts.disableWAL = true;
+ for (uint64_t i = 0; i < kNumEntries; i++) {
+ Status s = db->Merge(write_opts, sequential_key_gen.Next(),
+ rnd.RandomString(static_cast<int>(kValueLen)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ }
+
+ KeyGenerator random_key_gen(kNumKeys);
+ std::vector<PinnableSlice> value_operands;
+ value_operands.resize(kNumEntriesPerKey);
+ GetMergeOperandsOptions get_merge_ops_opts;
+ get_merge_ops_opts.expected_max_number_of_operands =
+ static_cast<int>(kNumEntriesPerKey);
+ for (auto _ : state) {
+ int num_value_operands = 0;
+ Status s = db->GetMergeOperands(
+ ReadOptions(), db->DefaultColumnFamily(), random_key_gen.Next(),
+ value_operands.data(), &get_merge_ops_opts, &num_value_operands);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
+ state.SkipWithError("Unexpected number of merge operands found for key");
+ }
+ for (auto& value_operand : value_operands) {
+ value_operand.Reset();
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, random_key_gen);
+ }
+}
+
+static void DBGetMergeOperandsInSstFile(benchmark::State& state) {
+ const uint64_t kDataLen = 16 << 20; // 16MB
+ const uint64_t kValueLen = 64;
+ const uint64_t kNumEntries = kDataLen / kValueLen;
+ const uint64_t kNumEntriesPerKey = state.range(0);
+ const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;
+ const bool kMmap = state.range(1);
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+
+ BlockBasedTableOptions table_options;
+ if (kMmap) {
+ table_options.no_block_cache = true;
+ } else {
+ // Make block cache large enough that eviction will not be triggered.
+ table_options.block_cache = NewLRUCache(2 * kDataLen);
+ }
+
+ Options options;
+ if (kMmap) {
+ options.allow_mmap_reads = true;
+ }
+ options.compression = kNoCompression;
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ // Make memtable large enough that automatic flush will not be triggered.
+ options.write_buffer_size = 2 * kDataLen;
+
+ KeyGenerator sequential_key_gen(kNumKeys);
+ auto rnd = Random(301 + state.thread_index());
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "DBGetMergeOperandsInBlockCache");
+
+ // load db
+ //
+ // Take a snapshot after each cycle of merges to ensure flush cannot
+ // merge any entries.
+ std::vector<const Snapshot*> snapshots;
+ snapshots.resize(kNumEntriesPerKey);
+ auto write_opts = WriteOptions();
+ write_opts.disableWAL = true;
+ for (uint64_t i = 0; i < kNumEntriesPerKey; i++) {
+ for (uint64_t j = 0; j < kNumKeys; j++) {
+ Status s = db->Merge(write_opts, sequential_key_gen.Next(),
+ rnd.RandomString(static_cast<int>(kValueLen)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ snapshots[i] = db->GetSnapshot();
+ }
+
+ // Flush to an L0 file; read back to prime the cache/mapped memory.
+ db->Flush(FlushOptions());
+ for (uint64_t i = 0; i < kNumKeys; ++i) {
+ std::string value;
+ Status s = db->Get(ReadOptions(), sequential_key_gen.Next(), &value);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+    // already on thread 0 here, so release the snapshots directly
+    for (uint64_t i = 0; i < kNumEntriesPerKey; ++i) {
+      db->ReleaseSnapshot(snapshots[i]);
+    }
+ }
+
+ KeyGenerator random_key_gen(kNumKeys);
+ std::vector<PinnableSlice> value_operands;
+ value_operands.resize(kNumEntriesPerKey);
+ GetMergeOperandsOptions get_merge_ops_opts;
+ get_merge_ops_opts.expected_max_number_of_operands =
+ static_cast<int>(kNumEntriesPerKey);
+ for (auto _ : state) {
+ int num_value_operands = 0;
+ ReadOptions read_opts;
+ read_opts.verify_checksums = false;
+ Status s = db->GetMergeOperands(
+ read_opts, db->DefaultColumnFamily(), random_key_gen.Next(),
+ value_operands.data(), &get_merge_ops_opts, &num_value_operands);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
+ state.SkipWithError("Unexpected number of merge operands found for key");
+ }
+ for (auto& value_operand : value_operands) {
+ value_operand.Reset();
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, random_key_gen);
+ }
+}
+
+static void DBGetMergeOperandsInMemtableArguments(
+ benchmark::internal::Benchmark* b) {
+ for (int entries_per_key : {1, 32, 1024}) {
+ b->Args({entries_per_key});
+ }
+ b->ArgNames({"entries_per_key"});
+}
+
+static void DBGetMergeOperandsInSstFileArguments(
+ benchmark::internal::Benchmark* b) {
+ for (int entries_per_key : {1, 32, 1024}) {
+ for (bool mmap : {false, true}) {
+ b->Args({entries_per_key, mmap});
+ }
+ }
+ b->ArgNames({"entries_per_key", "mmap"});
+}
+
+BENCHMARK(DBGetMergeOperandsInMemtable)
+ ->Threads(1)
+ ->Apply(DBGetMergeOperandsInMemtableArguments);
+BENCHMARK(DBGetMergeOperandsInMemtable)
+ ->Threads(8)
+ ->Apply(DBGetMergeOperandsInMemtableArguments);
+BENCHMARK(DBGetMergeOperandsInSstFile)
+ ->Threads(1)
+ ->Apply(DBGetMergeOperandsInSstFileArguments);
+BENCHMARK(DBGetMergeOperandsInSstFile)
+ ->Threads(8)
+ ->Apply(DBGetMergeOperandsInSstFileArguments);
+
+std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
+ Random* rnd) {
+ char buf[50];
+ char* p = &buf[0];
+ snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
+ std::string k(p);
+ if (padding_size) {
+ k += rnd->RandomString(padding_size);
+ }
+
+ return k;
+}
+
+void GenerateRandomKVs(std::vector<std::string>* keys,
+ std::vector<std::string>* values, const int from,
+ const int len, const int step = 1,
+ const int padding_size = 0,
+ const int keys_share_prefix = 1) {
+ Random rnd(302);
+
+ // generate different prefix
+ for (int i = from; i < from + len; i += step) {
+ // generating keys that share the prefix
+ for (int j = 0; j < keys_share_prefix; ++j) {
+ keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
+ // 100 bytes values
+ values->emplace_back(rnd.RandomString(100));
+ }
+ }
+}
+
+// TODO: move it to a different file, as it's testing an internal API
+static void DataBlockSeek(benchmark::State& state) {
+ Random rnd(301);
+ Options options = Options();
+
+ BlockBuilder builder(16, true, false,
+ BlockBasedTableOptions::kDataBlockBinarySearch);
+
+ int num_records = 500;
+ std::vector<std::string> keys;
+ std::vector<std::string> values;
+
+ GenerateRandomKVs(&keys, &values, 0, num_records);
+
+ for (int i = 0; i < num_records; i++) {
+ std::string ukey(keys[i] + "1");
+ InternalKey ikey(ukey, 0, kTypeValue);
+ builder.Add(ikey.Encode().ToString(), values[i]);
+ }
+
+ Slice rawblock = builder.Finish();
+
+ BlockContents contents;
+ contents.data = rawblock;
+ Block reader(std::move(contents));
+
+ SetPerfLevel(kEnableTime);
+ uint64_t total = 0;
+ for (auto _ : state) {
+ DataBlockIter* iter = reader.NewDataIterator(options.comparator,
+ kDisableGlobalSequenceNumber);
+ uint32_t index = rnd.Uniform(static_cast<int>(num_records));
+ std::string ukey(keys[index] + "1");
+ InternalKey ikey(ukey, 0, kTypeValue);
+ get_perf_context()->Reset();
+ bool may_exist = iter->SeekForGet(ikey.Encode().ToString());
+ if (!may_exist) {
+ state.SkipWithError("key not found");
+ }
+ total += get_perf_context()->block_seek_nanos;
+ delete iter;
+ }
+ state.counters["seek_ns"] = benchmark::Counter(
+ static_cast<double>(total), benchmark::Counter::kAvgIterations);
+}
+
+BENCHMARK(DataBlockSeek)->Iterations(1000000);
+
+static void IteratorSeek(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ bool enable_statistics = state.range(3);
+ bool negative_query = state.range(4);
+ bool enable_filter = state.range(5);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ options.compaction_style = compaction_style;
+
+ if (enable_filter) {
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ }
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "IteratorSeek");
+
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ }
+
+ for (auto _ : state) {
+ std::unique_ptr<Iterator> iter{nullptr};
+ state.PauseTiming();
+ if (!iter) {
+ iter.reset(db->NewIterator(ReadOptions()));
+ }
+ Slice key = negative_query ? kg.NextNonExist() : kg.Next();
+ if (!iter->status().ok()) {
+ state.SkipWithError(iter->status().ToString().c_str());
+ return;
+ }
+ state.ResumeTiming();
+ iter->Seek(key);
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void IteratorSeekArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {128l << 20, 512l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ for (bool enable_statistics : {false, true}) {
+ for (bool negative_query : {false, true}) {
+ for (bool enable_filter : {false, true}) {
+ b->Args({comp_style, max_data, per_key_size, enable_statistics,
+ negative_query, enable_filter});
+ }
+ }
+ }
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
+ "negative_query", "enable_filter"});
+}
+
+static constexpr uint64_t kDBSeekNum = 10l << 10;
+BENCHMARK(IteratorSeek)
+ ->Threads(1)
+ ->Iterations(kDBSeekNum)
+ ->Apply(IteratorSeekArguments);
+BENCHMARK(IteratorSeek)
+ ->Threads(8)
+ ->Iterations(kDBSeekNum / 8)
+ ->Apply(IteratorSeekArguments);
+
+static void IteratorNext(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ options.compaction_style = compaction_style;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "IteratorNext");
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ }
+
+ for (auto _ : state) {
+ std::unique_ptr<Iterator> iter{nullptr};
+ state.PauseTiming();
+ if (!iter) {
+ iter.reset(db->NewIterator(ReadOptions()));
+ }
+ while (!iter->Valid()) {
+ iter->Seek(kg.Next());
+ if (!iter->status().ok()) {
+ state.SkipWithError(iter->status().ToString().c_str());
+ }
+ }
+ state.ResumeTiming();
+ iter->Next();
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void IteratorNextArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {128l << 20, 512l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ b->Args({comp_style, max_data, per_key_size});
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size"});
+}
+static constexpr uint64_t kIteratorNextNum = 10l << 10;
+BENCHMARK(IteratorNext)
+ ->Iterations(kIteratorNextNum)
+ ->Apply(IteratorNextArguments);
+
+static void IteratorNextWithPerfContext(benchmark::State& state) {
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, 1024);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "IteratorNextWithPerfContext");
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < 1024; i++) {
+ Status s = db->Put(wo, kg.Next(), rnd.RandomString(1024));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ Status s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ FlushOptions fo;
+ s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ uint64_t user_key_comparison_count = 0;
+ uint64_t internal_key_skipped_count = 0;
+ uint64_t find_next_user_entry_time = 0;
+ uint64_t iter_next_cpu_nanos = 0;
+
+ SetPerfLevel(kEnableTime);
+ get_perf_context()->EnablePerLevelPerfContext();
+
+ for (auto _ : state) {
+ std::unique_ptr<Iterator> iter{nullptr};
+ state.PauseTiming();
+ if (!iter) {
+ iter.reset(db->NewIterator(ReadOptions()));
+ }
+ while (!iter->Valid()) {
+ iter->Seek(kg.Next());
+ if (!iter->status().ok()) {
+ state.SkipWithError(iter->status().ToString().c_str());
+ }
+ }
+ get_perf_context()->Reset();
+ state.ResumeTiming();
+
+ iter->Next();
+ user_key_comparison_count += get_perf_context()->user_key_comparison_count;
+ internal_key_skipped_count +=
+ get_perf_context()->internal_key_skipped_count;
+ find_next_user_entry_time += get_perf_context()->find_next_user_entry_time;
+ iter_next_cpu_nanos += get_perf_context()->iter_next_cpu_nanos;
+ }
+
+ state.counters["user_key_comparison_count"] =
+ benchmark::Counter(static_cast<double>(user_key_comparison_count),
+ benchmark::Counter::kAvgIterations);
+ state.counters["internal_key_skipped_count"] =
+ benchmark::Counter(static_cast<double>(internal_key_skipped_count),
+ benchmark::Counter::kAvgIterations);
+ state.counters["find_next_user_entry_time"] =
+ benchmark::Counter(static_cast<double>(find_next_user_entry_time),
+ benchmark::Counter::kAvgIterations);
+ state.counters["iter_next_cpu_nanos"] =
+ benchmark::Counter(static_cast<double>(iter_next_cpu_nanos),
+ benchmark::Counter::kAvgIterations);
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+BENCHMARK(IteratorNextWithPerfContext)->Iterations(100000);
+
+static void IteratorPrev(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ std::string db_name;
+ Options options;
+ options.compaction_style = compaction_style;
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "IteratorPrev");
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ }
+
+ for (auto _ : state) {
+ std::unique_ptr<Iterator> iter{nullptr};
+ state.PauseTiming();
+ if (!iter) {
+ iter.reset(db->NewIterator(ReadOptions()));
+ }
+ while (!iter->Valid()) {
+ iter->Seek(kg.Next());
+ if (!iter->status().ok()) {
+ state.SkipWithError(iter->status().ToString().c_str());
+ }
+ }
+ state.ResumeTiming();
+ iter->Prev();
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void IteratorPrevArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {128l << 20, 512l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ b->Args({comp_style, max_data, per_key_size});
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size"});
+}
+
+static constexpr uint64_t kIteratorPrevNum = 10l << 10;
+BENCHMARK(IteratorPrev)
+ ->Iterations(kIteratorPrevNum)
+ ->Apply(IteratorPrevArguments);
+
+static void PrefixSeek(benchmark::State& state) {
+ auto compaction_style = static_cast<CompactionStyle>(state.range(0));
+ uint64_t max_data = state.range(1);
+ uint64_t per_key_size = state.range(2);
+ bool enable_statistics = state.range(3);
+ bool enable_filter = state.range(4);
+ uint64_t key_num = max_data / per_key_size;
+
+ // setup DB
+ static std::unique_ptr<DB> db;
+ Options options;
+ if (enable_statistics) {
+ options.statistics = CreateDBStatistics();
+ }
+ options.compaction_style = compaction_style;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(4));
+
+ if (enable_filter) {
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ }
+
+ auto rnd = Random(301 + state.thread_index());
+ KeyGenerator kg(&rnd, key_num, key_num / 100);
+
+ if (state.thread_index() == 0) {
+ SetupDB(state, options, &db, "PrefixSeek");
+
+ // load db
+ auto wo = WriteOptions();
+ wo.disableWAL = true;
+ for (uint64_t i = 0; i < key_num; i++) {
+ Status s = db->Put(wo, kg.Next(),
+ rnd.RandomString(static_cast<int>(per_key_size)));
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ FlushOptions fo;
+ Status s = db->Flush(fo);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+
+ auto db_full = static_cast_with_check<DBImpl>(db.get());
+ s = db_full->WaitForCompact(true);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+ }
+
+ for (auto _ : state) {
+ std::unique_ptr<Iterator> iter{nullptr};
+ state.PauseTiming();
+ if (!iter) {
+ iter.reset(db->NewIterator(ReadOptions()));
+ }
+ state.ResumeTiming();
+ iter->Seek(kg.NextPrefix());
+ if (!iter->status().ok()) {
+ state.SkipWithError(iter->status().ToString().c_str());
+ return;
+ }
+ }
+
+ if (state.thread_index() == 0) {
+ TeardownDB(state, db, options, kg);
+ }
+}
+
+static void PrefixSeekArguments(benchmark::internal::Benchmark* b) {
+ for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
+ kCompactionStyleFIFO}) {
+ for (int64_t max_data : {128l << 20, 512l << 20}) {
+ for (int64_t per_key_size : {256, 1024}) {
+ for (bool enable_statistics : {false, true}) {
+ for (bool enable_filter : {false, true}) {
+ b->Args({comp_style, max_data, per_key_size, enable_statistics,
+ enable_filter});
+ }
+ }
+ }
+ }
+ }
+ b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
+ "enable_filter"});
+}
+
+static constexpr uint64_t kPrefixSeekNum = 10l << 10;
+BENCHMARK(PrefixSeek)->Iterations(kPrefixSeekNum)->Apply(PrefixSeekArguments);
+BENCHMARK(PrefixSeek)
+ ->Threads(8)
+ ->Iterations(kPrefixSeekNum / 8)
+ ->Apply(PrefixSeekArguments);
+
+// TODO: move it to a different file, as it's testing an internal API
+static void RandomAccessFileReaderRead(benchmark::State& state) {
+ bool enable_statistics = state.range(0);
+ constexpr int kFileNum = 10;
+ auto env = Env::Default();
+ auto fs = env->GetFileSystem();
+ std::string db_path;
+ Status s = env->GetTestDirectory(&db_path);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ return;
+ }
+
+ // Setup multiple `RandomAccessFileReader`s with different parameters to be
+ // used for test
+ Random rand(301);
+ std::string fname_base =
+ db_path + kFilePathSeparator + "random-access-file-reader-read";
+ std::vector<std::unique_ptr<RandomAccessFileReader>> readers;
+ auto statistics_share = CreateDBStatistics();
+ Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr;
+ for (int i = 0; i < kFileNum; i++) {
+ std::string fname = fname_base + std::to_string(i);
+ std::string content = rand.RandomString(kDefaultPageSize);
+ std::unique_ptr<WritableFile> tgt_file;
+ env->NewWritableFile(fname, &tgt_file, EnvOptions());
+ tgt_file->Append(content);
+ tgt_file->Close();
+
+ std::unique_ptr<FSRandomAccessFile> f;
+ fs->NewRandomAccessFile(fname, FileOptions(), &f, nullptr);
+ int rand_num = rand.Next() % 3;
+ auto temperature = rand_num == 0 ? Temperature::kUnknown
+ : rand_num == 1 ? Temperature::kWarm
+ : Temperature::kCold;
+ readers.emplace_back(new RandomAccessFileReader(
+ std::move(f), fname, env->GetSystemClock().get(), nullptr, statistics,
+ 0, nullptr, nullptr, {}, temperature, rand_num == 1));
+ }
+
+ IOOptions io_options;
+ std::unique_ptr<char[]> scratch(new char[2048]);
+ Slice result;
+ uint64_t idx = 0;
+ for (auto _ : state) {
+ s = readers[idx++ % kFileNum]->Read(io_options, 0, kDefaultPageSize / 3,
+ &result, scratch.get(), nullptr,
+ Env::IO_TOTAL);
+ if (!s.ok()) {
+ state.SkipWithError(s.ToString().c_str());
+ }
+ }
+
+ // clean up
+ for (int i = 0; i < kFileNum; i++) {
+ std::string fname = fname_base + std::to_string(i);
+ env->DeleteFile(fname); // ignore return, okay to fail cleanup
+ }
+}
+
+BENCHMARK(RandomAccessFileReaderRead)
+ ->Iterations(1000000)
+ ->Arg(0)
+ ->Arg(1)
+ ->ArgName("enable_statistics");
+
+} // namespace ROCKSDB_NAMESPACE
+
+BENCHMARK_MAIN();
diff --git a/src/rocksdb/microbench/ribbon_bench.cc b/src/rocksdb/microbench/ribbon_bench.cc
new file mode 100644
index 000000000..d0fb2ec9a
--- /dev/null
+++ b/src/rocksdb/microbench/ribbon_bench.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+// This is a simple micro-benchmark comparing the ribbon filter against other
+// filters. For a more comprehensive benchmark, please check the dedicated
+// util/filter_bench.
+#include "benchmark/benchmark.h"
+#include "table/block_based/filter_policy_internal.h"
+#include "table/block_based/mock_block_based_table.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct KeyMaker {
+ explicit KeyMaker(size_t avg_size)
+ : smallest_size_(avg_size),
+ buf_size_(avg_size + 11), // pad to vary key size and alignment
+ buf_(new char[buf_size_]) {
+ memset(buf_.get(), 0, buf_size_);
+ assert(smallest_size_ > 8);
+ }
+ size_t smallest_size_;
+ size_t buf_size_;
+ std::unique_ptr<char[]> buf_;
+
+ // Returns a unique(-ish) key based on the given parameter values. Each
+ // call returns a Slice from the same buffer so previously returned
+ // Slices should be considered invalidated.
+ Slice Get(uint32_t filter_num, uint32_t val_num) const {
+ size_t start = val_num % 4;
+ size_t len = smallest_size_;
+ // To get range [avg_size - 2, avg_size + 2]
+ // use range [smallest_size, smallest_size + 4]
+ len += FastRange32((val_num >> 5) * 1234567891, 5);
+ char *data = buf_.get() + start;
+ // Populate key data such that all data makes it into a key of at
+ // least 8 bytes. We also don't want all the within-filter key
+ // variance confined to a contiguous 32 bits, because then a 32 bit
+ // hash function can "cheat" the false positive rate by
+ // approximating a perfect hash.
+ EncodeFixed32(data, val_num);
+ EncodeFixed32(data + 4, filter_num + val_num);
+ // ensure clearing leftovers from different alignment
+ EncodeFixed32(data + 8, 0);
+ return {data, len};
+ }
+};
+
+// benchmark arguments:
+// 0. filter impl (like filter_bench -impl)
+// 1. filter config bits_per_key
+// 2. average data key length
+// 3. data entry number
+static void CustomArguments(benchmark::internal::Benchmark *b) {
+ const auto kImplCount =
+ static_cast<int>(BloomLikeFilterPolicy::GetAllFixedImpls().size());
+ for (int filter_impl = 0; filter_impl < kImplCount; ++filter_impl) {
+ for (int bits_per_key : {10, 20}) {
+ for (int key_len_avg : {10, 100}) {
+ for (int64_t entry_num : {1 << 10, 1 << 20}) {
+ b->Args({filter_impl, bits_per_key, key_len_avg, entry_num});
+ }
+ }
+ }
+ }
+ b->ArgNames({"filter_impl", "bits_per_key", "key_len_avg", "entry_num"});
+}
+
+static void FilterBuild(benchmark::State &state) {
+ // setup data
+ auto filter = BloomLikeFilterPolicy::Create(
+ BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
+ static_cast<double>(state.range(1)));
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
+ KeyMaker km(state.range(2));
+ std::unique_ptr<const char[]> owner;
+ const int64_t kEntryNum = state.range(3);
+ auto rnd = Random32(12345);
+ uint32_t filter_num = rnd.Next();
+ // run the test
+ for (auto _ : state) {
+ std::unique_ptr<FilterBitsBuilder> builder(tester->GetBuilder());
+ for (uint32_t i = 0; i < kEntryNum; i++) {
+ builder->AddKey(km.Get(filter_num, i));
+ }
+ auto ret = builder->Finish(&owner);
+ state.counters["size"] = static_cast<double>(ret.size());
+ }
+}
+BENCHMARK(FilterBuild)->Apply(CustomArguments);
+
+static void FilterQueryPositive(benchmark::State &state) {
+ // setup data
+ auto filter = BloomLikeFilterPolicy::Create(
+ BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
+ static_cast<double>(state.range(1)));
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
+ KeyMaker km(state.range(2));
+ std::unique_ptr<const char[]> owner;
+ const int64_t kEntryNum = state.range(3);
+ auto rnd = Random32(12345);
+ uint32_t filter_num = rnd.Next();
+ std::unique_ptr<FilterBitsBuilder> builder(tester->GetBuilder());
+ for (uint32_t i = 0; i < kEntryNum; i++) {
+ builder->AddKey(km.Get(filter_num, i));
+ }
+ auto data = builder->Finish(&owner);
+ std::unique_ptr<FilterBitsReader> reader{filter->GetFilterBitsReader(data)};
+
+ // run test
+ uint32_t i = 0;
+ for (auto _ : state) {
+ i++;
+ i = i % kEntryNum;
+ reader->MayMatch(km.Get(filter_num, i));
+ }
+}
+BENCHMARK(FilterQueryPositive)->Apply(CustomArguments);
+
+static void FilterQueryNegative(benchmark::State &state) {
+ // setup data
+ auto filter = BloomLikeFilterPolicy::Create(
+ BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
+ static_cast<double>(state.range(1)));
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
+ KeyMaker km(state.range(2));
+ std::unique_ptr<const char[]> owner;
+ const int64_t kEntryNum = state.range(3);
+ auto rnd = Random32(12345);
+ uint32_t filter_num = rnd.Next();
+ std::unique_ptr<FilterBitsBuilder> builder(tester->GetBuilder());
+ for (uint32_t i = 0; i < kEntryNum; i++) {
+ builder->AddKey(km.Get(filter_num, i));
+ }
+ auto data = builder->Finish(&owner);
+ std::unique_ptr<FilterBitsReader> reader{filter->GetFilterBitsReader(data)};
+
+ // run test
+ uint32_t i = 0;
+ double fp_cnt = 0;
+ for (auto _ : state) {
+ i++;
+ auto result = reader->MayMatch(km.Get(filter_num + 1, i));
+ if (result) {
+ fp_cnt++;
+ }
+ }
+ state.counters["fp_pct"] =
+ benchmark::Counter(fp_cnt * 100, benchmark::Counter::kAvgIterations);
+}
+BENCHMARK(FilterQueryNegative)->Apply(CustomArguments);
+
+} // namespace ROCKSDB_NAMESPACE
+
+BENCHMARK_MAIN();