summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/util/dynamic_bloom_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/util/dynamic_bloom_test.cc')
-rw-r--r--src/rocksdb/util/dynamic_bloom_test.cc324
1 files changed, 324 insertions, 0 deletions
diff --git a/src/rocksdb/util/dynamic_bloom_test.cc b/src/rocksdb/util/dynamic_bloom_test.cc
new file mode 100644
index 000000000..47d04a36c
--- /dev/null
+++ b/src/rocksdb/util/dynamic_bloom_test.cc
@@ -0,0 +1,324 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef GFLAGS
+#include <cstdio>
+int main() {
+ fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
+ return 0;
+}
+#else
+
+#include <algorithm>
+#include <atomic>
+#include <cinttypes>
+#include <functional>
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include "dynamic_bloom.h"
+#include "logging/logging.h"
+#include "memory/arena.h"
+#include "port/port.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "util/gflags_compat.h"
+#include "util/stop_watch.h"
+
+using GFLAGS_NAMESPACE::ParseCommandLineFlags;
+
+DEFINE_int32(bits_per_key, 10, "");
+DEFINE_int32(num_probes, 6, "");
+DEFINE_bool(enable_perf, false, "");
+
+namespace ROCKSDB_NAMESPACE {
+
+struct KeyMaker {
+ uint64_t a;
+ uint64_t b;
+
+ // Sequential, within a hash function block
+ inline Slice Seq(uint64_t i) {
+ a = i;
+ return Slice(reinterpret_cast<char *>(&a), sizeof(a));
+ }
+ // Not quite sequential, varies across hash function blocks
+ inline Slice Nonseq(uint64_t i) {
+ a = i;
+ b = i * 123;
+ return Slice(reinterpret_cast<char *>(this), sizeof(*this));
+ }
+ inline Slice Key(uint64_t i, bool nonseq) {
+ return nonseq ? Nonseq(i) : Seq(i);
+ }
+};
+
+class DynamicBloomTest : public testing::Test {};
+
+TEST_F(DynamicBloomTest, EmptyFilter) {
+ Arena arena;
+ DynamicBloom bloom1(&arena, 100, 2);
+ ASSERT_TRUE(!bloom1.MayContain("hello"));
+ ASSERT_TRUE(!bloom1.MayContain("world"));
+
+ DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2);
+ ASSERT_TRUE(!bloom2.MayContain("hello"));
+ ASSERT_TRUE(!bloom2.MayContain("world"));
+}
+
+TEST_F(DynamicBloomTest, Small) {
+ Arena arena;
+ DynamicBloom bloom1(&arena, 100, 2);
+ bloom1.Add("hello");
+ bloom1.Add("world");
+ ASSERT_TRUE(bloom1.MayContain("hello"));
+ ASSERT_TRUE(bloom1.MayContain("world"));
+ ASSERT_TRUE(!bloom1.MayContain("x"));
+ ASSERT_TRUE(!bloom1.MayContain("foo"));
+
+ DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2);
+ bloom2.Add("hello");
+ bloom2.Add("world");
+ ASSERT_TRUE(bloom2.MayContain("hello"));
+ ASSERT_TRUE(bloom2.MayContain("world"));
+ ASSERT_TRUE(!bloom2.MayContain("x"));
+ ASSERT_TRUE(!bloom2.MayContain("foo"));
+}
+
+TEST_F(DynamicBloomTest, SmallConcurrentAdd) {
+ Arena arena;
+ DynamicBloom bloom1(&arena, 100, 2);
+ bloom1.AddConcurrently("hello");
+ bloom1.AddConcurrently("world");
+ ASSERT_TRUE(bloom1.MayContain("hello"));
+ ASSERT_TRUE(bloom1.MayContain("world"));
+ ASSERT_TRUE(!bloom1.MayContain("x"));
+ ASSERT_TRUE(!bloom1.MayContain("foo"));
+
+ DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2);
+ bloom2.AddConcurrently("hello");
+ bloom2.AddConcurrently("world");
+ ASSERT_TRUE(bloom2.MayContain("hello"));
+ ASSERT_TRUE(bloom2.MayContain("world"));
+ ASSERT_TRUE(!bloom2.MayContain("x"));
+ ASSERT_TRUE(!bloom2.MayContain("foo"));
+}
+
+static uint32_t NextNum(uint32_t num) {
+ if (num < 10) {
+ num += 1;
+ } else if (num < 100) {
+ num += 10;
+ } else if (num < 1000) {
+ num += 100;
+ } else {
+ num = num * 26 / 10;
+ }
+ return num;
+}
+
+TEST_F(DynamicBloomTest, VaryingLengths) {
+ KeyMaker km;
+
+ // Count number of filters that significantly exceed the false positive rate
+ int mediocre_filters = 0;
+ int good_filters = 0;
+ uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
+
+ fprintf(stderr, "bits_per_key: %d num_probes: %d\n", FLAGS_bits_per_key,
+ num_probes);
+
+ // NB: FP rate impact of 32-bit hash is noticeable starting around 10M keys.
+ // But that effect is hidden if using sequential keys (unique hashes).
+ for (bool nonseq : {false, true}) {
+ const uint32_t max_num = FLAGS_enable_perf ? 40000000 : 400000;
+ for (uint32_t num = 1; num <= max_num; num = NextNum(num)) {
+ uint32_t bloom_bits = 0;
+ Arena arena;
+ bloom_bits = num * FLAGS_bits_per_key;
+ DynamicBloom bloom(&arena, bloom_bits, num_probes);
+ for (uint64_t i = 0; i < num; i++) {
+ bloom.Add(km.Key(i, nonseq));
+ ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq)));
+ }
+
+ // All added keys must match
+ for (uint64_t i = 0; i < num; i++) {
+ ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq)));
+ }
+
+ // Check false positive rate
+ int result = 0;
+ for (uint64_t i = 0; i < 30000; i++) {
+ if (bloom.MayContain(km.Key(i + 1000000000, nonseq))) {
+ result++;
+ }
+ }
+ double rate = result / 30000.0;
+
+ fprintf(stderr,
+ "False positives (%s keys): "
+ "%5.2f%% @ num = %6u, bloom_bits = %6u\n",
+ nonseq ? "nonseq" : "seq", rate * 100.0, num, bloom_bits);
+
+ if (rate > 0.0125)
+ mediocre_filters++; // Allowed, but not too often
+ else
+ good_filters++;
+ }
+ }
+
+ fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
+ mediocre_filters);
+ ASSERT_LE(mediocre_filters, good_filters / 25);
+}
+
+TEST_F(DynamicBloomTest, perf) {
+ KeyMaker km;
+ StopWatchNano timer(Env::Default());
+ uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
+
+ if (!FLAGS_enable_perf) {
+ return;
+ }
+
+ for (uint32_t m = 1; m <= 8; ++m) {
+ Arena arena;
+ const uint32_t num_keys = m * 8 * 1024 * 1024;
+ fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8);
+
+ DynamicBloom std_bloom(&arena, num_keys * 10, num_probes);
+
+ timer.Start();
+ for (uint64_t i = 1; i <= num_keys; ++i) {
+ std_bloom.Add(km.Seq(i));
+ }
+
+ uint64_t elapsed = timer.ElapsedNanos();
+ fprintf(stderr, "dynamic bloom, avg add latency %3g\n",
+ static_cast<double>(elapsed) / num_keys);
+
+ uint32_t count = 0;
+ timer.Start();
+ for (uint64_t i = 1; i <= num_keys; ++i) {
+ if (std_bloom.MayContain(km.Seq(i))) {
+ ++count;
+ }
+ }
+ ASSERT_EQ(count, num_keys);
+ elapsed = timer.ElapsedNanos();
+ assert(count > 0);
+ fprintf(stderr, "dynamic bloom, avg query latency %3g\n",
+ static_cast<double>(elapsed) / count);
+ }
+}
+
+TEST_F(DynamicBloomTest, concurrent_with_perf) {
+ uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
+
+ uint32_t m_limit = FLAGS_enable_perf ? 8 : 1;
+
+ uint32_t num_threads = 4;
+ std::vector<port::Thread> threads;
+
+ // NB: Uses sequential keys for speed, but that hides the FP rate
+ // impact of 32-bit hash, which is noticeable starting around 10M keys
+ // when they vary across hashing blocks.
+ for (uint32_t m = 1; m <= m_limit; ++m) {
+ Arena arena;
+ const uint32_t num_keys = m * 8 * 1024 * 1024;
+ fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8);
+
+ DynamicBloom std_bloom(&arena, num_keys * 10, num_probes);
+
+ std::atomic<uint64_t> elapsed(0);
+
+ std::function<void(size_t)> adder([&](size_t t) {
+ KeyMaker km;
+ StopWatchNano timer(Env::Default());
+ timer.Start();
+ for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) {
+ std_bloom.AddConcurrently(km.Seq(i));
+ }
+ elapsed += timer.ElapsedNanos();
+ });
+ for (size_t t = 0; t < num_threads; ++t) {
+ threads.emplace_back(adder, t);
+ }
+ while (threads.size() > 0) {
+ threads.back().join();
+ threads.pop_back();
+ }
+
+ fprintf(stderr,
+ "dynamic bloom, avg parallel add latency %3g"
+ " nanos/key\n",
+ static_cast<double>(elapsed) / num_threads / num_keys);
+
+ elapsed = 0;
+ std::function<void(size_t)> hitter([&](size_t t) {
+ KeyMaker km;
+ StopWatchNano timer(Env::Default());
+ timer.Start();
+ for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) {
+ bool f = std_bloom.MayContain(km.Seq(i));
+ ASSERT_TRUE(f);
+ }
+ elapsed += timer.ElapsedNanos();
+ });
+ for (size_t t = 0; t < num_threads; ++t) {
+ threads.emplace_back(hitter, t);
+ }
+ while (threads.size() > 0) {
+ threads.back().join();
+ threads.pop_back();
+ }
+
+ fprintf(stderr,
+ "dynamic bloom, avg parallel hit latency %3g"
+ " nanos/key\n",
+ static_cast<double>(elapsed) / num_threads / num_keys);
+
+ elapsed = 0;
+ std::atomic<uint32_t> false_positives(0);
+ std::function<void(size_t)> misser([&](size_t t) {
+ KeyMaker km;
+ StopWatchNano timer(Env::Default());
+ timer.Start();
+ for (uint64_t i = num_keys + 1 + t; i <= 2 * num_keys; i += num_threads) {
+ bool f = std_bloom.MayContain(km.Seq(i));
+ if (f) {
+ ++false_positives;
+ }
+ }
+ elapsed += timer.ElapsedNanos();
+ });
+ for (size_t t = 0; t < num_threads; ++t) {
+ threads.emplace_back(misser, t);
+ }
+ while (threads.size() > 0) {
+ threads.back().join();
+ threads.pop_back();
+ }
+
+ fprintf(stderr,
+ "dynamic bloom, avg parallel miss latency %3g"
+ " nanos/key, %f%% false positive rate\n",
+ static_cast<double>(elapsed) / num_threads / num_keys,
+ false_positives.load() * 100.0 / num_keys);
+ }
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ ParseCommandLineFlags(&argc, &argv, true);
+
+ return RUN_ALL_TESTS();
+}
+
+#endif // GFLAGS