summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/db_bloom_filter_test.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/db/db_bloom_filter_test.cc3498
1 files changed, 3498 insertions, 0 deletions
diff --git a/src/rocksdb/db/db_bloom_filter_test.cc b/src/rocksdb/db/db_bloom_filter_test.cc
new file mode 100644
index 000000000..d68ab6115
--- /dev/null
+++ b/src/rocksdb/db/db_bloom_filter_test.cc
@@ -0,0 +1,3498 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_reservation_manager.h"
+#include "db/db_test_util.h"
+#include "options/options_helper.h"
+#include "port/stack_trace.h"
+#include "rocksdb/advanced_options.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/perf_context.h"
+#include "rocksdb/statistics.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "table/block_based/filter_policy_internal.h"
+#include "table/format.h"
+#include "test_util/testutil.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
+ const std::string& name) {
+ return BloomLikeFilterPolicy::Create(name, bits_per_key);
+}
+const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
+const std::string kFastLocalBloom =
+ test::FastLocalBloomFilterPolicy::kClassName();
+const std::string kStandard128Ribbon =
+ test::Standard128RibbonFilterPolicy::kClassName();
+const std::string kAutoBloom = BloomFilterPolicy::kClassName();
+const std::string kAutoRibbon = RibbonFilterPolicy::kClassName();
+} // anonymous namespace
+
+// DB tests related to bloom filter.
+
+class DBBloomFilterTest : public DBTestBase {
+ public:
+ DBBloomFilterTest()
+ : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {}
+};
+
+class DBBloomFilterTestWithParam
+ : public DBTestBase,
+ public testing::WithParamInterface<
+ std::tuple<std::string, bool, uint32_t>> {
+ // public testing::WithParamInterface<bool> {
+ protected:
+ std::string bfp_impl_;
+ bool partition_filters_;
+ uint32_t format_version_;
+
+ public:
+ DBBloomFilterTestWithParam()
+ : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {}
+
+ ~DBBloomFilterTestWithParam() override {}
+
+ void SetUp() override {
+ bfp_impl_ = std::get<0>(GetParam());
+ partition_filters_ = std::get<1>(GetParam());
+ format_version_ = std::get<2>(GetParam());
+ }
+};
+
+class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {};
+
+class SliceTransformLimitedDomainGeneric : public SliceTransform {
+ const char* Name() const override {
+ return "SliceTransformLimitedDomainGeneric";
+ }
+
+ Slice Transform(const Slice& src) const override {
+ return Slice(src.data(), 5);
+ }
+
+ bool InDomain(const Slice& src) const override {
+ // prefix will be x????
+ return src.size() >= 5;
+ }
+
+ bool InRange(const Slice& dst) const override {
+ // prefix will be x????
+ return dst.size() == 5;
+ }
+};
+
+// KeyMayExist can lead to a few false positives, but not false negatives.
+// To make test deterministic, use a much larger number of bits per key-20 than
+// bits in the key, so that false positives are eliminated
+TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
+ do {
+ ReadOptions ropts;
+ std::string value;
+ anon::OptionsOverride options_override;
+ options_override.filter_policy = Create(20, bfp_impl_);
+ options_override.partition_filters = partition_filters_;
+ options_override.metadata_block_size = 32;
+ options_override.full_block_cache = true;
+ Options options = CurrentOptions(options_override);
+ if (partition_filters_) {
+ auto* table_options =
+ options.table_factory->GetOptions<BlockBasedTableOptions>();
+ if (table_options != nullptr &&
+ table_options->index_type !=
+ BlockBasedTableOptions::kTwoLevelIndexSearch) {
+ // In the current implementation partitioned filters depend on
+ // partitioned indexes
+ continue;
+ }
+ }
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ CreateAndReopenWithCF({"pikachu"}, options);
+
+ ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+
+ ASSERT_OK(Put(1, "a", "b"));
+ bool value_found = false;
+ ASSERT_TRUE(
+ db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+ ASSERT_TRUE(value_found);
+ ASSERT_EQ("b", value);
+
+ ASSERT_OK(Flush(1));
+ value.clear();
+
+ uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ ASSERT_TRUE(
+ db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+ ASSERT_TRUE(!value_found);
+ // assert that no new files were opened and no new blocks were
+ // read into block cache.
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+ ASSERT_OK(Delete(1, "a"));
+
+ numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+ ASSERT_OK(Flush(1));
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
+ true /* disallow trivial move */));
+
+ numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+ ASSERT_OK(Delete(1, "c"));
+
+ numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+ // KeyMayExist function only checks data in block caches, which is not used
+ // by plain table format.
+ } while (
+ ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
+}
+
+TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
+ for (bool partition_filters : {true, false}) {
+ Options options = last_options_;
+ options.prefix_extractor =
+ std::make_shared<SliceTransformLimitedDomainGeneric>();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ get_perf_context()->EnablePerLevelPerfContext();
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+ if (partition_filters) {
+ bbto.partition_filters = true;
+ bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ bbto.whole_key_filtering = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ DestroyAndReopen(options);
+
+ WriteOptions wo;
+ ReadOptions ro;
+ FlushOptions fo;
+ fo.wait = true;
+ std::string value;
+
+ ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
+ ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
+ ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
+
+ ASSERT_OK(dbfull()->Flush(fo));
+
+ ASSERT_EQ("foo", Get("barbarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ(
+ 0,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+ ASSERT_EQ("foo2", Get("barbarbar2"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ(
+ 0,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+ ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ(
+ 0,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ(
+ 1,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+ ASSERT_EQ(
+ 2,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ ro.total_order_seek = true;
+ // NOTE: total_order_seek no longer affects Get()
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ(
+ 3,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ // No bloom on extractor changed
+#ifndef ROCKSDB_LITE
+ ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}}));
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ(
+ 3,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+#endif // ROCKSDB_LITE
+
+ // No bloom on extractor changed, after re-open
+ options.prefix_extractor.reset(NewCappedPrefixTransform(10));
+ Reopen(options);
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ(
+ 3,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ get_perf_context()->Reset();
+ }
+}
+
+TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
+ for (bool partition_filters : {true, false}) {
+ Options options = last_options_;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ get_perf_context()->EnablePerLevelPerfContext();
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+ if (partition_filters) {
+ bbto.partition_filters = true;
+ bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ bbto.whole_key_filtering = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ DestroyAndReopen(options);
+
+ WriteOptions wo;
+ ReadOptions ro;
+ FlushOptions fo;
+ fo.wait = true;
+ std::string value;
+
+ ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
+ ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
+ ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
+
+ ASSERT_OK(dbfull()->Flush(fo));
+
+ ASSERT_EQ("foo", Get("barbarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ("foo2", Get("barbarbar2"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+
+ ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+
+ ro.total_order_seek = true;
+ // NOTE: total_order_seek no longer affects Get()
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ(
+ 3,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+
+ // No bloom on extractor changed
+#ifndef ROCKSDB_LITE
+ ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}}));
+ ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ(
+ 3,
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
+#endif // ROCKSDB_LITE
+
+ get_perf_context()->Reset();
+ }
+}
+
+TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
+ for (bool partition_filters : {true, false}) {
+ Options options = last_options_;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ get_perf_context()->EnablePerLevelPerfContext();
+
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = false;
+ if (partition_filters) {
+ bbto.partition_filters = true;
+ bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ DestroyAndReopen(options);
+
+ WriteOptions wo;
+ ReadOptions ro;
+ FlushOptions fo;
+ fo.wait = true;
+ std::string value;
+
+ ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+ // Needs insert some keys to make sure files are not filtered out by key
+ // ranges.
+ ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+ ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+ ASSERT_OK(dbfull()->Flush(fo));
+
+ Reopen(options);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+ // Reopen with whole key filtering enabled and prefix extractor
+ // NULL. Bloom filter should be off for both of whole key and
+ // prefix bloom.
+ bbto.whole_key_filtering = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ options.prefix_extractor.reset();
+ Reopen(options);
+
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ // Write DB with only full key filtering.
+ ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+ // Needs insert some keys to make sure files are not filtered out by key
+ // ranges.
+ ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+ ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+ // Reopen with both of whole key off and prefix extractor enabled.
+ // Still no bloom filter should be used.
+ options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+ bbto.whole_key_filtering = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ Reopen(options);
+
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+ // Try to create a DB with mixed files:
+ ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+ // Needs insert some keys to make sure files are not filtered out by key
+ // ranges.
+ ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+ ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+ options.prefix_extractor.reset();
+ bbto.whole_key_filtering = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ Reopen(options);
+
+ // Try to create a DB with mixed files.
+ ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
+ // In this case needs insert some keys to make sure files are
+ // not filtered out by key ranges.
+ ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+ ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+ ASSERT_OK(Flush());
+
+ // Now we have two files:
+ // File 1: An older file with prefix bloom.
+ // File 2: A newer file with whole bloom filter.
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+ ASSERT_EQ("bar", Get("barfoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+
+ // Reopen with the same setting: only whole key is used
+ Reopen(options);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+ ASSERT_EQ("bar", Get("barfoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+
+ // Restart with both filters are allowed
+ options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+ bbto.whole_key_filtering = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ Reopen(options);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+ // File 1 will has it filtered out.
+ // File 2 will not, as prefix `foo` exists in the file.
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+ ASSERT_EQ("bar", Get("barfoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+
+ // Restart with only prefix bloom is allowed.
+ options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+ bbto.whole_key_filtering = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ Reopen(options);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+ ASSERT_EQ("NOT_FOUND", Get("foo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+ ASSERT_EQ("NOT_FOUND", Get("bar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+ ASSERT_EQ("foo", Get("foobar"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+ ASSERT_EQ("bar", Get("barfoo"));
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+ uint64_t bloom_filter_useful_all_levels = 0;
+ for (auto& kv : (*(get_perf_context()->level_to_perf_context))) {
+ if (kv.second.bloom_filter_useful > 0) {
+ bloom_filter_useful_all_levels += kv.second.bloom_filter_useful;
+ }
+ }
+ ASSERT_EQ(12, bloom_filter_useful_all_levels);
+ get_perf_context()->Reset();
+ }
+}
+
+TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
+ do {
+ Options options = CurrentOptions();
+ env_->count_random_reads_ = true;
+ options.env = env_;
+ // ChangeCompactOptions() only changes compaction style, which does not
+ // trigger reset of table_factory
+ BlockBasedTableOptions table_options;
+ table_options.no_block_cache = true;
+ table_options.filter_policy = Create(10, bfp_impl_);
+ table_options.partition_filters = partition_filters_;
+ if (partition_filters_) {
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ table_options.format_version = format_version_;
+ if (format_version_ >= 4) {
+ // value delta encoding challenged more with index interval > 1
+ table_options.index_block_restart_interval = 8;
+ }
+ table_options.metadata_block_size = 32;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ CreateAndReopenWithCF({"pikachu"}, options);
+
+ // Populate multiple layers
+ const int N = 10000;
+ for (int i = 0; i < N; i++) {
+ ASSERT_OK(Put(1, Key(i), Key(i)));
+ }
+ Compact(1, "a", "z");
+ for (int i = 0; i < N; i += 100) {
+ ASSERT_OK(Put(1, Key(i), Key(i)));
+ }
+ ASSERT_OK(Flush(1));
+
+ // Prevent auto compactions triggered by seeks
+ env_->delay_sstable_sync_.store(true, std::memory_order_release);
+
+ // Lookup present keys. Should rarely read from small sstable.
+ env_->random_read_counter_.Reset();
+ for (int i = 0; i < N; i++) {
+ ASSERT_EQ(Key(i), Get(1, Key(i)));
+ }
+ int reads = env_->random_read_counter_.Read();
+ fprintf(stderr, "%d present => %d reads\n", N, reads);
+ ASSERT_GE(reads, N);
+ if (partition_filters_) {
+ // Without block cache, we read an extra partition filter per each
+ // level*read and a partition index per each read
+ ASSERT_LE(reads, 4 * N + 2 * N / 100);
+ } else {
+ ASSERT_LE(reads, N + 2 * N / 100);
+ }
+
+ // Lookup present keys. Should rarely read from either sstable.
+ env_->random_read_counter_.Reset();
+ for (int i = 0; i < N; i++) {
+ ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing"));
+ }
+ reads = env_->random_read_counter_.Read();
+ fprintf(stderr, "%d missing => %d reads\n", N, reads);
+ if (partition_filters_) {
+ // With partitioned filter we read one extra filter per level per each
+ // missed read.
+ ASSERT_LE(reads, 2 * N + 3 * N / 100);
+ } else {
+ ASSERT_LE(reads, 3 * N / 100);
+ }
+
+#ifndef ROCKSDB_LITE
+ // Sanity check some table properties
+ std::map<std::string, std::string> props;
+ ASSERT_TRUE(db_->GetMapProperty(
+ handles_[1], DB::Properties::kAggregatedTableProperties, &props));
+ uint64_t nkeys = N + N / 100;
+ uint64_t filter_size = ParseUint64(props["filter_size"]);
+ EXPECT_LE(filter_size,
+ (partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ 8);
+ if (bfp_impl_ == kAutoRibbon) {
+ // Sometimes using Ribbon filter which is more space-efficient
+ EXPECT_GE(filter_size, 7 * nkeys / /*bits / byte*/ 8);
+ } else {
+ // Always Bloom
+ EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8);
+ }
+
+ uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]);
+ EXPECT_EQ(num_filter_entries, nkeys);
+#endif // ROCKSDB_LITE
+
+ env_->delay_sstable_sync_.store(false, std::memory_order_release);
+ Close();
+ } while (ChangeCompactOptions());
+}
+
+namespace {
+
+class AlwaysTrueBitsBuilder : public FilterBitsBuilder {
+ public:
+ void AddKey(const Slice&) override {}
+ size_t EstimateEntriesAdded() override { return 0U; }
+ Slice Finish(std::unique_ptr<const char[]>* /* buf */) override {
+ // Interpreted as "always true" filter (0 probes over 1 byte of
+ // payload, 5 bytes metadata)
+ return Slice("\0\0\0\0\0\0", 6);
+ }
+ using FilterBitsBuilder::Finish;
+ size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; }
+};
+
+class AlwaysTrueFilterPolicy : public ReadOnlyBuiltinFilterPolicy {
+ public:
+ explicit AlwaysTrueFilterPolicy(bool skip) : skip_(skip) {}
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext&) const override {
+ if (skip_) {
+ return nullptr;
+ } else {
+ return new AlwaysTrueBitsBuilder();
+ }
+ }
+
+ private:
+ bool skip_;
+};
+
+} // anonymous namespace
+
+TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
+ constexpr int maxKey = 10;
+ auto PutFn = [&]() {
+ int i;
+ // Put
+ for (i = 0; i < maxKey; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+ Flush();
+ };
+ auto GetFn = [&]() {
+ int i;
+ // Get OK
+ for (i = 0; i < maxKey; i++) {
+ ASSERT_EQ(Key(i), Get(Key(i)));
+ }
+ // Get NotFound
+ for (; i < maxKey * 2; i++) {
+ ASSERT_EQ(Get(Key(i)), "NOT_FOUND");
+ }
+ };
+ auto PutAndGetFn = [&]() {
+ PutFn();
+ GetFn();
+ };
+#ifndef ROCKSDB_LITE
+ std::map<std::string, std::string> props;
+ const auto& kAggTableProps = DB::Properties::kAggregatedTableProperties;
+#endif // ROCKSDB_LITE
+
+ Options options = CurrentOptions();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ BlockBasedTableOptions table_options;
+ table_options.partition_filters = partition_filters_;
+ if (partition_filters_) {
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ table_options.format_version = format_version_;
+
+ // Test 1: bits per key < 0.5 means skip filters -> no filter
+ // constructed or read.
+ table_options.filter_policy = Create(0.4, bfp_impl_);
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+ PutAndGetFn();
+
+ // Verify no filter access nor contruction
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0);
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0);
+
+#ifndef ROCKSDB_LITE
+ props.clear();
+ ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props));
+ EXPECT_EQ(props["filter_size"], "0");
+#endif // ROCKSDB_LITE
+
+ // Test 2: use custom API to skip filters -> no filter constructed
+ // or read.
+ table_options.filter_policy.reset(
+ new AlwaysTrueFilterPolicy(/* skip */ true));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+ PutAndGetFn();
+
+ // Verify no filter access nor construction
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0);
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0);
+
+#ifndef ROCKSDB_LITE
+ props.clear();
+ ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props));
+ EXPECT_EQ(props["filter_size"], "0");
+#endif // ROCKSDB_LITE
+
+ // Control test: using an actual filter with 100% FP rate -> the filter
+ // is constructed and checked on read.
+ table_options.filter_policy.reset(
+ new AlwaysTrueFilterPolicy(/* skip */ false));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+ PutAndGetFn();
+
+ // Verify filter is accessed (and constructed)
+ EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE),
+ maxKey * 2);
+ EXPECT_EQ(
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE),
+ maxKey);
+#ifndef ROCKSDB_LITE
+ props.clear();
+ ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props));
+ EXPECT_NE(props["filter_size"], "0");
+#endif // ROCKSDB_LITE
+
+ // Test 3 (options test): Able to read existing filters with longstanding
+ // generated options file entry `filter_policy=rocksdb.BuiltinBloomFilter`
+ ASSERT_OK(FilterPolicy::CreateFromString(ConfigOptions(),
+ "rocksdb.BuiltinBloomFilter",
+ &table_options.filter_policy));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ Reopen(options);
+ GetFn();
+
+ // Verify filter is accessed
+ EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE),
+ maxKey * 2);
+ EXPECT_EQ(
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE),
+ maxKey);
+
+ // But new filters are not generated (configuration details unknown)
+ DestroyAndReopen(options);
+ PutAndGetFn();
+
+ // Verify no filter access nor construction
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0);
+ EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0);
+
+#ifndef ROCKSDB_LITE
+ props.clear();
+ ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props));
+ EXPECT_EQ(props["filter_size"], "0");
+#endif // ROCKSDB_LITE
+}
+
+#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
+INSTANTIATE_TEST_CASE_P(
+ FormatDef, DBBloomFilterTestDefFormatVersion,
+ ::testing::Values(
+ std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
+ std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion),
+ std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion)));
+
+INSTANTIATE_TEST_CASE_P(
+ FormatDef, DBBloomFilterTestWithParam,
+ ::testing::Values(
+ std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
+ std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion),
+ std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion)));
+
+INSTANTIATE_TEST_CASE_P(
+ FormatLatest, DBBloomFilterTestWithParam,
+ ::testing::Values(std::make_tuple(kAutoBloom, true, kLatestFormatVersion),
+ std::make_tuple(kAutoBloom, false, kLatestFormatVersion),
+ std::make_tuple(kAutoRibbon, false,
+ kLatestFormatVersion)));
+#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
+
+TEST_F(DBBloomFilterTest, BloomFilterRate) {
+ while (ChangeFilterOptions()) {
+ Options options = CurrentOptions();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ get_perf_context()->EnablePerLevelPerfContext();
+ CreateAndReopenWithCF({"pikachu"}, options);
+
+ const int maxKey = 10000;
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_OK(Put(1, Key(i), Key(i)));
+ }
+ // Add a large key to make the file contain wide range
+ ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+ Flush(1);
+
+ // Check if they can be found
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_EQ(Key(i), Get(1, Key(i)));
+ }
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+
+ // Check if filter is useful
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+ }
+ ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98);
+ ASSERT_GE(
+ (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful,
+ maxKey * 0.98);
+ get_perf_context()->Reset();
+ }
+}
+
+namespace {
+struct CompatibilityConfig {
+ std::shared_ptr<const FilterPolicy> policy;
+ bool partitioned;
+ uint32_t format_version;
+
+ void SetInTableOptions(BlockBasedTableOptions* table_options) {
+ table_options->filter_policy = policy;
+ table_options->partition_filters = partitioned;
+ if (partitioned) {
+ table_options->index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ } else {
+ table_options->index_type =
+ BlockBasedTableOptions::IndexType::kBinarySearch;
+ }
+ table_options->format_version = format_version;
+ }
+};
+// High bits per key -> almost no FPs
+std::shared_ptr<const FilterPolicy> kCompatibilityBloomPolicy{
+ NewBloomFilterPolicy(20)};
+// bloom_before_level=-1 -> always use Ribbon
+std::shared_ptr<const FilterPolicy> kCompatibilityRibbonPolicy{
+ NewRibbonFilterPolicy(20, -1)};
+
+std::vector<CompatibilityConfig> kCompatibilityConfigs = {
+ {kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version},
+ {kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version},
+ {kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U},
+ {kCompatibilityRibbonPolicy, false,
+ BlockBasedTableOptions().format_version},
+ {kCompatibilityRibbonPolicy, true, BlockBasedTableOptions().format_version},
+};
+} // anonymous namespace
+
+TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
+ Options options = CurrentOptions();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ options.level0_file_num_compaction_trigger =
+ static_cast<int>(kCompatibilityConfigs.size()) + 1;
+ options.max_open_files = -1;
+
+ Close();
+
+ // Create one file for each kind of filter. Each file covers a distinct key
+ // range.
+ for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) {
+ BlockBasedTableOptions table_options;
+ kCompatibilityConfigs[i].SetInTableOptions(&table_options);
+ ASSERT_TRUE(table_options.filter_policy != nullptr);
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ Reopen(options);
+
+ std::string prefix = std::to_string(i) + "_";
+ ASSERT_OK(Put(prefix + "A", "val"));
+ ASSERT_OK(Put(prefix + "Z", "val"));
+ ASSERT_OK(Flush());
+ }
+
+ // Test filter is used between each pair of {reader,writer} configurations,
+ // because any built-in FilterPolicy should be able to read filters from any
+ // other built-in FilterPolicy
+ for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) {
+ BlockBasedTableOptions table_options;
+ kCompatibilityConfigs[i].SetInTableOptions(&table_options);
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ Reopen(options);
+ for (size_t j = 0; j < kCompatibilityConfigs.size(); ++j) {
+ std::string prefix = std::to_string(j) + "_";
+ ASSERT_EQ("val", Get(prefix + "A")); // Filter positive
+ ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive
+ // Filter negative, with high probability
+ ASSERT_EQ("NOT_FOUND", Get(prefix + "Q"));
+ EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE),
+ 2);
+ EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+ }
+ }
+}
+
+// To align with the type of hash entry being reserved in implementation.
+using FilterConstructionReserveMemoryHash = uint64_t;
+
+class ChargeFilterConstructionTestWithParam
+ : public DBTestBase,
+ public testing::WithParamInterface<std::tuple<
+ CacheEntryRoleOptions::Decision, std::string, bool, bool>> {
+ public:
+ ChargeFilterConstructionTestWithParam()
+ : DBTestBase("db_bloom_filter_tests",
+ /*env_do_fsync=*/true),
+ num_key_(0),
+ charge_filter_construction_(std::get<0>(GetParam())),
+ policy_(std::get<1>(GetParam())),
+ partition_filters_(std::get<2>(GetParam())),
+ detect_filter_construct_corruption_(std::get<3>(GetParam())) {
+ if (charge_filter_construction_ ==
+ CacheEntryRoleOptions::Decision::kDisabled ||
+ policy_ == kLegacyBloom) {
+ // For these cases, we only interested in whether filter construction
+ // cache charging happens instead of its accuracy. Therefore we don't
+ // need many keys.
+ num_key_ = 5;
+ } else if (partition_filters_) {
+ // For PartitionFilter case, since we set
+ // table_options.metadata_block_size big enough such that each partition
+ // trigger at least 1 dummy entry reservation each for hash entries and
+ // final filter, we need a large number of keys to ensure we have at least
+ // two partitions.
+ num_key_ = 18 *
+ CacheReservationManagerImpl<
+ CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() /
+ sizeof(FilterConstructionReserveMemoryHash);
+ } else if (policy_ == kFastLocalBloom) {
+ // For Bloom Filter + FullFilter case, since we design the num_key_ to
+ // make hash entry cache charging be a multiple of dummy entries, the
+ // correct behavior of charging final filter on top of it will trigger at
+ // least another dummy entry insertion. Therefore we can assert that
+ // behavior and we don't need a large number of keys to verify we
+ // indeed charge the final filter for in cache, even though final
+ // filter is a lot smaller than hash entries.
+ num_key_ = 1 *
+ CacheReservationManagerImpl<
+ CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() /
+ sizeof(FilterConstructionReserveMemoryHash);
+ } else {
+ // For Ribbon Filter + FullFilter case, we need a large enough number of
+ // keys so that charging final filter after releasing the hash entries
+ // reservation will trigger at least another dummy entry (or equivalently
+ // to saying, causing another peak in cache charging) as banding
+ // reservation might not be a multiple of dummy entry.
+ num_key_ = 12 *
+ CacheReservationManagerImpl<
+ CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() /
+ sizeof(FilterConstructionReserveMemoryHash);
+ }
+ }
+
+ BlockBasedTableOptions GetBlockBasedTableOptions() {
+ BlockBasedTableOptions table_options;
+
+ // We set cache capacity big enough to prevent cache full for convenience in
+ // calculation.
+ constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024;
+
+ table_options.cache_usage_options.options_overrides.insert(
+ {CacheEntryRole::kFilterConstruction,
+ {/*.charged = */ charge_filter_construction_}});
+ table_options.filter_policy = Create(10, policy_);
+ table_options.partition_filters = partition_filters_;
+ if (table_options.partition_filters) {
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ // We set table_options.metadata_block_size big enough so that each
+ // partition trigger at least 1 dummy entry insertion each for hash
+ // entries and final filter.
+ table_options.metadata_block_size = 409000;
+ }
+ table_options.detect_filter_construct_corruption =
+ detect_filter_construct_corruption_;
+
+ LRUCacheOptions lo;
+ lo.capacity = kCacheCapacity;
+ lo.num_shard_bits = 0; // 2^0 shard
+ lo.strict_capacity_limit = true;
+ cache_ = std::make_shared<
+ TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>(
+ (NewLRUCache(lo)));
+ table_options.block_cache = cache_;
+
+ return table_options;
+ }
+
+ std::size_t GetNumKey() { return num_key_; }
+
+ CacheEntryRoleOptions::Decision ChargeFilterConstructMemory() {
+ return charge_filter_construction_;
+ }
+
+ std::string GetFilterPolicy() { return policy_; }
+
+ bool PartitionFilters() { return partition_filters_; }
+
+ std::shared_ptr<
+ TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+ GetCache() {
+ return cache_;
+ }
+
+ private:
+ std::size_t num_key_;
+ CacheEntryRoleOptions::Decision charge_filter_construction_;
+ std::string policy_;
+ bool partition_filters_;
+ std::shared_ptr<
+ TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+ cache_;
+ bool detect_filter_construct_corruption_;
+};
+
+INSTANTIATE_TEST_CASE_P(
+ ChargeFilterConstructionTestWithParam,
+ ChargeFilterConstructionTestWithParam,
+ ::testing::Values(
+ std::make_tuple(CacheEntryRoleOptions::Decision::kDisabled,
+ kFastLocalBloom, false, false),
+
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kFastLocalBloom, false, false),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kFastLocalBloom, false, true),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kFastLocalBloom, true, false),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kFastLocalBloom, true, true),
+
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kStandard128Ribbon, false, false),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kStandard128Ribbon, false, true),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kStandard128Ribbon, true, false),
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+ kStandard128Ribbon, true, true),
+
+ std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom,
+ false, false)));
+
+// TODO: Speed up this test, and reduce disk space usage (~700MB)
+// The current test inserts many keys (on the scale of dummy entry size)
+// in order to make small memory user (e.g, final filter, partitioned hash
+// entries/filter/banding) , which is proportional to the number of
+// keys, big enough so that its cache charging triggers dummy entry insertion
+// and becomes observable in the test.
+//
+// However, inserting that many keys slows down this test and leaves future
+// developers an opportunity to speed it up.
+//
+// Possible approaches & challenges:
+// 1. Use sync point during cache charging of filter construction
+//
+// Benefit: It does not rely on triggering dummy entry insertion
+// but the sync point to verify small memory user is charged correctly.
+//
+// Challenge: this approach is intrusive.
+//
+// 2. Make dummy entry size configurable and set it small in the test
+//
+// Benefit: It increases the precision of cache charging and therefore
+// small memory usage can still trigger insertion of dummy entry.
+//
+// Challenge: change CacheReservationManager related APIs and a hack
+// might be needed to control the size of dummmy entry of
+// CacheReservationManager used in filter construction for testing
+// since CacheReservationManager is not exposed at the high level.
+//
+TEST_P(ChargeFilterConstructionTestWithParam, Basic) {
+ Options options = CurrentOptions();
+ // We set write_buffer_size big enough so that in the case where there is
+ // filter construction cache charging, flush won't be triggered before we
+ // manually trigger it for clean testing
+ options.write_buffer_size = 640 << 20;
+ BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ std::shared_ptr<
+ TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+ cache = GetCache();
+ options.create_if_missing = true;
+ // Disable auto compaction to prevent its unexpected side effect
+ // to the number of keys per partition designed by us in the test
+ options.disable_auto_compactions = true;
+ DestroyAndReopen(options);
+ int num_key = static_cast<int>(GetNumKey());
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+
+ ASSERT_EQ(cache->GetChargedCacheIncrementSum(), 0)
+ << "Flush was triggered too early in the test case with filter "
+ "construction cache charging - please make sure no flush triggered "
+ "during the key insertions above";
+
+ ASSERT_OK(Flush());
+
+ bool charge_filter_construction = (ChargeFilterConstructMemory() ==
+ CacheEntryRoleOptions::Decision::kEnabled);
+ std::string policy = GetFilterPolicy();
+ bool partition_filters = PartitionFilters();
+ bool detect_filter_construct_corruption =
+ table_options.detect_filter_construct_corruption;
+
+ std::deque<std::size_t> filter_construction_cache_res_peaks =
+ cache->GetChargedCachePeaks();
+ std::size_t filter_construction_cache_res_increments_sum =
+ cache->GetChargedCacheIncrementSum();
+
+ if (!charge_filter_construction) {
+ EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0);
+ return;
+ }
+
+ if (policy == kLegacyBloom) {
+ EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
+ << "There shouldn't be filter construction cache charging as this "
+ "feature does not support kLegacyBloom";
+ return;
+ }
+
+ const std::size_t kDummyEntrySize = CacheReservationManagerImpl<
+ CacheEntryRole::kFilterConstruction>::GetDummyEntrySize();
+
+ const std::size_t predicted_hash_entries_cache_res =
+ num_key * sizeof(FilterConstructionReserveMemoryHash);
+ ASSERT_EQ(predicted_hash_entries_cache_res % kDummyEntrySize, 0)
+ << "It's by this test's design that predicted_hash_entries_cache_res is "
+ "a multipe of dummy entry";
+
+ const std::size_t predicted_hash_entries_cache_res_dummy_entry_num =
+ predicted_hash_entries_cache_res / kDummyEntrySize;
+ const std::size_t predicted_final_filter_cache_res =
+ static_cast<std::size_t>(
+ std::ceil(1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 *
+ (policy == kStandard128Ribbon ? 0.7 : 1))) *
+ kDummyEntrySize;
+ const std::size_t predicted_banding_cache_res =
+ static_cast<std::size_t>(
+ std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) *
+ kDummyEntrySize;
+
+ if (policy == kFastLocalBloom) {
+ /* kFastLocalBloom + FullFilter
+ * p0
+ * / \
+ * b / \
+ * / \
+ * / \
+ * 0/ \
+ * hash entries = b - 0, final filter = p0 - b
+ * p0 = hash entries + final filter
+ *
+ * The test is designed in a way such that the reservation for b is a
+ * multiple of dummy entries so that reservation for (p0 - b)
+ * will trigger at least another dummy entry insertion.
+ *
+ * kFastLocalBloom + FullFilter +
+ * detect_filter_construct_corruption
+ * The peak p0 stays the same as
+ * (kFastLocalBloom + FullFilter) but just lasts
+ * longer since we release hash entries reservation later.
+ *
+ * kFastLocalBloom + PartitionedFilter
+ * p1
+ * / \
+ * p0 b'/ \
+ * / \ / \
+ * b / \ / \
+ * / \ / \
+ * / a \
+ * 0/ \
+ * partitioned hash entries1 = b - 0, partitioned hash entries1 = b' - a
+ * parittioned final filter1 = p0 - b, parittioned final filter2 = p1 - b'
+ *
+ * (increment p0 - 0) + (increment p1 - a)
+ * = partitioned hash entries1 + partitioned hash entries2
+ * + parittioned final filter1 + parittioned final filter2
+ * = hash entries + final filter
+ *
+ * kFastLocalBloom + PartitionedFilter +
+ * detect_filter_construct_corruption
+ * The peak p0, p1 stay the same as
+ * (kFastLocalBloom + PartitionedFilter) but just
+ * last longer since we release hash entries reservation later.
+ *
+ */
+ if (!partition_filters) {
+ EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
+ << "Filter construction cache charging should have only 1 peak in "
+ "case: kFastLocalBloom + FullFilter";
+ std::size_t filter_construction_cache_res_peak =
+ filter_construction_cache_res_peaks[0];
+ EXPECT_GT(filter_construction_cache_res_peak,
+ predicted_hash_entries_cache_res)
+ << "The testing number of hash entries is designed to make hash "
+ "entries cache charging be multiples of dummy entries"
+ " so the correct behavior of charging final filter on top of it"
+ " should've triggered at least another dummy entry insertion";
+
+ std::size_t predicted_filter_construction_cache_res_peak =
+ predicted_hash_entries_cache_res + predicted_final_filter_cache_res;
+ EXPECT_GE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 0.9);
+ EXPECT_LE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 1.1);
+ return;
+ } else {
+ EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
+ << "Filter construction cache charging should have multiple peaks "
+ "in case: kFastLocalBloom + "
+ "PartitionedFilter";
+ std::size_t predicted_filter_construction_cache_res_increments_sum =
+ predicted_hash_entries_cache_res + predicted_final_filter_cache_res;
+ EXPECT_GE(filter_construction_cache_res_increments_sum,
+ predicted_filter_construction_cache_res_increments_sum * 0.9);
+ EXPECT_LE(filter_construction_cache_res_increments_sum,
+ predicted_filter_construction_cache_res_increments_sum * 1.1);
+ return;
+ }
+ }
+
+ if (policy == kStandard128Ribbon) {
+ /* kStandard128Ribbon + FullFilter
+ * p0
+ * / \ p1
+ * / \/\
+ * b / b' \
+ * / \
+ * 0/ \
+ * hash entries = b - 0, banding = p0 - b, final filter = p1 - b'
+ * p0 = hash entries + banding
+ *
+ * The test is designed in a way such that the reservation for (p1 - b')
+ * will trigger at least another dummy entry insertion
+ * (or equivelantly to saying, creating another peak).
+ *
+ * kStandard128Ribbon + FullFilter +
+ * detect_filter_construct_corruption
+ *
+ * new p0
+ * / \
+ * / \
+ * pre p0 \
+ * / \
+ * / \
+ * b / \
+ * / \
+ * 0/ \
+ * hash entries = b - 0, banding = pre p0 - b,
+ * final filter = new p0 - pre p0
+ * new p0 = hash entries + banding + final filter
+ *
+ * The previous p0 will no longer be a peak since under
+ * detect_filter_construct_corruption == true, we do not release hash
+ * entries reserveration (like p0 - b' previously) until after final filter
+ * creation and post-verification
+ *
+ * kStandard128Ribbon + PartitionedFilter
+ * p3
+ * p0 /\ p4
+ * / \ p1 / \ /\
+ * / \/\ b''/ a' \
+ * b / b' \ / \
+ * / \ / \
+ * 0/ a \
+ * partitioned hash entries1 = b - 0, partitioned hash entries2 = b'' - a
+ * partitioned banding1 = p0 - b, partitioned banding2 = p3 - b''
+ * parittioned final filter1 = p1 - b',parittioned final filter2 = p4 - a'
+ *
+ * (increment p0 - 0) + (increment p1 - b')
+ * + (increment p3 - a) + (increment p4 - a')
+ * = partitioned hash entries1 + partitioned hash entries2
+ * + parittioned banding1 + parittioned banding2
+ * + parittioned final filter1 + parittioned final filter2
+ * = hash entries + banding + final filter
+ *
+ * kStandard128Ribbon + PartitionedFilter +
+ * detect_filter_construct_corruption
+ *
+ * new p3
+ * / \
+ * pre p3 \
+ * new p0 / \
+ * / \ / \
+ * pre p0 \ / \
+ * / \ b'/ \
+ * / \ / \
+ * b / \ / \
+ * / \a \
+ * 0/ \
+ * partitioned hash entries1 = b - 0, partitioned hash entries2 = b' - a
+ * partitioned banding1 = pre p0 - b, partitioned banding2 = pre p3 - b'
+ * parittioned final filter1 = new p0 - pre p0,
+ * parittioned final filter2 = new p3 - pre p3
+ *
+ * The previous p0 and p3 will no longer be a peak since under
+ * detect_filter_construct_corruption == true, we do not release hash
+ * entries reserveration (like p0 - b', p3 - a' previously) until after
+ * parittioned final filter creation and post-verification
+ *
+ * However, increments sum stay the same as shown below:
+ * (increment new p0 - 0) + (increment new p3 - a)
+ * = partitioned hash entries1 + partitioned hash entries2
+ * + parittioned banding1 + parittioned banding2
+ * + parittioned final filter1 + parittioned final filter2
+ * = hash entries + banding + final filter
+ *
+ */
+ if (!partition_filters) {
+ ASSERT_GE(
+ std::floor(
+ 1.0 * predicted_final_filter_cache_res /
+ CacheReservationManagerImpl<
+ CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()),
+ 1)
+ << "Final filter cache charging too small for this test - please "
+ "increase the number of keys";
+ if (!detect_filter_construct_corruption) {
+ EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2)
+ << "Filter construction cache charging should have 2 peaks in "
+ "case: kStandard128Ribbon + "
+ "FullFilter. "
+ "The second peak is resulted from charging the final filter "
+ "after "
+ "decreasing the hash entry reservation since the testing final "
+ "filter reservation is designed to be at least 1 dummy entry "
+ "size";
+
+ std::size_t filter_construction_cache_res_peak =
+ filter_construction_cache_res_peaks[0];
+ std::size_t predicted_filter_construction_cache_res_peak =
+ predicted_hash_entries_cache_res + predicted_banding_cache_res;
+ EXPECT_GE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 0.9);
+ EXPECT_LE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 1.1);
+ } else {
+ EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
+ << "Filter construction cache charging should have 1 peaks in "
+ "case: kStandard128Ribbon + FullFilter "
+ "+ detect_filter_construct_corruption. "
+ "The previous second peak now disappears since we don't "
+ "decrease the hash entry reservation"
+ "until after final filter reservation and post-verification";
+
+ std::size_t filter_construction_cache_res_peak =
+ filter_construction_cache_res_peaks[0];
+ std::size_t predicted_filter_construction_cache_res_peak =
+ predicted_hash_entries_cache_res + predicted_banding_cache_res +
+ predicted_final_filter_cache_res;
+ EXPECT_GE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 0.9);
+ EXPECT_LE(filter_construction_cache_res_peak,
+ predicted_filter_construction_cache_res_peak * 1.1);
+ }
+ return;
+ } else {
+ if (!detect_filter_construct_corruption) {
+ EXPECT_GE(filter_construction_cache_res_peaks.size(), 3)
+ << "Filter construction cache charging should have more than 3 "
+ "peaks "
+ "in case: kStandard128Ribbon + "
+ "PartitionedFilter";
+ } else {
+ EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
+ << "Filter construction cache charging should have more than 2 "
+ "peaks "
+ "in case: kStandard128Ribbon + "
+ "PartitionedFilter + detect_filter_construct_corruption";
+ }
+ std::size_t predicted_filter_construction_cache_res_increments_sum =
+ predicted_hash_entries_cache_res + predicted_banding_cache_res +
+ predicted_final_filter_cache_res;
+ EXPECT_GE(filter_construction_cache_res_increments_sum,
+ predicted_filter_construction_cache_res_increments_sum * 0.9);
+ EXPECT_LE(filter_construction_cache_res_increments_sum,
+ predicted_filter_construction_cache_res_increments_sum * 1.1);
+ return;
+ }
+ }
+}
+
+class DBFilterConstructionCorruptionTestWithParam
+ : public DBTestBase,
+ public testing::WithParamInterface<
+ std::tuple<bool /* detect_filter_construct_corruption */, std::string,
+ bool /* partition_filters */>> {
+ public:
+ DBFilterConstructionCorruptionTestWithParam()
+ : DBTestBase("db_bloom_filter_tests",
+ /*env_do_fsync=*/true) {}
+
+ BlockBasedTableOptions GetBlockBasedTableOptions() {
+ BlockBasedTableOptions table_options;
+ table_options.detect_filter_construct_corruption = std::get<0>(GetParam());
+ table_options.filter_policy = Create(10, std::get<1>(GetParam()));
+ table_options.partition_filters = std::get<2>(GetParam());
+ if (table_options.partition_filters) {
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ // We set table_options.metadata_block_size small enough so we can
+ // trigger filter partitioning with GetNumKey() amount of keys
+ table_options.metadata_block_size = 10;
+ }
+
+ return table_options;
+ }
+
+ // Return an appropriate amount of keys for testing
+ // to generate a long filter (i.e, size >= 8 + kMetadataLen)
+ std::size_t GetNumKey() { return 5000; }
+};
+
+INSTANTIATE_TEST_CASE_P(
+ DBFilterConstructionCorruptionTestWithParam,
+ DBFilterConstructionCorruptionTestWithParam,
+ ::testing::Values(std::make_tuple(false, kFastLocalBloom, false),
+ std::make_tuple(true, kFastLocalBloom, false),
+ std::make_tuple(true, kFastLocalBloom, true),
+ std::make_tuple(true, kStandard128Ribbon, false),
+ std::make_tuple(true, kStandard128Ribbon, true)));
+
+TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.create_if_missing = true;
+ options.disable_auto_compactions = true;
+
+ DestroyAndReopen(options);
+ int num_key = static_cast<int>(GetNumKey());
+ Status s;
+
+ // Case 1: No corruption in filter construction
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+ s = Flush();
+ EXPECT_TRUE(s.ok());
+
+ // Case 2: Corruption of hash entries in filter construction
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) {
+ std::deque<uint64_t>* hash_entries_to_corrupt =
+ (std::deque<uint64_t>*)arg;
+ assert(!hash_entries_to_corrupt->empty());
+ *(hash_entries_to_corrupt->begin()) =
+ *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 };
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ s = Flush();
+
+ if (table_options.detect_filter_construct_corruption) {
+ EXPECT_TRUE(s.IsCorruption());
+ EXPECT_TRUE(
+ s.ToString().find("Filter's hash entries checksum mismatched") !=
+ std::string::npos);
+ } else {
+ EXPECT_TRUE(s.ok());
+ }
+
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearCallBack(
+ "XXPH3FilterBitsBuilder::Finish::"
+ "TamperHashEntries");
+
+ // Case 3: Corruption of filter content in filter construction
+ DestroyAndReopen(options);
+
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "XXPH3FilterBitsBuilder::Finish::TamperFilter", [&](void* arg) {
+ std::pair<std::unique_ptr<char[]>*, std::size_t>* TEST_arg_pair =
+ (std::pair<std::unique_ptr<char[]>*, std::size_t>*)arg;
+ std::size_t filter_size = TEST_arg_pair->second;
+ // 5 is the kMetadataLen and
+ assert(filter_size >= 8 + 5);
+ std::unique_ptr<char[]>* filter_content_to_corrupt =
+ TEST_arg_pair->first;
+ std::memset(filter_content_to_corrupt->get(), '\0', 8);
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ s = Flush();
+
+ if (table_options.detect_filter_construct_corruption) {
+ EXPECT_TRUE(s.IsCorruption());
+ EXPECT_TRUE(s.ToString().find("Corrupted filter content") !=
+ std::string::npos);
+ } else {
+ EXPECT_TRUE(s.ok());
+ }
+
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearCallBack(
+ "XXPH3FilterBitsBuilder::Finish::"
+ "TamperFilter");
+}
+
+// RocksDB lite does not support dynamic options
+#ifndef ROCKSDB_LITE
+TEST_P(DBFilterConstructionCorruptionTestWithParam,
+ DynamicallyTurnOnAndOffDetectConstructCorruption) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
+ // We intend to turn on
+ // table_options.detect_filter_construct_corruption dynamically
+ // therefore we override this test parmater's value
+ table_options.detect_filter_construct_corruption = false;
+
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.create_if_missing = true;
+
+ int num_key = static_cast<int>(GetNumKey());
+ Status s;
+
+ DestroyAndReopen(options);
+
+ // Case 1: !table_options.detect_filter_construct_corruption
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) {
+ std::deque<uint64_t>* hash_entries_to_corrupt =
+ (std::deque<uint64_t>*)arg;
+ assert(!hash_entries_to_corrupt->empty());
+ *(hash_entries_to_corrupt->begin()) =
+ *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 };
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ s = Flush();
+
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearCallBack(
+ "XXPH3FilterBitsBuilder::Finish::"
+ "TamperHashEntries");
+
+ ASSERT_FALSE(table_options.detect_filter_construct_corruption);
+ EXPECT_TRUE(s.ok());
+
+ // Case 2: dynamically turn on
+ // table_options.detect_filter_construct_corruption
+ ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
+ "{detect_filter_construct_corruption=true;}"}}));
+
+ for (int i = 0; i < num_key; i++) {
+ ASSERT_OK(Put(Key(i), Key(i)));
+ }
+
+ SyncPoint::GetInstance()->SetCallBack(
+ "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) {
+ std::deque<uint64_t>* hash_entries_to_corrupt =
+ (std::deque<uint64_t>*)arg;
+ assert(!hash_entries_to_corrupt->empty());
+ *(hash_entries_to_corrupt->begin()) =
+ *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 };
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ s = Flush();
+
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearCallBack(
+ "XXPH3FilterBitsBuilder::Finish::"
+ "TamperHashEntries");
+
+ auto updated_table_options =
+ db_->GetOptions().table_factory->GetOptions<BlockBasedTableOptions>();
+ EXPECT_TRUE(updated_table_options->detect_filter_construct_corruption);
+ EXPECT_TRUE(s.IsCorruption());
+ EXPECT_TRUE(s.ToString().find("Filter's hash entries checksum mismatched") !=
+ std::string::npos);
+
+ // Case 3: dynamically turn off
+ // table_options.detect_filter_construct_corruption
+ ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
+ "{detect_filter_construct_corruption=false;}"}}));
+ updated_table_options =
+ db_->GetOptions().table_factory->GetOptions<BlockBasedTableOptions>();
+ EXPECT_FALSE(updated_table_options->detect_filter_construct_corruption);
+}
+#endif // ROCKSDB_LITE
+
+namespace {
+// NOTE: This class is referenced by HISTORY.md as a model for a wrapper
+// FilterPolicy selecting among configurations based on context.
+class LevelAndStyleCustomFilterPolicy : public FilterPolicy {
+ public:
+ explicit LevelAndStyleCustomFilterPolicy(int bpk_fifo, int bpk_l0_other,
+ int bpk_otherwise)
+ : policy_fifo_(NewBloomFilterPolicy(bpk_fifo)),
+ policy_l0_other_(NewBloomFilterPolicy(bpk_l0_other)),
+ policy_otherwise_(NewBloomFilterPolicy(bpk_otherwise)) {}
+
+ const char* Name() const override {
+ return "LevelAndStyleCustomFilterPolicy";
+ }
+
+ // OK to use built-in policy name because we are deferring to a
+ // built-in builder. We aren't changing the serialized format.
+ const char* CompatibilityName() const override {
+ return policy_fifo_->CompatibilityName();
+ }
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext& context) const override {
+ if (context.compaction_style == kCompactionStyleFIFO) {
+ return policy_fifo_->GetBuilderWithContext(context);
+ } else if (context.level_at_creation == 0) {
+ return policy_l0_other_->GetBuilderWithContext(context);
+ } else {
+ return policy_otherwise_->GetBuilderWithContext(context);
+ }
+ }
+
+ FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
+ // OK to defer to any of them; they all can parse built-in filters
+ // from any settings.
+ return policy_fifo_->GetFilterBitsReader(contents);
+ }
+
+ private:
+ const std::unique_ptr<const FilterPolicy> policy_fifo_;
+ const std::unique_ptr<const FilterPolicy> policy_l0_other_;
+ const std::unique_ptr<const FilterPolicy> policy_otherwise_;
+};
+
+static std::map<TableFileCreationReason, std::string>
+ table_file_creation_reason_to_string{
+ {TableFileCreationReason::kCompaction, "kCompaction"},
+ {TableFileCreationReason::kFlush, "kFlush"},
+ {TableFileCreationReason::kMisc, "kMisc"},
+ {TableFileCreationReason::kRecovery, "kRecovery"},
+ };
+
+class TestingContextCustomFilterPolicy
+ : public LevelAndStyleCustomFilterPolicy {
+ public:
+ explicit TestingContextCustomFilterPolicy(int bpk_fifo, int bpk_l0_other,
+ int bpk_otherwise)
+ : LevelAndStyleCustomFilterPolicy(bpk_fifo, bpk_l0_other, bpk_otherwise) {
+ }
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext& context) const override {
+ test_report_ += "cf=";
+ test_report_ += context.column_family_name;
+ test_report_ += ",s=";
+ test_report_ +=
+ OptionsHelper::compaction_style_to_string[context.compaction_style];
+ test_report_ += ",n=";
+ test_report_ += std::to_string(context.num_levels);
+ test_report_ += ",l=";
+ test_report_ += std::to_string(context.level_at_creation);
+ test_report_ += ",b=";
+ test_report_ += std::to_string(int{context.is_bottommost});
+ test_report_ += ",r=";
+ test_report_ += table_file_creation_reason_to_string[context.reason];
+ test_report_ += "\n";
+
+ return LevelAndStyleCustomFilterPolicy::GetBuilderWithContext(context);
+ }
+
+ std::string DumpTestReport() {
+ std::string rv;
+ std::swap(rv, test_report_);
+ return rv;
+ }
+
+ private:
+ mutable std::string test_report_;
+};
+} // anonymous namespace
+
+TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) {
+ auto policy = std::make_shared<TestingContextCustomFilterPolicy>(15, 8, 5);
+ Options options;
+ for (bool fifo : {true, false}) {
+ options = CurrentOptions();
+ options.max_open_files = fifo ? -1 : options.max_open_files;
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ options.compaction_style =
+ fifo ? kCompactionStyleFIFO : kCompactionStyleLevel;
+
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy = policy;
+ table_options.format_version = 5;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ TryReopen(options);
+ CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options);
+
+ const int maxKey = 10000;
+ for (int i = 0; i < maxKey / 2; i++) {
+ ASSERT_OK(Put(1, Key(i), Key(i)));
+ }
+ // Add a large key to make the file contain wide range
+ ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+ Flush(1);
+ EXPECT_EQ(policy->DumpTestReport(),
+ fifo ? "cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n"
+ : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n");
+
+ for (int i = maxKey / 2; i < maxKey; i++) {
+ ASSERT_OK(Put(1, Key(i), Key(i)));
+ }
+ Flush(1);
+ EXPECT_EQ(policy->DumpTestReport(),
+ fifo ? "cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n"
+ : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n");
+
+ // Check that they can be found
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_EQ(Key(i), Get(1, Key(i)));
+ }
+ // Since we have two tables / two filters, we might have Bloom checks on
+ // our queries, but no more than one "useful" per query on a found key.
+ EXPECT_LE(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey);
+
+ // Check that we have two filters, each about
+ // fifo: 0.12% FP rate (15 bits per key)
+ // level: 2.3% FP rate (8 bits per key)
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+ }
+ {
+ auto useful_count =
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL);
+ EXPECT_GE(useful_count, maxKey * 2 * (fifo ? 0.9980 : 0.975));
+ EXPECT_LE(useful_count, maxKey * 2 * (fifo ? 0.9995 : 0.98));
+ }
+
+ if (!fifo) { // FIFO doesn't fully support CompactRange
+ // Full compaction
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+ nullptr));
+ EXPECT_EQ(policy->DumpTestReport(),
+ "cf=bob,s=kCompactionStyleLevel,n=7,l=1,b=1,r=kCompaction\n");
+
+ // Check that we now have one filter, about 9.2% FP rate (5 bits per key)
+ for (int i = 0; i < maxKey; i++) {
+ ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+ }
+ {
+ auto useful_count =
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL);
+ EXPECT_GE(useful_count, maxKey * 0.90);
+ EXPECT_LE(useful_count, maxKey * 0.91);
+ }
+ } else {
+#ifndef ROCKSDB_LITE
+ // Also try external SST file
+ {
+ std::string file_path = dbname_ + "/external.sst";
+ SstFileWriter sst_file_writer(EnvOptions(), options, handles_[1]);
+ ASSERT_OK(sst_file_writer.Open(file_path));
+ ASSERT_OK(sst_file_writer.Put("key", "value"));
+ ASSERT_OK(sst_file_writer.Finish());
+ }
+ // Note: kCompactionStyleLevel is default, ignored if num_levels == -1
+ EXPECT_EQ(policy->DumpTestReport(),
+ "cf=abe,s=kCompactionStyleLevel,n=-1,l=-1,b=0,r=kMisc\n");
+#endif
+ }
+
+ // Destroy
+ ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
+ ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1]));
+ handles_[1] = nullptr;
+ }
+}
+
+class SliceTransformLimitedDomain : public SliceTransform {
+ const char* Name() const override { return "SliceTransformLimitedDomain"; }
+
+ Slice Transform(const Slice& src) const override {
+ return Slice(src.data(), 5);
+ }
+
+ bool InDomain(const Slice& src) const override {
+ // prefix will be x????
+ return src.size() >= 5 && src[0] == 'x';
+ }
+
+ bool InRange(const Slice& dst) const override {
+ // prefix will be x????
+ return dst.size() == 5 && dst[0] == 'x';
+ }
+};
+
+TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) {
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = false;
+
+ Options options = CurrentOptions();
+ options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("x1111_AAAA", "val1"));
+ ASSERT_OK(Put("x1112_AAAA", "val2"));
+ ASSERT_OK(Put("x1113_AAAA", "val3"));
+ ASSERT_OK(Put("x1114_AAAA", "val4"));
+ // Not in domain, wont be added to filter
+ ASSERT_OK(Put("zzzzz_AAAA", "val5"));
+
+ ASSERT_OK(Flush());
+
+ ASSERT_EQ(Get("x1111_AAAA"), "val1");
+ ASSERT_EQ(Get("x1112_AAAA"), "val2");
+ ASSERT_EQ(Get("x1113_AAAA"), "val3");
+ ASSERT_EQ(Get("x1114_AAAA"), "val4");
+ // Was not added to filter but rocksdb will try to read it from the filter
+ ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
+}
+
+TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) {
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
+
+ Options options = CurrentOptions();
+ options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("x1113_AAAA", "val3"));
+ ASSERT_OK(Put("x1114_AAAA", "val4"));
+ // Not in domain, wont be added to filter
+ ASSERT_OK(Put("zzzzz_AAAA", "val1"));
+ ASSERT_OK(Put("zzzzz_AAAB", "val2"));
+ ASSERT_OK(Put("zzzzz_AAAC", "val3"));
+ ASSERT_OK(Put("zzzzz_AAAD", "val4"));
+
+ ASSERT_OK(Flush());
+
+ std::vector<std::string> iter_res;
+ auto iter = db_->NewIterator(ReadOptions());
+ // Seek to a key that was not in Domain
+ for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
+ iter_res.emplace_back(iter->value().ToString());
+ }
+
+ std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
+ ASSERT_EQ(iter_res, expected_res);
+ delete iter;
+}
+
+TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilter) {
+ // regression test for #2743. the range delete tombstones in memtable should
+ // be added even when Get() skips searching due to its prefix bloom filter
+ const int kMemtableSize = 1 << 20; // 1MB
+ const int kMemtablePrefixFilterSize = 1 << 13; // 8KB
+ const int kPrefixLen = 4;
+ Options options = CurrentOptions();
+ options.memtable_prefix_bloom_size_ratio =
+ static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
+ options.prefix_extractor.reset(
+ ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
+ options.write_buffer_size = kMemtableSize;
+ options.memtable_whole_key_filtering = false;
+ Reopen(options);
+ std::string key1("AAAABBBB");
+ std::string key2("AAAACCCC"); // not in DB
+ std::string key3("AAAADDDD");
+ std::string key4("AAAAEEEE");
+ std::string value1("Value1");
+ std::string value3("Value3");
+ std::string value4("Value4");
+
+ ASSERT_OK(Put(key1, value1, WriteOptions()));
+
+ // check memtable bloom stats
+ ASSERT_EQ("NOT_FOUND", Get(key2));
+ ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count);
+ // same prefix, bloom filter false positive
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
+
+ // enable whole key bloom filter
+ options.memtable_whole_key_filtering = true;
+ Reopen(options);
+ // check memtable bloom stats
+ ASSERT_OK(Put(key3, value3, WriteOptions()));
+ ASSERT_EQ("NOT_FOUND", Get(key2));
+ // whole key bloom filter kicks in and determines it's a miss
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count);
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
+
+ // verify whole key filtering does not depend on prefix_extractor
+ options.prefix_extractor.reset();
+ Reopen(options);
+ // check memtable bloom stats
+ ASSERT_OK(Put(key4, value4, WriteOptions()));
+ ASSERT_EQ("NOT_FOUND", Get(key2));
+ // whole key bloom filter kicks in and determines it's a miss
+ ASSERT_EQ(2, get_perf_context()->bloom_memtable_miss_count);
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
+}
+
+TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) {
+ Options options = CurrentOptions();
+ options.memtable_prefix_bloom_size_ratio = 0.015;
+ options.memtable_whole_key_filtering = true;
+ Reopen(options);
+ std::string key1("AA");
+ std::string key2("BB");
+ std::string key3("CC");
+ std::string key4("DD");
+ std::string key_not("EE");
+ std::string value1("Value1");
+ std::string value2("Value2");
+ std::string value3("Value3");
+ std::string value4("Value4");
+
+ ASSERT_OK(Put(key1, value1, WriteOptions()));
+ ASSERT_OK(Put(key2, value2, WriteOptions()));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put(key3, value3, WriteOptions()));
+ const Snapshot* snapshot = db_->GetSnapshot();
+ ASSERT_OK(Put(key4, value4, WriteOptions()));
+
+ // Delete key2 and key3
+ ASSERT_OK(
+ db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "BA", "CZ"));
+
+ // Read without snapshot
+ auto results = MultiGet({key_not, key1, key2, key3, key4});
+ ASSERT_EQ(results[0], "NOT_FOUND");
+ ASSERT_EQ(results[1], value1);
+ ASSERT_EQ(results[2], "NOT_FOUND");
+ ASSERT_EQ(results[3], "NOT_FOUND");
+ ASSERT_EQ(results[4], value4);
+
+ // Also check Get
+ ASSERT_EQ(Get(key1), value1);
+ ASSERT_EQ(Get(key2), "NOT_FOUND");
+ ASSERT_EQ(Get(key3), "NOT_FOUND");
+ ASSERT_EQ(Get(key4), value4);
+
+ // Read with snapshot
+ results = MultiGet({key_not, key1, key2, key3, key4}, snapshot);
+ ASSERT_EQ(results[0], "NOT_FOUND");
+ ASSERT_EQ(results[1], value1);
+ ASSERT_EQ(results[2], value2);
+ ASSERT_EQ(results[3], value3);
+ ASSERT_EQ(results[4], "NOT_FOUND");
+
+ // Also check Get
+ ASSERT_EQ(Get(key1, snapshot), value1);
+ ASSERT_EQ(Get(key2, snapshot), value2);
+ ASSERT_EQ(Get(key3, snapshot), value3);
+ ASSERT_EQ(Get(key4, snapshot), "NOT_FOUND");
+
+ db_->ReleaseSnapshot(snapshot);
+}
+
+TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) {
+ constexpr size_t kPrefixSize = 8;
+ const std::string kKey = "key";
+ assert(kKey.size() < kPrefixSize);
+ Options options = CurrentOptions();
+ options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize));
+ options.memtable_prefix_bloom_size_ratio = 0.25;
+ Reopen(options);
+ ASSERT_OK(Put(kKey, "v"));
+ ASSERT_EQ("v", Get(kKey));
+ std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
+ iter->Seek(kKey);
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(kKey, iter->key());
+ iter->SeekForPrev(kKey);
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(kKey, iter->key());
+}
+
+class DBBloomFilterTestVaryPrefixAndFormatVer
+ : public DBTestBase,
+ public testing::WithParamInterface<std::tuple<bool, uint32_t>> {
+ protected:
+ bool use_prefix_;
+ uint32_t format_version_;
+
+ public:
+ DBBloomFilterTestVaryPrefixAndFormatVer()
+ : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {}
+
+ ~DBBloomFilterTestVaryPrefixAndFormatVer() override {}
+
+ void SetUp() override {
+ use_prefix_ = std::get<0>(GetParam());
+ format_version_ = std::get<1>(GetParam());
+ }
+
+ static std::string UKey(uint32_t i) { return Key(static_cast<int>(i)); }
+};
+
+TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) {
+ Options options = CurrentOptions();
+ if (use_prefix_) {
+ // Entire key from UKey()
+ options.prefix_extractor.reset(NewCappedPrefixTransform(9));
+ }
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(20));
+ bbto.partition_filters = true;
+ bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ bbto.whole_key_filtering = !use_prefix_;
+ if (use_prefix_) { // (not related to prefix, just alternating between)
+ // Make sure code appropriately deals with metadata block size setting
+ // that is "too small" (smaller than minimum size for filter builder)
+ bbto.metadata_block_size = 63;
+ } else {
+ // Make sure the test will work even on platforms with large minimum
+ // filter size, due to large cache line size.
+ // (Largest cache line size + 10+% overhead.)
+ bbto.metadata_block_size = 290;
+ }
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ DestroyAndReopen(options);
+ ReadOptions ropts;
+
+ constexpr uint32_t N = 12000;
+ // Add N/2 evens
+ for (uint32_t i = 0; i < N; i += 2) {
+ ASSERT_OK(Put(UKey(i), UKey(i)));
+ }
+ ASSERT_OK(Flush());
+#ifndef ROCKSDB_LITE
+ ASSERT_EQ(TotalTableFiles(), 1);
+#endif
+
+ constexpr uint32_t Q = 29;
+ // MultiGet In
+ std::array<std::string, Q> keys;
+ std::array<Slice, Q> key_slices;
+ std::array<ColumnFamilyHandle*, Q> column_families;
+ // MultiGet Out
+ std::array<Status, Q> statuses;
+ std::array<PinnableSlice, Q> values;
+
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL);
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL);
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED);
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE);
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE);
+
+ // Check that initial clump of keys only loads one partition filter from
+ // block cache.
+ // And that spread out keys load many partition filters.
+ // In both cases, mix present vs. not present keys.
+ for (uint32_t stride : {uint32_t{1}, (N / Q) | 1}) {
+ for (uint32_t i = 0; i < Q; ++i) {
+ keys[i] = UKey(i * stride);
+ key_slices[i] = Slice(keys[i]);
+ column_families[i] = db_->DefaultColumnFamily();
+ statuses[i] = Status();
+ values[i] = PinnableSlice();
+ }
+
+ db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0],
+ /*timestamps=*/nullptr, &statuses[0], true);
+
+ // Confirm correct status results
+ uint32_t number_not_found = 0;
+ for (uint32_t i = 0; i < Q; ++i) {
+ if ((i * stride % 2) == 0) {
+ ASSERT_OK(statuses[i]);
+ } else {
+ ASSERT_TRUE(statuses[i].IsNotFound());
+ ++number_not_found;
+ }
+ }
+
+ // Confirm correct Bloom stats (no FPs)
+ uint64_t filter_useful = TestGetAndResetTickerCount(
+ options,
+ use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL);
+ uint64_t filter_checked =
+ TestGetAndResetTickerCount(options, use_prefix_
+ ? BLOOM_FILTER_PREFIX_CHECKED
+ : BLOOM_FILTER_FULL_POSITIVE) +
+ (use_prefix_ ? 0 : filter_useful);
+ EXPECT_EQ(filter_useful, number_not_found);
+ EXPECT_EQ(filter_checked, Q);
+ if (!use_prefix_) {
+ EXPECT_EQ(
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE),
+ Q - number_not_found);
+ }
+
+ // Confirm no duplicate loading same filter partition
+ uint64_t filter_accesses =
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) +
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+ if (stride == 1) {
+ EXPECT_EQ(filter_accesses, 1);
+ } else {
+ // for large stride
+ EXPECT_GE(filter_accesses, Q / 2 + 1);
+ }
+ }
+
+ // Check that a clump of keys (present and not) works when spanning
+ // two partitions
+ int found_spanning = 0;
+ for (uint32_t start = 0; start < N / 2;) {
+ for (uint32_t i = 0; i < Q; ++i) {
+ keys[i] = UKey(start + i);
+ key_slices[i] = Slice(keys[i]);
+ column_families[i] = db_->DefaultColumnFamily();
+ statuses[i] = Status();
+ values[i] = PinnableSlice();
+ }
+
+ db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0],
+ /*timestamps=*/nullptr, &statuses[0], true);
+
+ // Confirm correct status results
+ uint32_t number_not_found = 0;
+ for (uint32_t i = 0; i < Q; ++i) {
+ if (((start + i) % 2) == 0) {
+ ASSERT_OK(statuses[i]);
+ } else {
+ ASSERT_TRUE(statuses[i].IsNotFound());
+ ++number_not_found;
+ }
+ }
+
+ // Confirm correct Bloom stats (might see some FPs)
+ uint64_t filter_useful = TestGetAndResetTickerCount(
+ options,
+ use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL);
+ uint64_t filter_checked =
+ TestGetAndResetTickerCount(options, use_prefix_
+ ? BLOOM_FILTER_PREFIX_CHECKED
+ : BLOOM_FILTER_FULL_POSITIVE) +
+ (use_prefix_ ? 0 : filter_useful);
+ EXPECT_GE(filter_useful, number_not_found - 2); // possible FP
+ EXPECT_EQ(filter_checked, Q);
+ if (!use_prefix_) {
+ EXPECT_EQ(
+ TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE),
+ Q - number_not_found);
+ }
+
+ // Confirm no duplicate loading of same filter partition
+ uint64_t filter_accesses =
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) +
+ TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+ if (filter_accesses == 2) {
+ // Spanned across partitions.
+ ++found_spanning;
+ if (found_spanning >= 2) {
+ break;
+ } else {
+ // Ensure that at least once we have at least one present and
+ // one non-present key on both sides of partition boundary.
+ start += 2;
+ }
+ } else {
+ EXPECT_EQ(filter_accesses, 1);
+ // See explanation at "start += 2"
+ start += Q - 4;
+ }
+ }
+ EXPECT_TRUE(found_spanning >= 2);
+}
+
+INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer,
+ DBBloomFilterTestVaryPrefixAndFormatVer,
+ ::testing::Values(
+ // (use_prefix, format_version)
+ std::make_tuple(false, 2),
+ std::make_tuple(false, 3),
+ std::make_tuple(false, 4),
+ std::make_tuple(false, 5), std::make_tuple(true, 2),
+ std::make_tuple(true, 3), std::make_tuple(true, 4),
+ std::make_tuple(true, 5)));
+
+#ifndef ROCKSDB_LITE
+namespace {
+static const std::string kPlainTable = "test_PlainTableBloom";
+} // anonymous namespace
+
+class BloomStatsTestWithParam
+ : public DBBloomFilterTest,
+ public testing::WithParamInterface<std::tuple<std::string, bool>> {
+ public:
+ BloomStatsTestWithParam() {
+ bfp_impl_ = std::get<0>(GetParam());
+ partition_filters_ = std::get<1>(GetParam());
+
+ options_.create_if_missing = true;
+ options_.prefix_extractor.reset(
+ ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4));
+ options_.memtable_prefix_bloom_size_ratio =
+ 8.0 * 1024.0 / static_cast<double>(options_.write_buffer_size);
+ if (bfp_impl_ == kPlainTable) {
+ assert(!partition_filters_); // not supported in plain table
+ PlainTableOptions table_options;
+ options_.table_factory.reset(NewPlainTableFactory(table_options));
+ } else {
+ BlockBasedTableOptions table_options;
+ if (partition_filters_) {
+ table_options.partition_filters = partition_filters_;
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ }
+ table_options.filter_policy = Create(10, bfp_impl_);
+ options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ }
+ options_.env = env_;
+
+ get_perf_context()->Reset();
+ DestroyAndReopen(options_);
+ }
+
+ ~BloomStatsTestWithParam() override {
+ get_perf_context()->Reset();
+ Destroy(options_);
+ }
+
+ // Required if inheriting from testing::WithParamInterface<>
+ static void SetUpTestCase() {}
+ static void TearDownTestCase() {}
+
+ std::string bfp_impl_;
+ bool partition_filters_;
+ Options options_;
+};
+
+// 1 Insert 2 K-V pairs into DB
+// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2
+// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
+// 4 Call Flush() to create SST
+// 5 Call Get() for both keys - expext SST bloom hit stat to be 2
+// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
+// Test both: block and plain SST
+TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
+ std::string key1("AAAA");
+ std::string key2("RXDB"); // not in DB
+ std::string key3("ZBRA");
+ std::string value1("Value1");
+ std::string value3("Value3");
+
+ ASSERT_OK(Put(key1, value1, WriteOptions()));
+ ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+ // check memtable bloom stats
+ ASSERT_EQ(value1, Get(key1));
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
+ ASSERT_EQ(value3, Get(key3));
+ ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count);
+ ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count);
+
+ ASSERT_EQ("NOT_FOUND", Get(key2));
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count);
+ ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count);
+
+ // sanity checks
+ ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count);
+ ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count);
+
+ Flush();
+
+ // sanity checks
+ ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count);
+ ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count);
+
+ // check SST bloom stats
+ ASSERT_EQ(value1, Get(key1));
+ ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count);
+ ASSERT_EQ(value3, Get(key3));
+ ASSERT_EQ(2, get_perf_context()->bloom_sst_hit_count);
+
+ ASSERT_EQ("NOT_FOUND", Get(key2));
+ ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count);
+}
+
+// Same scenario as in BloomStatsTest but using an iterator
+TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
+ std::string key1("AAAA");
+ std::string key2("RXDB"); // not in DB
+ std::string key3("ZBRA");
+ std::string value1("Value1");
+ std::string value3("Value3");
+
+ ASSERT_OK(Put(key1, value1, WriteOptions()));
+ ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+ std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
+
+ // check memtable bloom stats
+ iter->Seek(key1);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(value1, iter->value().ToString());
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
+ ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count);
+
+ iter->Seek(key3);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(value3, iter->value().ToString());
+ ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count);
+ ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count);
+
+ iter->Seek(key2);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count);
+ ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count);
+
+ Flush();
+
+ iter.reset(dbfull()->NewIterator(ReadOptions()));
+
+ // Check SST bloom stats
+ iter->Seek(key1);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(value1, iter->value().ToString());
+ ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count);
+
+ iter->Seek(key3);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(value3, iter->value().ToString());
+ uint64_t expected_hits = 2;
+ ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
+
+ iter->Seek(key2);
+ ASSERT_OK(iter->status());
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count);
+ ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ BloomStatsTestWithParam, BloomStatsTestWithParam,
+ ::testing::Values(std::make_tuple(kLegacyBloom, false),
+ std::make_tuple(kLegacyBloom, true),
+ std::make_tuple(kFastLocalBloom, false),
+ std::make_tuple(kFastLocalBloom, true),
+ std::make_tuple(kPlainTable, false)));
+
+namespace {
+void PrefixScanInit(DBBloomFilterTest* dbtest) {
+ char buf[100];
+ std::string keystr;
+ const int small_range_sstfiles = 5;
+ const int big_range_sstfiles = 5;
+
+ // Generate 11 sst files with the following prefix ranges.
+ // GROUP 0: [0,10] (level 1)
+ // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0)
+ // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0)
+ //
+ // A seek with the previous API would do 11 random I/Os (to all the
+ // files). With the new API and a prefix filter enabled, we should
+ // only do 2 random I/O, to the 2 files containing the key.
+
+ // GROUP 0
+ snprintf(buf, sizeof(buf), "%02d______:start", 0);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ snprintf(buf, sizeof(buf), "%02d______:end", 10);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ ASSERT_OK(dbtest->Flush());
+ ASSERT_OK(dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr,
+ nullptr)); // move to level 1
+
+ // GROUP 1
+ for (int i = 1; i <= small_range_sstfiles; i++) {
+ snprintf(buf, sizeof(buf), "%02d______:start", i);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ snprintf(buf, sizeof(buf), "%02d______:end", i + 1);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ dbtest->Flush();
+ }
+
+ // GROUP 2
+ for (int i = 1; i <= big_range_sstfiles; i++) {
+ snprintf(buf, sizeof(buf), "%02d______:start", 0);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1);
+ keystr = std::string(buf);
+ ASSERT_OK(dbtest->Put(keystr, keystr));
+ dbtest->Flush();
+ }
+}
+} // anonymous namespace
+
+TEST_F(DBBloomFilterTest, PrefixScan) {
+ while (ChangeFilterOptions()) {
+ int count;
+ Slice prefix;
+ Slice key;
+ char buf[100];
+ Iterator* iter;
+ snprintf(buf, sizeof(buf), "03______:");
+ prefix = Slice(buf, 8);
+ key = Slice(buf, 9);
+ ASSERT_EQ(key.difference_offset(prefix), 8);
+ ASSERT_EQ(prefix.difference_offset(key), 8);
+ // db configs
+ env_->count_random_reads_ = true;
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+ options.disable_auto_compactions = true;
+ options.max_background_compactions = 2;
+ options.create_if_missing = true;
+ options.memtable_factory.reset(NewHashSkipListRepFactory(16));
+ assert(!options.unordered_write);
+ // It is incompatible with allow_concurrent_memtable_write=false
+ options.allow_concurrent_memtable_write = false;
+
+ BlockBasedTableOptions table_options;
+ table_options.no_block_cache = true;
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10));
+ table_options.whole_key_filtering = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ // 11 RAND I/Os
+ DestroyAndReopen(options);
+ PrefixScanInit(this);
+ count = 0;
+ env_->random_read_counter_.Reset();
+ iter = db_->NewIterator(ReadOptions());
+ for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
+ if (!iter->key().starts_with(prefix)) {
+ break;
+ }
+ count++;
+ }
+ ASSERT_OK(iter->status());
+ delete iter;
+ ASSERT_EQ(count, 2);
+ ASSERT_EQ(env_->random_read_counter_.Read(), 2);
+ Close();
+ } // end of while
+}
+
+TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
+ Options options = CurrentOptions();
+ options.write_buffer_size = 64 * 1024;
+ options.arena_block_size = 4 * 1024;
+ options.target_file_size_base = 64 * 1024;
+ options.level0_file_num_compaction_trigger = 2;
+ options.level0_slowdown_writes_trigger = 2;
+ options.level0_stop_writes_trigger = 4;
+ options.max_bytes_for_level_base = 256 * 1024;
+ options.max_write_buffer_number = 2;
+ options.max_background_compactions = 8;
+ options.max_background_flushes = 8;
+ options.compression = kNoCompression;
+ options.compaction_style = kCompactionStyleLevel;
+ options.level_compaction_dynamic_level_bytes = true;
+ BlockBasedTableOptions bbto;
+ bbto.cache_index_and_filter_blocks = true;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ options.optimize_filters_for_hits = true;
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ get_perf_context()->Reset();
+ get_perf_context()->EnablePerLevelPerfContext();
+ CreateAndReopenWithCF({"mypikachu"}, options);
+
+ int numkeys = 200000;
+
+ // Generate randomly shuffled keys, so the updates are almost
+ // random.
+ std::vector<int> keys;
+ keys.reserve(numkeys);
+ for (int i = 0; i < numkeys; i += 2) {
+ keys.push_back(i);
+ }
+ RandomShuffle(std::begin(keys), std::end(keys), /*seed*/ 42);
+ int num_inserted = 0;
+ for (int key : keys) {
+ ASSERT_OK(Put(1, Key(key), "val"));
+ if (++num_inserted % 1000 == 0) {
+ ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
+ }
+ }
+ ASSERT_OK(Put(1, Key(0), "val"));
+ ASSERT_OK(Put(1, Key(numkeys), "val"));
+ ASSERT_OK(Flush(1));
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
+
+ if (NumTableFilesAtLevel(0, 1) == 0) {
+ // No Level 0 file. Create one.
+ ASSERT_OK(Put(1, Key(0), "val"));
+ ASSERT_OK(Put(1, Key(numkeys), "val"));
+ ASSERT_OK(Flush(1));
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
+ }
+
+ for (int i = 1; i < numkeys; i += 2) {
+ ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
+ }
+
+ ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
+ ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
+ ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
+
+ // Now we have three sorted run, L0, L5 and L6 with most files in L6 have
+ // no bloom filter. Most keys be checked bloom filters twice.
+ ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2);
+ ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2);
+ uint64_t bloom_filter_useful_all_levels = 0;
+ for (auto& kv : (*(get_perf_context()->level_to_perf_context))) {
+ if (kv.second.bloom_filter_useful > 0) {
+ bloom_filter_useful_all_levels += kv.second.bloom_filter_useful;
+ }
+ }
+ ASSERT_GT(bloom_filter_useful_all_levels, 65000 * 2);
+ ASSERT_LT(bloom_filter_useful_all_levels, 120000 * 2);
+
+ for (int i = 0; i < numkeys; i += 2) {
+ ASSERT_EQ(Get(1, Key(i)), "val");
+ }
+
+ // Part 2 (read path): rewrite last level with blooms, then verify they get
+ // cached only if !optimize_filters_for_hits
+ options.disable_auto_compactions = true;
+ options.num_levels = 9;
+ options.optimize_filters_for_hits = false;
+ options.statistics = CreateDBStatistics();
+ bbto.block_cache.reset();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+ MoveFilesToLevel(7 /* level */, 1 /* column family index */);
+
+ std::string value = Get(1, Key(0));
+ uint64_t prev_cache_filter_hits =
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+ value = Get(1, Key(0));
+ ASSERT_EQ(prev_cache_filter_hits + 1,
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+ // Now that we know the filter blocks exist in the last level files, see if
+ // filter caching is skipped for this optimization
+ options.optimize_filters_for_hits = true;
+ options.statistics = CreateDBStatistics();
+ bbto.block_cache.reset();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+ value = Get(1, Key(0));
+ ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+ ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+ ASSERT_EQ(2 /* index and data block */,
+ TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+ // Check filter block ignored for files preloaded during DB::Open()
+ options.max_open_files = -1;
+ options.statistics = CreateDBStatistics();
+ bbto.block_cache.reset();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+ uint64_t prev_cache_filter_misses =
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+ prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+ Get(1, Key(0));
+ ASSERT_EQ(prev_cache_filter_misses,
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+ ASSERT_EQ(prev_cache_filter_hits,
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+ // Check filter block ignored for file trivially-moved to bottom level
+ bbto.block_cache.reset();
+ options.max_open_files = 100; // setting > -1 makes it not preload all files
+ options.statistics = CreateDBStatistics();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+ ASSERT_OK(Put(1, Key(numkeys + 1), "val"));
+ ASSERT_OK(Flush(1));
+
+ int32_t trivial_move = 0;
+ int32_t non_trivial_move = 0;
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+ "DBImpl::BackgroundCompaction:TrivialMove",
+ [&](void* /*arg*/) { trivial_move++; });
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+ "DBImpl::BackgroundCompaction:NonTrivial",
+ [&](void* /*arg*/) { non_trivial_move++; });
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+ CompactRangeOptions compact_options;
+ compact_options.bottommost_level_compaction =
+ BottommostLevelCompaction::kSkip;
+ compact_options.change_level = true;
+ compact_options.target_level = 7;
+ ASSERT_TRUE(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)
+ .IsNotSupported());
+
+ ASSERT_EQ(trivial_move, 1);
+ ASSERT_EQ(non_trivial_move, 0);
+
+ prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+ prev_cache_filter_misses =
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+ value = Get(1, Key(numkeys + 1));
+ ASSERT_EQ(prev_cache_filter_hits,
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+ ASSERT_EQ(prev_cache_filter_misses,
+ TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+
+ // Check filter block not cached for iterator
+ bbto.block_cache.reset();
+ options.statistics = CreateDBStatistics();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+ ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+ std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1]));
+ iter->SeekToFirst();
+ ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+ ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+ ASSERT_EQ(2 /* index and data block */,
+ TestGetTickerCount(options, BLOCK_CACHE_ADD));
+ get_perf_context()->Reset();
+}
+
+int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
+ int count = 0;
+ for (iter->Seek(key); iter->Valid(); iter->Next()) {
+ count++;
+ }
+ EXPECT_OK(iter->status());
+ return count;
+}
+
+// use iterate_upper_bound to hint compatiability of existing bloom filters.
+// The BF is considered compatible if 1) upper bound and seek key transform
+// into the same string, or 2) the transformed seek key is of the same length
+// as the upper bound and two keys are adjacent according to the comparator.
+TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
+ for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
+ Options options;
+ options.create_if_missing = true;
+ options.env = CurrentOptions().env;
+ options.prefix_extractor.reset(NewCappedPrefixTransform(4));
+ options.disable_auto_compactions = true;
+ options.statistics = CreateDBStatistics();
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.cache_index_and_filter_blocks = true;
+ table_options.filter_policy = Create(10, bfp_impl);
+ table_options.index_shortening = BlockBasedTableOptions::
+ IndexShorteningMode::kShortenSeparatorsAndSuccessor;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("abcdxxx0", "val1"));
+ ASSERT_OK(Put("abcdxxx1", "val2"));
+ ASSERT_OK(Put("abcdxxx2", "val3"));
+ ASSERT_OK(Put("abcdxxx3", "val4"));
+ ASSERT_OK(dbfull()->Flush(FlushOptions()));
+ {
+ // prefix_extractor has not changed, BF will always be read
+ Slice upper_bound("abce");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
+ }
+ {
+ Slice upper_bound("abcdzzzz");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}}));
+ ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
+ "rocksdb.FixedPrefix.5");
+ {
+ // BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read
+ Slice upper_bound("abce");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abcdxx00"), 4);
+ // should check bloom filter since upper bound meets requirement
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ {
+ // [abcdxx01, abcey) is not valid bound since upper bound is too long for
+ // the BF in SST (capped:4)
+ Slice upper_bound("abcey");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abcdxx01"), 4);
+ // should skip bloom filter since upper bound is too long
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ {
+ // [abcdxx02, abcdy) is a valid bound since the prefix is the same
+ Slice upper_bound("abcdy");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abcdxx02"), 4);
+ // should check bloom filter since upper bound matches transformed seek
+ // key
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ {
+ // [aaaaaaaa, abce) is not a valid bound since 1) they don't share the
+ // same prefix, 2) the prefixes are not consecutive
+ Slice upper_bound("abce");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0);
+ // should skip bloom filter since mismatch is found
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}}));
+ {
+ // [abc, abd) is not a valid bound since the upper bound is too short
+ // for BF (capped:4)
+ Slice upper_bound("abd");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abc"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ // Same with re-open
+ options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+ Reopen(options);
+ {
+ Slice upper_bound("abd");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abc"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ // Set back to capped:4 and verify BF is always read
+ options.prefix_extractor.reset(NewCappedPrefixTransform(4));
+ Reopen(options);
+ {
+ Slice upper_bound("abd");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abc"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
+ }
+ // Same if there's a problem initally loading prefix transform
+ SyncPoint::GetInstance()->SetCallBack(
+ "BlockBasedTable::Open::ForceNullTablePrefixExtractor",
+ [&](void* arg) { *static_cast<bool*>(arg) = true; });
+ SyncPoint::GetInstance()->EnableProcessing();
+ Reopen(options);
+ {
+ Slice upper_bound("abd");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "abc"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
+ }
+ SyncPoint::GetInstance()->DisableProcessing();
+ }
+}
+
+// Create multiple SST files each with a different prefix_extractor config,
+// verify iterators can read all SST files using the latest config.
+TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
+ for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
+ Options options;
+ options.env = CurrentOptions().env;
+ options.create_if_missing = true;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+ options.disable_auto_compactions = true;
+ options.statistics = CreateDBStatistics();
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy = Create(10, bfp_impl);
+ table_options.cache_index_and_filter_blocks = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ Slice upper_bound("foz90000");
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+
+ // first SST with fixed:1 BF
+ ASSERT_OK(Put("foo2", "bar2"));
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_OK(Put("foq1", "bar1"));
+ ASSERT_OK(Put("fpa", "0"));
+ dbfull()->Flush(FlushOptions());
+ std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_old, "foo"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1);
+
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
+ ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
+ "rocksdb.CappedPrefix.3");
+ read_options.iterate_upper_bound = &upper_bound;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "foo"), 2);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
+ ASSERT_EQ(CountIter(iter, "gpk"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+
+ // second SST with capped:3 BF
+ ASSERT_OK(Put("foo3", "bar3"));
+ ASSERT_OK(Put("foo4", "bar4"));
+ ASSERT_OK(Put("foq5", "bar5"));
+ ASSERT_OK(Put("fpb", "1"));
+ ASSERT_OK(dbfull()->Flush(FlushOptions()));
+ {
+ // BF is cappped:3 now
+ std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
+ // both counters are incremented because BF is "not changed" for 1 of the
+ // 2 SST files, so filter is checked once and found no match.
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
+ }
+
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
+ ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
+ "rocksdb.FixedPrefix.2");
+ // third SST with fixed:2 BF
+ ASSERT_OK(Put("foo6", "bar6"));
+ ASSERT_OK(Put("foo7", "bar7"));
+ ASSERT_OK(Put("foq8", "bar8"));
+ ASSERT_OK(Put("fpc", "2"));
+ ASSERT_OK(dbfull()->Flush(FlushOptions()));
+ {
+ // BF is fixed:2 now
+ std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
+ // the first and last BF are checked
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
+ ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
+ // only last BF is checked and not found
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
+ }
+
+ // iter_old can only see the first SST, so checked plus 1
+ ASSERT_EQ(CountIter(iter_old, "foo"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9);
+ // iter was created after the first setoptions call so only full filter
+ // will check the filter
+ ASSERT_EQ(CountIter(iter, "foo"), 2);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10);
+
+ {
+ // keys in all three SSTs are visible to iterator
+ // The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2)
+ // so +2 for checked counter
+ std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_all, "foo"), 9);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
+ ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
+ }
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
+ ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
+ "rocksdb.CappedPrefix.3");
+ {
+ std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_all, "foo"), 6);
+ // all three SST are checked because the current options has the same as
+ // the remaining SST (capped:3)
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
+ ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4);
+ }
+ // TODO(Zhongyi): Maybe also need to add Get calls to test point look up?
+ }
+}
+
+// Create a new column family in a running DB, change prefix_extractor
+// dynamically, verify the iterator created on the new column family behaves
+// as expected
+TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
+ int iteration = 0;
+ for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
+ Options options = CurrentOptions();
+ options.create_if_missing = true;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+ options.disable_auto_compactions = true;
+ options.statistics = CreateDBStatistics();
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.cache_index_and_filter_blocks = true;
+ table_options.filter_policy = Create(10, bfp_impl);
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options);
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ // create a new CF and set prefix_extractor dynamically
+ options.prefix_extractor.reset(NewCappedPrefixTransform(3));
+ CreateColumnFamilies({"ramen_dojo_" + std::to_string(iteration)}, options);
+ ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(),
+ "rocksdb.CappedPrefix.3");
+ ASSERT_OK(Put(2, "foo3", "bar3"));
+ ASSERT_OK(Put(2, "foo4", "bar4"));
+ ASSERT_OK(Put(2, "foo5", "bar5"));
+ ASSERT_OK(Put(2, "foq6", "bar6"));
+ ASSERT_OK(Put(2, "fpq7", "bar7"));
+ dbfull()->Flush(FlushOptions());
+ {
+ std::unique_ptr<Iterator> iter(
+ db_->NewIterator(read_options, handles_[2]));
+ ASSERT_EQ(CountIter(iter, "foo"), 3);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ ASSERT_OK(
+ dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}}));
+ ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(),
+ "rocksdb.FixedPrefix.2");
+ {
+ std::unique_ptr<Iterator> iter(
+ db_->NewIterator(read_options, handles_[2]));
+ ASSERT_EQ(CountIter(iter, "foo"), 4);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ ASSERT_OK(dbfull()->DropColumnFamily(handles_[2]));
+ ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[2]));
+ handles_[2] = nullptr;
+ ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
+ ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1]));
+ handles_[1] = nullptr;
+ iteration++;
+ }
+}
+
+// Verify it's possible to change prefix_extractor at runtime and iterators
+// behaves as expected
+TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
+ for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
+ Options options;
+ options.env = CurrentOptions().env;
+ options.create_if_missing = true;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+ options.disable_auto_compactions = true;
+ options.statistics = CreateDBStatistics();
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.cache_index_and_filter_blocks = true;
+ table_options.filter_policy = Create(10, bfp_impl);
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("foo2", "bar2"));
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_OK(Put("foo1", "bar1"));
+ ASSERT_OK(Put("fpa", "0"));
+ dbfull()->Flush(FlushOptions());
+ ASSERT_OK(Put("foo3", "bar3"));
+ ASSERT_OK(Put("foo4", "bar4"));
+ ASSERT_OK(Put("foo5", "bar5"));
+ ASSERT_OK(Put("fpb", "1"));
+ dbfull()->Flush(FlushOptions());
+ ASSERT_OK(Put("foo6", "bar6"));
+ ASSERT_OK(Put("foo7", "bar7"));
+ ASSERT_OK(Put("foo8", "bar8"));
+ ASSERT_OK(Put("fpc", "2"));
+ dbfull()->Flush(FlushOptions());
+
+ ReadOptions read_options;
+ read_options.prefix_same_as_start = true;
+ {
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter, "foo"), 12);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+ std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
+ ASSERT_EQ(CountIter(iter_old, "foo"), 12);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
+ ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
+ "rocksdb.CappedPrefix.3");
+ {
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ // "fp*" should be skipped
+ ASSERT_EQ(CountIter(iter, "foo"), 9);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ }
+
+ // iterator created before should not be affected and see all keys
+ ASSERT_EQ(CountIter(iter_old, "foo"), 12);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
+ ASSERT_EQ(CountIter(iter_old, "abc"), 0);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12);
+ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
+ }
+}
+
+TEST_F(DBBloomFilterTest, SeekForPrevWithPartitionedFilters) {
+ Options options = CurrentOptions();
+ constexpr size_t kNumKeys = 10000;
+ static_assert(kNumKeys <= 10000, "kNumKeys have to be <= 10000");
+ options.memtable_factory.reset(
+ test::NewSpecialSkipListFactory(kNumKeys + 10));
+ options.create_if_missing = true;
+ constexpr size_t kPrefixLength = 4;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixLength));
+ options.compression = kNoCompression;
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(NewBloomFilterPolicy(50));
+ bbto.index_shortening =
+ BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
+ bbto.block_size = 128;
+ bbto.metadata_block_size = 128;
+ bbto.partition_filters = true;
+ bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ DestroyAndReopen(options);
+
+ const std::string value(64, '\0');
+
+ WriteOptions write_opts;
+ write_opts.disableWAL = true;
+ for (size_t i = 0; i < kNumKeys; ++i) {
+ std::ostringstream oss;
+ oss << std::setfill('0') << std::setw(4) << std::fixed << i;
+ ASSERT_OK(db_->Put(write_opts, oss.str(), value));
+ }
+ ASSERT_OK(Flush());
+
+ ReadOptions read_opts;
+ // Use legacy, implicit prefix seek
+ read_opts.total_order_seek = false;
+ read_opts.auto_prefix_mode = false;
+ std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
+ for (size_t i = 0; i < kNumKeys; ++i) {
+ // Seek with a key after each one added but with same prefix. One will
+ // surely cross a partition boundary.
+ std::ostringstream oss;
+ oss << std::setfill('0') << std::setw(4) << std::fixed << i << "a";
+ it->SeekForPrev(oss.str());
+ ASSERT_OK(it->status());
+ ASSERT_TRUE(it->Valid());
+ }
+ it.reset();
+}
+
+namespace {
+class BackwardBytewiseComparator : public Comparator {
+ public:
+ const char* Name() const override { return "BackwardBytewiseComparator"; }
+
+ int Compare(const Slice& a, const Slice& b) const override {
+ int min_size_neg = -static_cast<int>(std::min(a.size(), b.size()));
+ const char* a_end = a.data() + a.size();
+ const char* b_end = b.data() + b.size();
+ for (int i = -1; i >= min_size_neg; --i) {
+ if (a_end[i] != b_end[i]) {
+ if (static_cast<unsigned char>(a_end[i]) <
+ static_cast<unsigned char>(b_end[i])) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ }
+ return static_cast<int>(a.size()) - static_cast<int>(b.size());
+ }
+
+ void FindShortestSeparator(std::string* /*start*/,
+ const Slice& /*limit*/) const override {}
+
+ void FindShortSuccessor(std::string* /*key*/) const override {}
+};
+
+const BackwardBytewiseComparator kBackwardBytewiseComparator{};
+
+class FixedSuffix4Transform : public SliceTransform {
+ const char* Name() const override { return "FixedSuffixTransform"; }
+
+ Slice Transform(const Slice& src) const override {
+ return Slice(src.data() + src.size() - 4, 4);
+ }
+
+ bool InDomain(const Slice& src) const override { return src.size() >= 4; }
+};
+
+std::pair<uint64_t, uint64_t> GetBloomStat(const Options& options, bool sst) {
+ if (sst) {
+ return {
+ options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_CHECKED),
+ options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_USEFUL)};
+ } else {
+ auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0);
+ auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0);
+ return {hit + miss, miss};
+ }
+}
+
+std::pair<uint64_t, uint64_t> CheckedAndUseful(uint64_t checked,
+ uint64_t useful) {
+ return {checked, useful};
+}
+} // anonymous namespace
+
+// This uses a prefix_extractor + comparator combination that violates
+// one of the old obsolete, unnecessary axioms of prefix extraction:
+// * key.starts_with(prefix(key))
+// This axiom is not really needed, and we validate that here.
+TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) {
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = false;
+
+ Options options = CurrentOptions();
+ options.comparator = &kBackwardBytewiseComparator;
+ options.prefix_extractor = std::make_shared<FixedSuffix4Transform>();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ options.memtable_prefix_bloom_size_ratio = 0.1;
+ options.statistics = CreateDBStatistics();
+
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("321aaaa", "val1"));
+ ASSERT_OK(Put("112aaaa", "val2"));
+ ASSERT_OK(Put("009aaaa", "val3"));
+ ASSERT_OK(Put("baa", "val4")); // out of domain
+ ASSERT_OK(Put("321abaa", "val5"));
+ ASSERT_OK(Put("zzz", "val6")); // out of domain
+
+ for (auto flushed : {false, true}) {
+ SCOPED_TRACE("flushed=" + std::to_string(flushed));
+ if (flushed) {
+ ASSERT_OK(Flush());
+ }
+ ReadOptions read_options;
+ if (flushed) { // TODO: support auto_prefix_mode in memtable?
+ read_options.auto_prefix_mode = true;
+ }
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ {
+ Slice ub("999aaaa");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa"), 3);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ Slice ub("999abaa");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "abaa"), 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ Slice ub("999acaa");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "acaa"), 0);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1));
+ }
+ {
+ Slice ub("zzzz");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "baa"), 3);
+ if (flushed) { // TODO: fix memtable case
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ }
+ }
+}
+
+// This uses a prefix_extractor + comparator combination that violates
+// one of the old obsolete, unnecessary axioms of prefix extraction:
+// * Compare(prefix(key), key) <= 0
+// This axiom is not really needed, and we validate that here.
+TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) {
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = false;
+
+ Options options = CurrentOptions();
+ options.comparator = ReverseBytewiseComparator();
+ options.prefix_extractor.reset(NewFixedPrefixTransform(4));
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ options.memtable_prefix_bloom_size_ratio = 0.1;
+ options.statistics = CreateDBStatistics();
+
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("aaaa123", "val1"));
+ ASSERT_OK(Put("aaaa211", "val2"));
+ ASSERT_OK(Put("aaaa900", "val3"));
+ ASSERT_OK(Put("aab", "val4")); // out of domain
+ ASSERT_OK(Put("aaba123", "val5"));
+ ASSERT_OK(Put("qqqq123", "val7"));
+ ASSERT_OK(Put("qqqq", "val8"));
+ ASSERT_OK(Put("zzz", "val8")); // out of domain
+
+ for (auto flushed : {false, true}) {
+ SCOPED_TRACE("flushed=" + std::to_string(flushed));
+ if (flushed) {
+ ASSERT_OK(Flush());
+ }
+ ReadOptions read_options;
+ if (flushed) { // TODO: support auto_prefix_mode in memtable?
+ read_options.auto_prefix_mode = true;
+ } else {
+ // TODO: why needed?
+ get_perf_context()->bloom_memtable_hit_count = 0;
+ get_perf_context()->bloom_memtable_miss_count = 0;
+ }
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ {
+ Slice ub("aaaa000");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa999"), 3);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ // Note: prefix does work as upper bound
+ Slice ub("aaaa");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa999"), 3);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ // Note: prefix does not work here as seek key
+ Slice ub("aaaa500");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa"), 0);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ Slice ub("aaba000");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaba999"), 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ Slice ub("aaca000");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaca999"), 0);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1));
+ }
+ {
+ Slice ub("aaaz");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "zzz"), 5);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ {
+ // Note: prefix does work here as seek key, but only finds key equal
+ // to prefix (others with same prefix are less)
+ read_options.auto_prefix_mode = false;
+ read_options.iterate_upper_bound = nullptr;
+ read_options.prefix_same_as_start = true;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "qqqq"), 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ }
+}
+
+namespace {
+// A weird comparator that in combination with NonIdempotentFixed4Transform
+// breaks an old axiom of prefix filtering.
+class WeirdComparator : public Comparator {
+ public:
+ const char* Name() const override { return "WeirdComparator"; }
+
+ int Compare(const Slice& a, const Slice& b) const override {
+ bool a_in = a.size() >= 5;
+ bool b_in = b.size() >= 5;
+ if (a_in != b_in) {
+ // Order keys after prefixes
+ return a_in - b_in;
+ }
+ if (a_in) {
+ return BytewiseComparator()->Compare(a, b);
+ } else {
+ // Different ordering on the prefixes
+ return ReverseBytewiseComparator()->Compare(a, b);
+ }
+ }
+
+ void FindShortestSeparator(std::string* /*start*/,
+ const Slice& /*limit*/) const override {}
+
+ void FindShortSuccessor(std::string* /*key*/) const override {}
+};
+const WeirdComparator kWeirdComparator{};
+
+// Non-idempotentent because prefix is always 4 bytes, but this is
+// out-of-domain for keys to be assigned prefixes (>= 5 bytes)
+class NonIdempotentFixed4Transform : public SliceTransform {
+ const char* Name() const override { return "NonIdempotentFixed4Transform"; }
+
+ Slice Transform(const Slice& src) const override {
+ return Slice(src.data(), 4);
+ }
+
+ bool InDomain(const Slice& src) const override { return src.size() >= 5; }
+};
+} // anonymous namespace
+
+// This uses a prefix_extractor + comparator combination that violates
+// two of the old obsolete, unnecessary axioms of prefix extraction:
+// * prefix(prefix(key)) == prefix(key)
+// * If Compare(k1, k2) <= 0, then Compare(prefix(k1), prefix(k2)) <= 0
+// This axiom is not really needed, and we validate that here.
+TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) {
+ BlockBasedTableOptions bbto;
+ bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
+ bbto.whole_key_filtering = false;
+
+ Options options = CurrentOptions();
+ options.prefix_extractor = std::make_shared<NonIdempotentFixed4Transform>();
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+ options.memtable_prefix_bloom_size_ratio = 0.1;
+ options.statistics = CreateDBStatistics();
+
+ for (auto weird_comparator : {false, true}) {
+ if (weird_comparator) {
+ options.comparator = &kWeirdComparator;
+ }
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("aaaa123", "val1"));
+ ASSERT_OK(Put("aaaa211", "val2"));
+ ASSERT_OK(Put("aaaa900", "val3"));
+ ASSERT_OK(Put("aab", "val4")); // out of domain
+ ASSERT_OK(Put("aaba123", "val5"));
+ ASSERT_OK(Put("qqqq123", "val7"));
+ ASSERT_OK(Put("qqqq", "val8")); // out of domain
+ ASSERT_OK(Put("zzzz", "val8")); // out of domain
+
+ for (auto flushed : {false, true}) {
+ SCOPED_TRACE("flushed=" + std::to_string(flushed));
+ if (flushed) {
+ ASSERT_OK(Flush());
+ }
+ ReadOptions read_options;
+ if (flushed) { // TODO: support auto_prefix_mode in memtable?
+ read_options.auto_prefix_mode = true;
+ } else {
+ // TODO: why needed?
+ get_perf_context()->bloom_memtable_hit_count = 0;
+ get_perf_context()->bloom_memtable_miss_count = 0;
+ }
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ {
+ Slice ub("aaaa999");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa000"), 3);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ // Note: prefix as seek key is not bloom-optimized
+ // Note: the count works with weird_comparator because "aaaa" is
+ // ordered as the last of the prefixes
+ Slice ub("aaaa999");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaaa"), 3);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ {
+ Slice ub("aaba9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaba0"), 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ Slice ub("aaca9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aaca0"), 0);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1));
+ }
+ {
+ Slice ub("qqqq9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "qqqq0"), 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0));
+ }
+ {
+ // Note: prefix as seek key is not bloom-optimized
+ Slice ub("qqqq9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "qqqq"), weird_comparator ? 7 : 2);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ {
+ // Note: prefix as seek key is not bloom-optimized
+ Slice ub("zzzz9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "zzzz"), weird_comparator ? 8 : 1);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ {
+ Slice ub("zzzz9");
+ read_options.iterate_upper_bound = &ub;
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+ EXPECT_EQ(CountIter(iter, "aab"), weird_comparator ? 6 : 5);
+ EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0));
+ }
+ }
+ }
+}
+
+#endif // ROCKSDB_LITE
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}