Diffstat (limited to 'src/rocksdb/table/block_based/filter_policy.cc')
-rw-r--r--   src/rocksdb/table/block_based/filter_policy.cc   759
1 file changed, 759 insertions, 0 deletions
diff --git a/src/rocksdb/table/block_based/filter_policy.cc b/src/rocksdb/table/block_based/filter_policy.cc
new file mode 100644
index 000000000..c8f23ee33
--- /dev/null
+++ b/src/rocksdb/table/block_based/filter_policy.cc
@@ -0,0 +1,759 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <array>
+#include <deque>
+
+#include "rocksdb/filter_policy.h"
+
+#include "rocksdb/slice.h"
+#include "table/block_based/block_based_filter_block.h"
+#include "table/block_based/full_filter_block.h"
+#include "table/block_based/filter_policy_internal.h"
+#include "third-party/folly/folly/ConstexprMath.h"
+#include "util/bloom_impl.h"
+#include "util/coding.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+// See description in FastLocalBloomImpl
+class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
+ public:
+ explicit FastLocalBloomBitsBuilder(const int millibits_per_key)
+ : millibits_per_key_(millibits_per_key),
+ num_probes_(FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_)) {
+ assert(millibits_per_key >= 1000);
+ }
+
+ // No Copy allowed
+ FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete;
+ void operator=(const FastLocalBloomBitsBuilder&) = delete;
+
+ ~FastLocalBloomBitsBuilder() override {}
+
+ virtual void AddKey(const Slice& key) override {
+ uint64_t hash = GetSliceHash64(key);
+ if (hash_entries_.empty() || hash != hash_entries_.back()) {
+ hash_entries_.push_back(hash);
+ }
+ }
+
+ virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
+ uint32_t len_with_metadata =
+ CalculateSpace(static_cast<uint32_t>(hash_entries_.size()));
+ char* data = new char[len_with_metadata];
+ memset(data, 0, len_with_metadata);
+
+ assert(data);
+ assert(len_with_metadata >= 5);
+
+ uint32_t len = len_with_metadata - 5;
+ if (len > 0) {
+ AddAllEntries(data, len);
+ }
+
+ // See BloomFilterPolicy::GetBloomBitsReader re: metadata
+ // -1 = Marker for newer Bloom implementations
+ data[len] = static_cast<char>(-1);
+ // 0 = Marker for this sub-implementation
+ data[len + 1] = static_cast<char>(0);
+ // num_probes (and 0 in upper bits for 64-byte block size)
+ data[len + 2] = static_cast<char>(num_probes_);
+ // rest of metadata stays zero
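+ // For example, with num_probes_ == 6 the five metadata bytes at the end
+ // of the filter are 0xFF 0x00 0x06 0x00 0x00.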
+
+ const char* const_data = data;
+ buf->reset(const_data);
+ assert(hash_entries_.empty());
+
+ return Slice(data, len_with_metadata);
+ }
+
+ int CalculateNumEntry(const uint32_t bytes) override {
+ uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0;
+ return static_cast<int>(uint64_t{8000} * bytes_no_meta /
+ millibits_per_key_);
+ }
+
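+ // Worked example: 10000 keys at millibits_per_key_ == 10000 (10 bits/key)
+ // need (10000 * 10000 + 511999) / 512000 = 196 cache lines, i.e.
+ // 196 * 64 + 5 = 12549 bytes including metadata.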
+ uint32_t CalculateSpace(const int num_entry) override {
+ uint32_t num_cache_lines = 0;
+ if (millibits_per_key_ > 0 && num_entry > 0) {
+ num_cache_lines = static_cast<uint32_t>(
+ (int64_t{num_entry} * millibits_per_key_ + 511999) / 512000);
+ }
+ return num_cache_lines * 64 + /*metadata*/ 5;
+ }
+
+ double EstimatedFpRate(size_t keys, size_t bytes) override {
+ return FastLocalBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5,
+ num_probes_, /*hash bits*/ 64);
+ }
+
+ private:
+ void AddAllEntries(char* data, uint32_t len) {
+ // Simple version without prefetching:
+ //
+ // for (auto h : hash_entries_) {
+ // FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len,
+ // num_probes_, data);
+ // }
+
+ const size_t num_entries = hash_entries_.size();
+ constexpr size_t kBufferMask = 7;
+ static_assert(((kBufferMask + 1) & kBufferMask) == 0,
+ "Must be power of 2 minus 1");
+
+ std::array<uint32_t, kBufferMask + 1> hashes;
+ std::array<uint32_t, kBufferMask + 1> byte_offsets;
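+ // Keeping kBufferMask + 1 == 8 entries in flight gives PrepareHash time to
+ // pull in each cache line before AddHashPrepared touches it.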
+
+ // Prime the buffer
+ size_t i = 0;
+ for (; i <= kBufferMask && i < num_entries; ++i) {
+ uint64_t h = hash_entries_.front();
+ hash_entries_.pop_front();
+ FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
+ /*out*/ &byte_offsets[i]);
+ hashes[i] = Upper32of64(h);
+ }
+
+ // Process and buffer
+ for (; i < num_entries; ++i) {
+ uint32_t& hash_ref = hashes[i & kBufferMask];
+ uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask];
+ // Process (add)
+ FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes_,
+ data + byte_offset_ref);
+ // And buffer
+ uint64_t h = hash_entries_.front();
+ hash_entries_.pop_front();
+ FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
+ /*out*/ &byte_offset_ref);
+ hash_ref = Upper32of64(h);
+ }
+
+ // Finish processing
+ for (i = 0; i <= kBufferMask && i < num_entries; ++i) {
+ FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes_,
+ data + byte_offsets[i]);
+ }
+ }
+
+ int millibits_per_key_;
+ int num_probes_;
+ // A deque avoids unnecessary copying of already-saved values
+ // and has near-minimal peak memory use.
+ std::deque<uint64_t> hash_entries_;
+};
+
+// See description in FastLocalBloomImpl
+class FastLocalBloomBitsReader : public FilterBitsReader {
+ public:
+ FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes)
+ : data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {}
+
+ // No Copy allowed
+ FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete;
+ void operator=(const FastLocalBloomBitsReader&) = delete;
+
+ ~FastLocalBloomBitsReader() override {}
+
+ bool MayMatch(const Slice& key) override {
+ uint64_t h = GetSliceHash64(key);
+ uint32_t byte_offset;
+ FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
+ /*out*/ &byte_offset);
+ return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_,
+ data_ + byte_offset);
+ }
+
+ virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
+ std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
+ std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
+ for (int i = 0; i < num_keys; ++i) {
+ uint64_t h = GetSliceHash64(*keys[i]);
+ FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
+ /*out*/ &byte_offsets[i]);
+ hashes[i] = Upper32of64(h);
+ }
+ for (int i = 0; i < num_keys; ++i) {
+ may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared(
+ hashes[i], num_probes_, data_ + byte_offsets[i]);
+ }
+ }
+
+ private:
+ const char* data_;
+ const int num_probes_;
+ const uint32_t len_bytes_;
+};
+
+using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;
+
+class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
+ public:
+ explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log);
+
+ // No Copy allowed
+ LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete;
+ void operator=(const LegacyBloomBitsBuilder&) = delete;
+
+ ~LegacyBloomBitsBuilder() override;
+
+ void AddKey(const Slice& key) override;
+
+ Slice Finish(std::unique_ptr<const char[]>* buf) override;
+
+ int CalculateNumEntry(const uint32_t bytes) override;
+
+ uint32_t CalculateSpace(const int num_entry) override {
+ uint32_t dont_care1;
+ uint32_t dont_care2;
+ return CalculateSpace(num_entry, &dont_care1, &dont_care2);
+ }
+
+ double EstimatedFpRate(size_t keys, size_t bytes) override {
+ return LegacyBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5,
+ num_probes_);
+ }
+
+ private:
+ int bits_per_key_;
+ int num_probes_;
+ std::vector<uint32_t> hash_entries_;
+ Logger* info_log_;
+
+ // Round up total_bits so the filter occupies a whole (odd) number of CPU
+ // cache lines
+ uint32_t GetTotalBitsForLocality(uint32_t total_bits);
+
+ // Reserve space for new filter
+ char* ReserveSpace(const int num_entry, uint32_t* total_bits,
+ uint32_t* num_lines);
+
+ // Implementation-specific variant of public CalculateSpace
+ uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
+ uint32_t* num_lines);
+
+ // Assuming single threaded access to this function.
+ void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
+};
+
+LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key,
+ Logger* info_log)
+ : bits_per_key_(bits_per_key),
+ num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)),
+ info_log_(info_log) {
+ assert(bits_per_key_);
+}
+
+LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {}
+
+void LegacyBloomBitsBuilder::AddKey(const Slice& key) {
+ uint32_t hash = BloomHash(key);
+ if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
+ hash_entries_.push_back(hash);
+ }
+}
+
+Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
+ uint32_t total_bits, num_lines;
+ size_t num_entries = hash_entries_.size();
+ char* data =
+ ReserveSpace(static_cast<int>(num_entries), &total_bits, &num_lines);
+ assert(data);
+
+ if (total_bits != 0 && num_lines != 0) {
+ for (auto h : hash_entries_) {
+ AddHash(h, data, num_lines, total_bits);
+ }
+
+ // Check for excessive entries for 32-bit hash function
+ if (num_entries >= /* minimum of 3 million */ 3000000U) {
+ // More specifically, we can detect that the 32-bit hash function
+ // is causing significant increase in FP rate by comparing current
+ // estimated FP rate to what we would get with a normal number of
+ // keys at same memory ratio.
+ double est_fp_rate = LegacyBloomImpl::EstimatedFpRate(
+ num_entries, total_bits / 8, num_probes_);
+ double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate(
+ 1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_);
+
+ if (est_fp_rate >= 1.50 * vs_fp_rate) {
+ // For more details, see
+ // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
+ ROCKS_LOG_WARN(
+ info_log_,
+ "Using legacy SST/BBT Bloom filter with excessive key count "
+ "(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. "
+ "Consider using new Bloom with format_version>=5, smaller SST "
+ "file size, or partitioned filters.",
+ num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate);
+ }
+ }
+ }
+ // See BloomFilterPolicy::GetFilterBitsReader for metadata
+ data[total_bits / 8] = static_cast<char>(num_probes_);
+ EncodeFixed32(data + total_bits / 8 + 1, static_cast<uint32_t>(num_lines));
+
+ const char* const_data = data;
+ buf->reset(const_data);
+ hash_entries_.clear();
+
+ return Slice(data, total_bits / 8 + 5);
+}
+
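+// Example (CACHE_LINE_SIZE == 64, so 512 bits per line): total_bits == 5000
+// rounds up to 10 lines, which is bumped to the odd 11, giving 11 * 512 =
+// 5632 bits.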
+uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
+ uint32_t num_lines =
+ (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
+
+ // Make num_lines an odd number to make sure more hash bits are involved
+ // when determining which cache line a key maps to.
+ if (num_lines % 2 == 0) {
+ num_lines++;
+ }
+ return num_lines * (CACHE_LINE_SIZE * 8);
+}
+
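+// Worked example: num_entry == 10000 at 10 bits per key requests 100000
+// bits, which GetTotalBitsForLocality rounds up to 197 cache lines (an odd
+// count) = 100864 bits, so the returned size is 100864 / 8 + 5 = 12613 bytes.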
+uint32_t LegacyBloomBitsBuilder::CalculateSpace(const int num_entry,
+ uint32_t* total_bits,
+ uint32_t* num_lines) {
+ assert(bits_per_key_);
+ if (num_entry != 0) {
+ uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
+
+ *total_bits = GetTotalBitsForLocality(total_bits_tmp);
+ *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
+ assert(*total_bits > 0 && *total_bits % 8 == 0);
+ } else {
+ // filter is empty, just leave space for metadata
+ *total_bits = 0;
+ *num_lines = 0;
+ }
+
+ // Reserve space for Filter
+ uint32_t sz = *total_bits / 8;
+ sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
+ return sz;
+}
+
+char* LegacyBloomBitsBuilder::ReserveSpace(const int num_entry,
+ uint32_t* total_bits,
+ uint32_t* num_lines) {
+ uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
+ char* data = new char[sz];
+ memset(data, 0, sz);
+ return data;
+}
+
+int LegacyBloomBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
+ assert(bits_per_key_);
+ assert(bytes > 0);
+ int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
+ int low = 1;
+ int n = high;
+ for (; n >= low; n--) {
+ if (CalculateSpace(n) <= bytes) {
+ break;
+ }
+ }
+ assert(n < high); // High should be an overestimation
+ return n;
+}
+
+inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data,
+ uint32_t num_lines,
+ uint32_t total_bits) {
+#ifdef NDEBUG
+ static_cast<void>(total_bits);
+#endif
+ assert(num_lines > 0 && total_bits > 0);
+
+ LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data,
+ folly::constexpr_log2(CACHE_LINE_SIZE));
+}
+
+class LegacyBloomBitsReader : public FilterBitsReader {
+ public:
+ LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines,
+ uint32_t log2_cache_line_size)
+ : data_(data),
+ num_probes_(num_probes),
+ num_lines_(num_lines),
+ log2_cache_line_size_(log2_cache_line_size) {}
+
+ // No Copy allowed
+ LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete;
+ void operator=(const LegacyBloomBitsReader&) = delete;
+
+ ~LegacyBloomBitsReader() override {}
+
+ // "contents" contains the data built by a preceding call to
+ // FilterBitsBuilder::Finish. MayMatch must return true if the key was
+ // passed to FilterBitsBuilder::AddKey. This method may return true or false
+ // if the key was not on the list, but it should aim to return false with a
+ // high probability.
+ bool MayMatch(const Slice& key) override {
+ uint32_t hash = BloomHash(key);
+ uint32_t byte_offset;
+ LegacyBloomImpl::PrepareHashMayMatch(
+ hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_);
+ return LegacyBloomImpl::HashMayMatchPrepared(
+ hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
+ }
+
+ virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
+ std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
+ std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
+ for (int i = 0; i < num_keys; ++i) {
+ hashes[i] = BloomHash(*keys[i]);
+ LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_,
+ /*out*/ &byte_offsets[i],
+ log2_cache_line_size_);
+ }
+ for (int i = 0; i < num_keys; ++i) {
+ may_match[i] = LegacyBloomImpl::HashMayMatchPrepared(
+ hashes[i], num_probes_, data_ + byte_offsets[i],
+ log2_cache_line_size_);
+ }
+ }
+
+ private:
+ const char* data_;
+ const int num_probes_;
+ const uint32_t num_lines_;
+ const uint32_t log2_cache_line_size_;
+};
+
+class AlwaysTrueFilter : public FilterBitsReader {
+ public:
+ bool MayMatch(const Slice&) override { return true; }
+ using FilterBitsReader::MayMatch; // inherit overload
+};
+
+class AlwaysFalseFilter : public FilterBitsReader {
+ public:
+ bool MayMatch(const Slice&) override { return false; }
+ using FilterBitsReader::MayMatch; // inherit overload
+};
+
+} // namespace
+
+const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllFixedImpls = {
+ kLegacyBloom,
+ kDeprecatedBlock,
+ kFastLocalBloom,
+};
+
+const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
+ kDeprecatedBlock,
+ kAuto,
+};
+
+BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
+ : mode_(mode), warned_(false) {
+ // Sanitize bits_per_key
+ if (bits_per_key < 1.0) {
+ bits_per_key = 1.0;
+ } else if (!(bits_per_key < 100.0)) { // including NaN
+ bits_per_key = 100.0;
+ }
+
+ // Includes a nudge toward rounding up, to ensure on all platforms
+ // that doubles specified with three decimal digits after the decimal
+ // point are interpreted accurately.
+ millibits_per_key_ = static_cast<int>(bits_per_key * 1000.0 + 0.500001);
+
+ // For better or worse, this is a rounding up of a nudged rounding up,
+ // e.g. 7.4999999999999 will round up to 8, but that provides more
+ // predictability against small arithmetic errors in floating point.
+ whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
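+ // Examples: bits_per_key == 9.999 gives millibits_per_key_ == 9999 and
+ // whole_bits_per_key_ == 10; bits_per_key == 9.499 gives 9499 and 9.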
+}
+
+BloomFilterPolicy::~BloomFilterPolicy() {}
+
+const char* BloomFilterPolicy::Name() const {
+ return "rocksdb.BuiltinBloomFilter";
+}
+
+void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
+ std::string* dst) const {
+ // We should ideally only be using this deprecated interface for
+ // appropriately constructed BloomFilterPolicy
+ assert(mode_ == kDeprecatedBlock);
+
+ // Compute bloom filter size (in both bits and bytes)
+ uint32_t bits = static_cast<uint32_t>(n * whole_bits_per_key_);
+
+ // For small n, we can see a very high false positive rate. Fix it
+ // by enforcing a minimum bloom filter length.
+ if (bits < 64) bits = 64;
+
+ uint32_t bytes = (bits + 7) / 8;
+ bits = bytes * 8;
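+ // e.g. 4 keys at 10 whole bits per key request 40 bits, which is clamped
+ // to the 64-bit minimum and stored as 8 bytes (plus one num_probes byte).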
+
+ int num_probes =
+ LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_);
+
+ const size_t init_size = dst->size();
+ dst->resize(init_size + bytes, 0);
+ dst->push_back(static_cast<char>(num_probes)); // Remember # of probes
+ char* array = &(*dst)[init_size];
+ for (int i = 0; i < n; i++) {
+ LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes,
+ array);
+ }
+}
+
+bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
+ const Slice& bloom_filter) const {
+ const size_t len = bloom_filter.size();
+ if (len < 2 || len > 0xffffffffU) {
+ return false;
+ }
+
+ const char* array = bloom_filter.data();
+ const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
+
+ // Use the encoded k so that we can read filters generated by a bloom
+ // filter created with different parameters.
+ const int k = static_cast<uint8_t>(array[len - 1]);
+ if (k > 30) {
+ // Reserved for potentially new encodings for short bloom filters.
+ // Consider it a match.
+ return true;
+ }
+ // NB: using stored k not num_probes for whole_bits_per_key_
+ return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k,
+ array);
+}
+
+FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const {
+ // This code path should no longer be used, for the built-in
+ // BloomFilterPolicy. Internal to RocksDB and outside
+ // BloomFilterPolicy, only get a FilterBitsBuilder with
+ // BloomFilterPolicy::GetBuilderFromContext(), which will call
+ // BloomFilterPolicy::GetBuilderWithContext(). RocksDB users have
+ // been warned (HISTORY.md) that they can no longer call this on
+ // the built-in BloomFilterPolicy (unlikely).
+ assert(false);
+ return GetBuilderWithContext(FilterBuildingContext(BlockBasedTableOptions()));
+}
+
+FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
+ const FilterBuildingContext& context) const {
+ Mode cur = mode_;
+ // Unusual code construction so that we can have just
+ // one exhaustive switch without (risky) recursion
+ for (int i = 0; i < 2; ++i) {
+ switch (cur) {
+ case kAuto:
+ if (context.table_options.format_version < 5) {
+ cur = kLegacyBloom;
+ } else {
+ cur = kFastLocalBloom;
+ }
+ break;
+ case kDeprecatedBlock:
+ return nullptr;
+ case kFastLocalBloom:
+ return new FastLocalBloomBitsBuilder(millibits_per_key_);
+ case kLegacyBloom:
+ if (whole_bits_per_key_ >= 14 && context.info_log &&
+ !warned_.load(std::memory_order_relaxed)) {
+ warned_ = true;
+ const char* adjective;
+ if (whole_bits_per_key_ >= 20) {
+ adjective = "Dramatic";
+ } else {
+ adjective = "Significant";
+ }
+ // For more details, see
+ // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
+ ROCKS_LOG_WARN(
+ context.info_log,
+ "Using legacy Bloom filter with high (%d) bits/key. "
+ "%s filter space and/or accuracy improvement is available "
+ "with format_version>=5.",
+ whole_bits_per_key_, adjective);
+ }
+ return new LegacyBloomBitsBuilder(whole_bits_per_key_,
+ context.info_log);
+ }
+ }
+ assert(false);
+ return nullptr; // something legal
+}
+
+FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
+ const FilterBuildingContext& context) {
+ if (context.table_options.filter_policy) {
+ return context.table_options.filter_policy->GetBuilderWithContext(context);
+ } else {
+ return nullptr;
+ }
+}
+
+// Read metadata to determine what kind of FilterBitsReader is needed
+// and return a new one.
+FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader(
+ const Slice& contents) const {
+ uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
+ if (len_with_meta <= 5) {
+ // filter is empty or broken. Treat like zero keys added.
+ return new AlwaysFalseFilter();
+ }
+
+ // Legacy Bloom filter data:
+ // 0 +-----------------------------------+
+ // | Raw Bloom filter data |
+ // | ... |
+ // len +-----------------------------------+
+ // | byte for num_probes or |
+ // | marker for new implementations |
+ // len+1 +-----------------------------------+
+ // | four bytes for number of cache |
+ // | lines |
+ // len_with_meta +-----------------------------------+
+
+ int8_t raw_num_probes =
+ static_cast<int8_t>(contents.data()[len_with_meta - 5]);
+ // NB: num_probes > 30 and < 128 probably have not been used, because of
+ // BloomFilterPolicy::initialize, unless LegacyBloomBitsBuilder is called
+ // directly as an API, but we are leaving those cases in limbo with
+ // LegacyBloomBitsReader for now.
+
+ if (raw_num_probes < 1) {
+ // Note: < 0 (or unsigned > 127) indicate special new implementations
+ // (or reserved for future use)
+ if (raw_num_probes == -1) {
+ // Marker for newer Bloom implementations
+ return GetBloomBitsReader(contents);
+ }
+ // otherwise
+ // Treat as zero probes (always FP) for now.
+ return new AlwaysTrueFilter();
+ }
+ // else attempt decode for LegacyBloomBitsReader
+
+ int num_probes = raw_num_probes;
+ assert(num_probes >= 1);
+ assert(num_probes <= 127);
+
+ uint32_t len = len_with_meta - 5;
+ assert(len > 0);
+
+ uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4);
+ uint32_t log2_cache_line_size;
+
+ if (num_lines * CACHE_LINE_SIZE == len) {
+ // Common case
+ log2_cache_line_size = folly::constexpr_log2(CACHE_LINE_SIZE);
+ } else if (num_lines == 0 || len % num_lines != 0) {
+ // Invalid (no solution to num_lines * x == len)
+ // Treat as zero probes (always FP) for now.
+ return new AlwaysTrueFilter();
+ } else {
+ // Determine the non-native cache line size (from another system)
+ log2_cache_line_size = 0;
+ while ((num_lines << log2_cache_line_size) < len) {
+ ++log2_cache_line_size;
+ }
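+ // e.g. len == 1024 with num_lines == 8 ends the loop at
+ // log2_cache_line_size == 7 (a filter built with 128-byte cache lines).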
+ if ((num_lines << log2_cache_line_size) != len) {
+ // Invalid (block size not a power of two)
+ // Treat as zero probes (always FP) for now.
+ return new AlwaysTrueFilter();
+ }
+ }
+ // if not early return
+ return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines,
+ log2_cache_line_size);
+}
+
+// For newer Bloom filter implementations
+FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader(
+ const Slice& contents) const {
+ uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
+ uint32_t len = len_with_meta - 5;
+
+ assert(len > 0); // precondition
+
+ // New Bloom filter data:
+ // 0 +-----------------------------------+
+ // | Raw Bloom filter data |
+ // | ... |
+ // len +-----------------------------------+
+ // | char{-1} byte -> new Bloom filter |
+ // len+1 +-----------------------------------+
+ // | byte for subimplementation |
+ // | 0: FastLocalBloom |
+ // | other: reserved |
+ // len+2 +-----------------------------------+
+ // | byte for block_and_probes |
+ // | 0 in top 3 bits -> 6 -> 64-byte |
+ // | reserved: |
+ // | 1 in top 3 bits -> 7 -> 128-byte|
+ // | 2 in top 3 bits -> 8 -> 256-byte|
+ // | ... |
+ // | num_probes in bottom 5 bits, |
+ // | except 0 and 31 reserved |
+ // len+3 +-----------------------------------+
+ // | two bytes reserved |
+ // | possibly for hash seed |
+ // len_with_meta +-----------------------------------+
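+ // Example: block_and_probes == 0x06 decodes as 64-byte blocks (top three
+ // bits 0) with 6 probes (bottom five bits).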
+
+ // Read more metadata (see above)
+ char sub_impl_val = contents.data()[len_with_meta - 4];
+ char block_and_probes = contents.data()[len_with_meta - 3];
+ int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6;
+
+ int num_probes = (block_and_probes & 31);
+ if (num_probes < 1 || num_probes > 30) {
+ // Reserved / future safe
+ return new AlwaysTrueFilter();
+ }
+
+ uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2);
+ if (rest != 0) {
+ // Reserved, possibly for hash seed
+ // Future safe
+ return new AlwaysTrueFilter();
+ }
+
+ if (sub_impl_val == 0) { // FastLocalBloom
+ if (log2_block_bytes == 6) { // Only block size supported for now
+ return new FastLocalBloomBitsReader(contents.data(), num_probes, len);
+ }
+ }
+ // otherwise
+ // Reserved / future safe
+ return new AlwaysTrueFilter();
+}
+
+const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
+ bool use_block_based_builder) {
+ BloomFilterPolicy::Mode m;
+ if (use_block_based_builder) {
+ m = BloomFilterPolicy::kDeprecatedBlock;
+ } else {
+ m = BloomFilterPolicy::kAuto;
+ }
+ assert(std::find(BloomFilterPolicy::kAllUserModes.begin(),
+ BloomFilterPolicy::kAllUserModes.end(),
+ m) != BloomFilterPolicy::kAllUserModes.end());
+ return new BloomFilterPolicy(bits_per_key, m);
+}
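+
+// Typical usage sketch (illustrative only; assumes the standard
+// BlockBasedTableOptions / NewBlockBasedTableFactory setup):
+//
+//   BlockBasedTableOptions table_options;
+//   table_options.filter_policy.reset(NewBloomFilterPolicy(10.0, false));
+//   Options options;
+//   options.table_factory.reset(NewBlockBasedTableFactory(table_options));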
+
+FilterBuildingContext::FilterBuildingContext(
+ const BlockBasedTableOptions& _table_options)
+ : table_options(_table_options) {}
+
+FilterPolicy::~FilterPolicy() { }
+
+} // namespace ROCKSDB_NAMESPACE