author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit    e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/table/block_based/filter_policy.cc
parent    Initial commit. (diff)
download  ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  src/rocksdb/table/block_based/filter_policy.cc  1973
1 file changed, 1973 insertions(+), 0 deletions(-)
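The commit adds RocksDB's built-in filter implementations: the legacy Bloom filter, the format_version>=5 "fast local" Bloom filter, and the Standard128 Ribbon filter. Every built-in filter ends in a 5-byte metadata trailer whose first byte selects the implementation. As a reading aid, here is a minimal, self-contained sketch of that dispatch; FilterKind and ClassifyFilter are hypothetical names, but the byte layout follows BuiltinFilterPolicy::GetBuiltinFilterBitsReader in the code below.

#include <cstddef>
#include <cstdint>

// Classify serialized filter contents by the first trailer byte,
// mirroring BuiltinFilterPolicy::GetBuiltinFilterBitsReader.
enum class FilterKind {
  kAlwaysFalse,        // empty or broken contents; treated as zero keys added
  kAlwaysTrue,         // zero probes or reserved marker; everything matches
  kLegacyBloom,        // trailer byte 1..127 is num_probes
  kFastLocalBloom,     // marker -1: format_version>=5 Bloom
  kStandard128Ribbon,  // marker -2: Ribbon
};

FilterKind ClassifyFilter(const char* data, size_t len_with_meta) {
  constexpr size_t kMetadataLen = 5;
  if (len_with_meta <= kMetadataLen) {
    return FilterKind::kAlwaysFalse;
  }
  auto marker = static_cast<int8_t>(data[len_with_meta - kMetadataLen]);
  if (marker >= 1) {
    return FilterKind::kLegacyBloom;
  }
  switch (marker) {
    case -1:
      return FilterKind::kFastLocalBloom;
    case -2:
      return FilterKind::kStandard128Ribbon;
    default:
      return FilterKind::kAlwaysTrue;  // 0 or a reserved negative value
  }
}

The remaining four trailer bytes depend on the implementation: the number of cache lines for legacy Bloom; a sub-implementation byte, a block-size/num_probes byte, and two reserved bytes for the new Bloom; a seed byte and a 24-bit block count for Ribbon.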
diff --git a/src/rocksdb/table/block_based/filter_policy.cc b/src/rocksdb/table/block_based/filter_policy.cc new file mode 100644 index 000000000..f84f804dd --- /dev/null +++ b/src/rocksdb/table/block_based/filter_policy.cc @@ -0,0 +1,1973 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/filter_policy.h" + +#include <array> +#include <climits> +#include <cstring> +#include <deque> +#include <limits> +#include <memory> + +#include "cache/cache_entry_roles.h" +#include "cache/cache_reservation_manager.h" +#include "logging/logging.h" +#include "port/lang.h" +#include "rocksdb/convenience.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/object_registry.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/block_based/full_filter_block.h" +#include "util/bloom_impl.h" +#include "util/coding.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/ribbon_config.h" +#include "util/ribbon_impl.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +// Metadata trailer size for built-in filters. (This is separate from +// block-based table block trailer.) +// +// Originally this was 1 byte for num_probes and 4 bytes for number of +// cache lines in the Bloom filter, but now the first trailer byte is +// usually an implementation marker and remaining 4 bytes have various +// meanings. +static constexpr uint32_t kMetadataLen = 5; + +Slice FinishAlwaysFalse(std::unique_ptr<const char[]>* /*buf*/) { + // Missing metadata, treated as zero entries + return Slice(nullptr, 0); +} + +Slice FinishAlwaysTrue(std::unique_ptr<const char[]>* /*buf*/) { + return Slice("\0\0\0\0\0\0", 6); +} + +// Base class for filter builders using the XXH3 preview hash, +// also known as Hash64 or GetSliceHash64. +class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder { + public: + explicit XXPH3FilterBitsBuilder( + std::atomic<int64_t>* aggregate_rounding_balance, + std::shared_ptr<CacheReservationManager> cache_res_mgr, + bool detect_filter_construct_corruption) + : aggregate_rounding_balance_(aggregate_rounding_balance), + cache_res_mgr_(cache_res_mgr), + detect_filter_construct_corruption_( + detect_filter_construct_corruption) {} + + ~XXPH3FilterBitsBuilder() override {} + + virtual void AddKey(const Slice& key) override { + uint64_t hash = GetSliceHash64(key); + // Especially with prefixes, it is common to have repetition, + // though only adjacent repetition, which we want to immediately + // recognize and collapse for estimating true filter space + // requirements. 
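+ // E.g. with prefix extraction, consecutive keys often produce the same + // prefix and hence the same hash; only the first of each adjacent run + // is stored, so repeated prefixes don't inflate the space estimate.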
+ if (hash_entries_info_.entries.empty() || + hash != hash_entries_info_.entries.back()) { + if (detect_filter_construct_corruption_) { + hash_entries_info_.xor_checksum ^= hash; + } + hash_entries_info_.entries.push_back(hash); + if (cache_res_mgr_ && + // Traditional rounding to whole bucket size + ((hash_entries_info_.entries.size() % + kUint64tHashEntryCacheResBucketSize) == + kUint64tHashEntryCacheResBucketSize / 2)) { + hash_entries_info_.cache_res_bucket_handles.emplace_back(nullptr); + Status s = cache_res_mgr_->MakeCacheReservation( + kUint64tHashEntryCacheResBucketSize * sizeof(hash), + &hash_entries_info_.cache_res_bucket_handles.back()); + s.PermitUncheckedError(); + } + } + } + + virtual size_t EstimateEntriesAdded() override { + return hash_entries_info_.entries.size(); + } + + virtual Status MaybePostVerify(const Slice& filter_content) override; + + protected: + static constexpr uint32_t kMetadataLen = 5; + + // Number of hash entries to accumulate before charging their memory usage to + // the cache when cache charging is available + static const std::size_t kUint64tHashEntryCacheResBucketSize = + CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / + sizeof(uint64_t); + + // For delegating between XXPH3FilterBitsBuilders + void SwapEntriesWith(XXPH3FilterBitsBuilder* other) { + assert(other != nullptr); + hash_entries_info_.Swap(&(other->hash_entries_info_)); + } + + void ResetEntries() { hash_entries_info_.Reset(); } + + virtual size_t RoundDownUsableSpace(size_t available_size) = 0; + + // To choose size using malloc_usable_size, we have to actually allocate. + size_t AllocateMaybeRounding(size_t target_len_with_metadata, + size_t num_entries, + std::unique_ptr<char[]>* buf) { + // Return value set to a default; overwritten in some cases + size_t rv = target_len_with_metadata; +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + if (aggregate_rounding_balance_ != nullptr) { + // Do optimize_filters_for_memory, using malloc_usable_size. + // Approach: try to keep FP rate balance better than or on + // target (negative aggregate_rounding_balance_). We can then select a + // lower bound filter size (within reasonable limits) that gets us as + // close to on target as possible. We request allocation for that filter + // size and use malloc_usable_size to "round up" to the actual + // allocation size. + + // Although it can be considered bad practice to use malloc_usable_size + // to access an object beyond its original size, this approach should be + // quite general: working for all allocators that properly support + // malloc_usable_size. + + // Race condition on balance is OK because it can only cause temporary + // skew in rounding up vs. rounding down, as long as updates are atomic + // and relative. + int64_t balance = aggregate_rounding_balance_->load(); + + double target_fp_rate = + EstimatedFpRate(num_entries, target_len_with_metadata); + double rv_fp_rate = target_fp_rate; + + if (balance < 0) { + // See formula for BloomFilterPolicy::aggregate_rounding_balance_ + double for_balance_fp_rate = + -balance / double{0x100000000} + target_fp_rate; + + // To simplify, we just try a few modified smaller sizes. This also + // caps how much we vary filter size vs. target, to avoid outlier + // behavior from excessive variance. 
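+ // For example, with target_len == 1024 the candidates below are 768, + // 832, 896 and 960 bytes (3/4, 13/16, 7/8 and 15/16 of target), each + // re-padded with metadata and rounded down to a usable allocation size.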
+ size_t target_len = target_len_with_metadata - kMetadataLen; + assert(target_len < target_len_with_metadata); // check underflow + for (uint64_t maybe_len_rough : + {uint64_t{3} * target_len / 4, uint64_t{13} * target_len / 16, + uint64_t{7} * target_len / 8, uint64_t{15} * target_len / 16}) { + size_t maybe_len_with_metadata = + RoundDownUsableSpace(maybe_len_rough + kMetadataLen); + double maybe_fp_rate = + EstimatedFpRate(num_entries, maybe_len_with_metadata); + if (maybe_fp_rate <= for_balance_fp_rate) { + rv = maybe_len_with_metadata; + rv_fp_rate = maybe_fp_rate; + break; + } + } + } + + // Filter blocks are loaded into block cache with their block trailer. + // We need to make sure that's accounted for in choosing a + // fragmentation-friendly size. + const size_t kExtraPadding = BlockBasedTable::kBlockTrailerSize; + size_t requested = rv + kExtraPadding; + + // Allocate and get usable size + buf->reset(new char[requested]); + size_t usable = malloc_usable_size(buf->get()); + + if (usable - usable / 4 > requested) { + // Ratio greater than 4/3 is too much for utilizing, if it's + // not a buggy or mislinked malloc_usable_size implementation. + // Non-linearity of FP rates with bits/key means rapidly + // diminishing returns in overall accuracy for additional + // storage on disk. + // Nothing to do, except assert that the result is accurate about + // the usable size. (Assignment never used.) + assert(((*buf)[usable - 1] = 'x')); + } else if (usable > requested) { + rv = RoundDownUsableSpace(usable - kExtraPadding); + assert(rv <= usable - kExtraPadding); + rv_fp_rate = EstimatedFpRate(num_entries, rv); + } else { + // Too small means bad malloc_usable_size + assert(usable == requested); + } + memset(buf->get(), 0, rv); + + // Update balance + int64_t diff = static_cast<int64_t>((rv_fp_rate - target_fp_rate) * + double{0x100000000}); + *aggregate_rounding_balance_ += diff; + } else { + buf->reset(new char[rv]()); + } +#else + (void)num_entries; + buf->reset(new char[rv]()); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + return rv; + } + + // TODO: Ideally we want to verify each hash entry + // as it is added to the filter and eliminate this function, + // to speed things up and leave less room for undetected memory/CPU + // corruption. For Ribbon Filter, it's a bit harder. + // Possible solution: + // pass a custom iterator that tracks the xor checksum as + // it iterates to ResetAndFindSeedToSolve + Status MaybeVerifyHashEntriesChecksum() { + if (!detect_filter_construct_corruption_) { + return Status::OK(); + } + + uint64_t actual_hash_entries_xor_checksum = 0; + for (uint64_t h : hash_entries_info_.entries) { + actual_hash_entries_xor_checksum ^= h; + } + + if (actual_hash_entries_xor_checksum == hash_entries_info_.xor_checksum) { + return Status::OK(); + } else { + // Since these hash entries are corrupted and they will not be used + // anymore, we can reset them and release memory. + ResetEntries(); + return Status::Corruption("Filter's hash entries checksum mismatched"); + } + } + + // See BloomFilterPolicy::aggregate_rounding_balance_. If nullptr, + // always "round up" like historic behavior. 
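+ // Each Finish() adds (actual FP rate - target FP rate) * 2^32 to this + // balance, so a negative value means past filters were at or better than + // target, permitting the next filter to round its size down.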
+ std::atomic<int64_t>* aggregate_rounding_balance_; + + // For reserving memory used in (new) Bloom and Ribbon Filter construction + std::shared_ptr<CacheReservationManager> cache_res_mgr_; + + // For managing cache charge for final filter in (new) Bloom and Ribbon + // Filter construction + std::deque<std::unique_ptr<CacheReservationManager::CacheReservationHandle>> + final_filter_cache_res_handles_; + + bool detect_filter_construct_corruption_; + + struct HashEntriesInfo { + // A deque avoids unnecessary copying of already-saved values + // and has near-minimal peak memory use. + std::deque<uint64_t> entries; + + // If cache_res_mgr_ != nullptr, + // it manages cache charge for buckets of hash entries in (new) Bloom + // or Ribbon Filter construction. + // Otherwise, it is empty. + std::deque<std::unique_ptr<CacheReservationManager::CacheReservationHandle>> + cache_res_bucket_handles; + + // If detect_filter_construct_corruption_ == true, + // it records the xor checksum of hash entries. + // Otherwise, it is 0. + uint64_t xor_checksum = 0; + + void Swap(HashEntriesInfo* other) { + assert(other != nullptr); + std::swap(entries, other->entries); + std::swap(cache_res_bucket_handles, other->cache_res_bucket_handles); + std::swap(xor_checksum, other->xor_checksum); + } + + void Reset() { + entries.clear(); + cache_res_bucket_handles.clear(); + xor_checksum = 0; + } + }; + + HashEntriesInfo hash_entries_info_; +}; + +// #################### FastLocalBloom implementation ################## // +// ############## also known as format_version=5 Bloom filter ########## // + +// See description in FastLocalBloomImpl +class FastLocalBloomBitsBuilder : public XXPH3FilterBitsBuilder { + public: + // Non-null aggregate_rounding_balance implies optimize_filters_for_memory + explicit FastLocalBloomBitsBuilder( + const int millibits_per_key, + std::atomic<int64_t>* aggregate_rounding_balance, + std::shared_ptr<CacheReservationManager> cache_res_mgr, + bool detect_filter_construct_corruption) + : XXPH3FilterBitsBuilder(aggregate_rounding_balance, cache_res_mgr, + detect_filter_construct_corruption), + millibits_per_key_(millibits_per_key) { + assert(millibits_per_key >= 1000); + } + + // No Copy allowed + FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete; + void operator=(const FastLocalBloomBitsBuilder&) = delete; + + ~FastLocalBloomBitsBuilder() override {} + + using FilterBitsBuilder::Finish; + + virtual Slice Finish(std::unique_ptr<const char[]>* buf) override { + return Finish(buf, nullptr); + } + + virtual Slice Finish(std::unique_ptr<const char[]>* buf, + Status* status) override { + size_t num_entries = hash_entries_info_.entries.size(); + size_t len_with_metadata = CalculateSpace(num_entries); + + std::unique_ptr<char[]> mutable_buf; + std::unique_ptr<CacheReservationManager::CacheReservationHandle> + final_filter_cache_res_handle; + len_with_metadata = + AllocateMaybeRounding(len_with_metadata, num_entries, &mutable_buf); + // Cache charging for mutable_buf + if (cache_res_mgr_) { + Status s = cache_res_mgr_->MakeCacheReservation( + len_with_metadata * sizeof(char), &final_filter_cache_res_handle); + s.PermitUncheckedError(); + } + + assert(mutable_buf); + assert(len_with_metadata >= kMetadataLen); + + // Max size supported by implementation + assert(len_with_metadata <= 0xffffffffU); + + // Compute num_probes after any rounding / adjustments + int num_probes = GetNumProbes(num_entries, len_with_metadata); + + uint32_t len = static_cast<uint32_t>(len_with_metadata - 
kMetadataLen); + if (len > 0) { + TEST_SYNC_POINT_CALLBACK( + "XXPH3FilterBitsBuilder::Finish::" + "TamperHashEntries", + &hash_entries_info_.entries); + AddAllEntries(mutable_buf.get(), len, num_probes); + Status verify_hash_entries_checksum_status = + MaybeVerifyHashEntriesChecksum(); + if (!verify_hash_entries_checksum_status.ok()) { + if (status) { + *status = verify_hash_entries_checksum_status; + } + return FinishAlwaysTrue(buf); + } + } + + bool keep_entries_for_postverify = detect_filter_construct_corruption_; + if (!keep_entries_for_postverify) { + ResetEntries(); + } + + // See BloomFilterPolicy::GetBloomBitsReader re: metadata + // -1 = Marker for newer Bloom implementations + mutable_buf[len] = static_cast<char>(-1); + // 0 = Marker for this sub-implementation + mutable_buf[len + 1] = static_cast<char>(0); + // num_probes (and 0 in upper bits for 64-byte block size) + mutable_buf[len + 2] = static_cast<char>(num_probes); + // rest of metadata stays zero + + auto TEST_arg_pair __attribute__((__unused__)) = + std::make_pair(&mutable_buf, len_with_metadata); + TEST_SYNC_POINT_CALLBACK("XXPH3FilterBitsBuilder::Finish::TamperFilter", + &TEST_arg_pair); + + Slice rv(mutable_buf.get(), len_with_metadata); + *buf = std::move(mutable_buf); + final_filter_cache_res_handles_.push_back( + std::move(final_filter_cache_res_handle)); + if (status) { + *status = Status::OK(); + } + return rv; + } + + size_t ApproximateNumEntries(size_t bytes) override { + size_t bytes_no_meta = + bytes >= kMetadataLen ? RoundDownUsableSpace(bytes) - kMetadataLen : 0; + return static_cast<size_t>(uint64_t{8000} * bytes_no_meta / + millibits_per_key_); + } + + size_t CalculateSpace(size_t num_entries) override { + // If not for cache line blocks in the filter, what would the target + // length in bytes be? + size_t raw_target_len = static_cast<size_t>( + (uint64_t{num_entries} * millibits_per_key_ + 7999) / 8000); + + if (raw_target_len >= size_t{0xffffffc0}) { + // Max supported for this data structure implementation + raw_target_len = size_t{0xffffffc0}; + } + + // Round up to nearest multiple of 64 (block size). This adjustment is + // used for target FP rate only so that we don't receive complaints about + // lower FP rate vs. historic Bloom filter behavior. + return ((raw_target_len + 63) & ~size_t{63}) + kMetadataLen; + } + + double EstimatedFpRate(size_t keys, size_t len_with_metadata) override { + int num_probes = GetNumProbes(keys, len_with_metadata); + return FastLocalBloomImpl::EstimatedFpRate( + keys, len_with_metadata - kMetadataLen, num_probes, /*hash bits*/ 64); + } + + protected: + size_t RoundDownUsableSpace(size_t available_size) override { + size_t rv = available_size - kMetadataLen; + + if (rv >= size_t{0xffffffc0}) { + // Max supported for this data structure implementation + rv = size_t{0xffffffc0}; + } + + // round down to multiple of 64 (block size) + rv &= ~size_t{63}; + + return rv + kMetadataLen; + } + + private: + // Compute num_probes after any rounding / adjustments + int GetNumProbes(size_t keys, size_t len_with_metadata) { + uint64_t millibits = uint64_t{len_with_metadata - kMetadataLen} * 8000; + int actual_millibits_per_key = + static_cast<int>(millibits / std::max(keys, size_t{1})); + // BEGIN XXX/TODO(peterd): preserving old/default behavior for now to + // minimize unit test churn. Remove this some time. 
+ if (!aggregate_rounding_balance_) { + actual_millibits_per_key = millibits_per_key_; + } + // END XXX/TODO + return FastLocalBloomImpl::ChooseNumProbes(actual_millibits_per_key); + } + + void AddAllEntries(char* data, uint32_t len, int num_probes) { + // Simple version without prefetching: + // + // for (auto h : hash_entries_info_.entries) { + // FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len, + // num_probes, data); + // } + + const size_t num_entries = hash_entries_info_.entries.size(); + constexpr size_t kBufferMask = 7; + static_assert(((kBufferMask + 1) & kBufferMask) == 0, + "Must be power of 2 minus 1"); + + std::array<uint32_t, kBufferMask + 1> hashes; + std::array<uint32_t, kBufferMask + 1> byte_offsets; + + // Prime the buffer + size_t i = 0; + std::deque<uint64_t>::iterator hash_entries_it = + hash_entries_info_.entries.begin(); + for (; i <= kBufferMask && i < num_entries; ++i) { + uint64_t h = *hash_entries_it; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, + /*out*/ &byte_offsets[i]); + hashes[i] = Upper32of64(h); + ++hash_entries_it; + } + + // Process and buffer + for (; i < num_entries; ++i) { + uint32_t& hash_ref = hashes[i & kBufferMask]; + uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask]; + // Process (add) + FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes, + data + byte_offset_ref); + // And buffer + uint64_t h = *hash_entries_it; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, + /*out*/ &byte_offset_ref); + hash_ref = Upper32of64(h); + ++hash_entries_it; + } + + // Finish processing + for (i = 0; i <= kBufferMask && i < num_entries; ++i) { + FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes, + data + byte_offsets[i]); + } + } + + // Target allocation per added key, in thousandths of a bit. 
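+ // E.g. a policy configured with 10.5 bits per key stores 10500 here.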
+ int millibits_per_key_; +}; + +// See description in FastLocalBloomImpl +class FastLocalBloomBitsReader : public BuiltinFilterBitsReader { + public: + FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes) + : data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {} + + // No Copy allowed + FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete; + void operator=(const FastLocalBloomBitsReader&) = delete; + + ~FastLocalBloomBitsReader() override {} + + bool MayMatch(const Slice& key) override { + uint64_t h = GetSliceHash64(key); + uint32_t byte_offset; + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, + /*out*/ &byte_offset); + return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_, + data_ + byte_offset); + } + + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { + std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes; + std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets; + for (int i = 0; i < num_keys; ++i) { + uint64_t h = GetSliceHash64(*keys[i]); + FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, + /*out*/ &byte_offsets[i]); + hashes[i] = Upper32of64(h); + } + for (int i = 0; i < num_keys; ++i) { + may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared( + hashes[i], num_probes_, data_ + byte_offsets[i]); + } + } + + bool HashMayMatch(const uint64_t h) override { + return FastLocalBloomImpl::HashMayMatch(Lower32of64(h), Upper32of64(h), + len_bytes_, num_probes_, data_); + } + + private: + const char* data_; + const int num_probes_; + const uint32_t len_bytes_; +}; + +// ##################### Ribbon filter implementation ################### // + +// Implements concept RehasherTypesAndSettings in ribbon_impl.h +struct Standard128RibbonRehasherTypesAndSettings { + // These are schema-critical. Any change almost certainly changes + // underlying data. + static constexpr bool kIsFilter = true; + static constexpr bool kHomogeneous = false; + static constexpr bool kFirstCoeffAlwaysOne = true; + static constexpr bool kUseSmash = false; + using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128; + using Hash = uint64_t; + using Seed = uint32_t; + // Changing these doesn't necessarily change underlying data, + // but might affect supported scalability of those dimensions. 
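+ // E.g. the 32-bit ResultRow caps solutions at 32 result bits per slot, + // matching the 32.0 bits-per-slot ceiling used in ApproximateNumEntries.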
+ using Index = uint32_t; + using ResultRow = uint32_t; + // Save a conditional in Ribbon queries + static constexpr bool kAllowZeroStarts = false; +}; + +using Standard128RibbonTypesAndSettings = + ribbon::StandardRehasherAdapter<Standard128RibbonRehasherTypesAndSettings>; + +class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder { + public: + explicit Standard128RibbonBitsBuilder( + double desired_one_in_fp_rate, int bloom_millibits_per_key, + std::atomic<int64_t>* aggregate_rounding_balance, + std::shared_ptr<CacheReservationManager> cache_res_mgr, + bool detect_filter_construct_corruption, Logger* info_log) + : XXPH3FilterBitsBuilder(aggregate_rounding_balance, cache_res_mgr, + detect_filter_construct_corruption), + desired_one_in_fp_rate_(desired_one_in_fp_rate), + info_log_(info_log), + bloom_fallback_(bloom_millibits_per_key, aggregate_rounding_balance, + cache_res_mgr, detect_filter_construct_corruption) { + assert(desired_one_in_fp_rate >= 1.0); + } + + // No Copy allowed + Standard128RibbonBitsBuilder(const Standard128RibbonBitsBuilder&) = delete; + void operator=(const Standard128RibbonBitsBuilder&) = delete; + + ~Standard128RibbonBitsBuilder() override {} + + using FilterBitsBuilder::Finish; + + virtual Slice Finish(std::unique_ptr<const char[]>* buf) override { + return Finish(buf, nullptr); + } + + virtual Slice Finish(std::unique_ptr<const char[]>* buf, + Status* status) override { + if (hash_entries_info_.entries.size() > kMaxRibbonEntries) { + ROCKS_LOG_WARN( + info_log_, "Too many keys for Ribbon filter: %llu", + static_cast<unsigned long long>(hash_entries_info_.entries.size())); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + if (hash_entries_info_.entries.size() == 0) { + // Save a conditional in Ribbon queries by using alternate reader + // for zero entries added. 
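+ // (FinishAlwaysFalse returns an empty slice, which the reader side + // decodes as an AlwaysFalseFilter; see GetBuiltinFilterBitsReader.)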
+ if (status) { + *status = Status::OK(); + } + return FinishAlwaysFalse(buf); + } + uint32_t num_entries = + static_cast<uint32_t>(hash_entries_info_.entries.size()); + uint32_t num_slots; + size_t len_with_metadata; + + CalculateSpaceAndSlots(num_entries, &len_with_metadata, &num_slots); + + // Bloom fall-back indicator + if (num_slots == 0) { + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + + uint32_t entropy = 0; + if (!hash_entries_info_.entries.empty()) { + entropy = Lower32of64(hash_entries_info_.entries.front()); + } + + BandingType banding; + std::size_t bytes_banding = ribbon::StandardBanding< + Standard128RibbonTypesAndSettings>::EstimateMemoryUsage(num_slots); + Status status_banding_cache_res = Status::OK(); + + // Cache charging for banding + std::unique_ptr<CacheReservationManager::CacheReservationHandle> + banding_res_handle; + if (cache_res_mgr_) { + status_banding_cache_res = cache_res_mgr_->MakeCacheReservation( + bytes_banding, &banding_res_handle); + } + + if (status_banding_cache_res.IsMemoryLimit()) { + ROCKS_LOG_WARN(info_log_, + "Cache charging for Ribbon filter banding failed due " + "to cache full"); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + // Release cache for banding since the banding won't be allocated + banding_res_handle.reset(); + return bloom_fallback_.Finish(buf, status); + } + + TEST_SYNC_POINT_CALLBACK( + "XXPH3FilterBitsBuilder::Finish::" + "TamperHashEntries", + &hash_entries_info_.entries); + + bool success = banding.ResetAndFindSeedToSolve( + num_slots, hash_entries_info_.entries.begin(), + hash_entries_info_.entries.end(), + /*starting seed*/ entropy & 255, /*seed mask*/ 255); + if (!success) { + ROCKS_LOG_WARN( + info_log_, "Too many re-seeds (256) for Ribbon filter, %llu / %llu", + static_cast<unsigned long long>(hash_entries_info_.entries.size()), + static_cast<unsigned long long>(num_slots)); + SwapEntriesWith(&bloom_fallback_); + assert(hash_entries_info_.entries.empty()); + return bloom_fallback_.Finish(buf, status); + } + + Status verify_hash_entries_checksum_status = + MaybeVerifyHashEntriesChecksum(); + if (!verify_hash_entries_checksum_status.ok()) { + ROCKS_LOG_WARN(info_log_, "Verify hash entries checksum error: %s", + verify_hash_entries_checksum_status.getState()); + if (status) { + *status = verify_hash_entries_checksum_status; + } + return FinishAlwaysTrue(buf); + } + + bool keep_entries_for_postverify = detect_filter_construct_corruption_; + if (!keep_entries_for_postverify) { + ResetEntries(); + } + + uint32_t seed = banding.GetOrdinalSeed(); + assert(seed < 256); + + std::unique_ptr<char[]> mutable_buf; + std::unique_ptr<CacheReservationManager::CacheReservationHandle> + final_filter_cache_res_handle; + len_with_metadata = + AllocateMaybeRounding(len_with_metadata, num_entries, &mutable_buf); + // Cache charging for mutable_buf + if (cache_res_mgr_) { + Status s = cache_res_mgr_->MakeCacheReservation( + len_with_metadata * sizeof(char), &final_filter_cache_res_handle); + s.PermitUncheckedError(); + } + + SolnType soln(mutable_buf.get(), len_with_metadata); + soln.BackSubstFrom(banding); + uint32_t num_blocks = soln.GetNumBlocks(); + // This should be guaranteed: + // num_entries < 2^30 + // => (overhead_factor < 2.0) + // num_entries * overhead_factor == num_slots < 2^31 + // => (num_blocks = num_slots / 128) + // num_blocks < 2^24 + assert(num_blocks < 0x1000000U); + + // See 
BloomFilterPolicy::GetBloomBitsReader re: metadata + // -2 = Marker for Standard128 Ribbon + mutable_buf[len_with_metadata - 5] = static_cast<char>(-2); + // Hash seed + mutable_buf[len_with_metadata - 4] = static_cast<char>(seed); + // Number of blocks, in 24 bits + // (Along with bytes, we can derive other settings) + mutable_buf[len_with_metadata - 3] = static_cast<char>(num_blocks & 255); + mutable_buf[len_with_metadata - 2] = + static_cast<char>((num_blocks >> 8) & 255); + mutable_buf[len_with_metadata - 1] = + static_cast<char>((num_blocks >> 16) & 255); + + auto TEST_arg_pair __attribute__((__unused__)) = + std::make_pair(&mutable_buf, len_with_metadata); + TEST_SYNC_POINT_CALLBACK("XXPH3FilterBitsBuilder::Finish::TamperFilter", + &TEST_arg_pair); + + Slice rv(mutable_buf.get(), len_with_metadata); + *buf = std::move(mutable_buf); + final_filter_cache_res_handles_.push_back( + std::move(final_filter_cache_res_handle)); + if (status) { + *status = Status::OK(); + } + return rv; + } + + // Setting num_slots to 0 means "fall back on Bloom filter." + // And note this implementation does not support num_entries or num_slots + // beyond uint32_t; see kMaxRibbonEntries. + void CalculateSpaceAndSlots(size_t num_entries, + size_t* target_len_with_metadata, + uint32_t* num_slots) { + if (num_entries > kMaxRibbonEntries) { + // More entries than supported by this Ribbon + *num_slots = 0; // use Bloom + *target_len_with_metadata = bloom_fallback_.CalculateSpace(num_entries); + return; + } + uint32_t entropy = 0; + if (!hash_entries_info_.entries.empty()) { + entropy = Upper32of64(hash_entries_info_.entries.front()); + } + + *num_slots = NumEntriesToNumSlots(static_cast<uint32_t>(num_entries)); + *target_len_with_metadata = + SolnType::GetBytesForOneInFpRate(*num_slots, desired_one_in_fp_rate_, + /*rounding*/ entropy) + + kMetadataLen; + + // Consider possible Bloom fallback for small filters + if (*num_slots < 1024) { + size_t bloom = bloom_fallback_.CalculateSpace(num_entries); + if (bloom < *target_len_with_metadata) { + *num_slots = 0; // use Bloom + *target_len_with_metadata = bloom; + return; + } + } + } + + size_t CalculateSpace(size_t num_entries) override { + if (num_entries == 0) { + // See FinishAlwaysFalse + return 0; + } + size_t target_len_with_metadata; + uint32_t num_slots; + CalculateSpaceAndSlots(num_entries, &target_len_with_metadata, &num_slots); + (void)num_slots; + return target_len_with_metadata; + } + + // This is a somewhat ugly but reasonably fast and reasonably accurate + // reversal of CalculateSpace. + size_t ApproximateNumEntries(size_t bytes) override { + size_t len_no_metadata = + RoundDownUsableSpace(std::max(bytes, size_t{kMetadataLen})) - + kMetadataLen; + + if (!(desired_one_in_fp_rate_ > 1.0)) { + // Effectively asking for 100% FP rate, or NaN etc. + // Note that NaN is neither < 1.0 nor > 1.0 + return kMaxRibbonEntries; + } + + // Find a slight under-estimate for actual average bits per slot + double min_real_bits_per_slot; + if (desired_one_in_fp_rate_ >= 1.0 + std::numeric_limits<uint32_t>::max()) { + // Max of 32 solution columns (result bits) + min_real_bits_per_slot = 32.0; + } else { + // Account for mix of b and b+1 solution columns being slightly + // suboptimal vs. ideal log2(1/fp_rate) bits. 
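+ // E.g. for a one-in-100 FP rate: upper_bits_per_key = 7, portion_lower + // = 0.28, so min_real_bits_per_slot = 6.72, slightly above the ideal + // log2(100), about 6.64 bits.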
+ uint32_t rounded = static_cast<uint32_t>(desired_one_in_fp_rate_); + int upper_bits_per_key = 1 + FloorLog2(rounded); + double fp_rate_for_upper = std::pow(2.0, -upper_bits_per_key); + double portion_lower = + (1.0 / desired_one_in_fp_rate_ - fp_rate_for_upper) / + fp_rate_for_upper; + min_real_bits_per_slot = upper_bits_per_key - portion_lower; + assert(min_real_bits_per_slot > 0.0); + assert(min_real_bits_per_slot <= 32.0); + } + + // An overestimate, but this should only be O(1) slots away from truth. + double max_slots = len_no_metadata * 8.0 / min_real_bits_per_slot; + + // Let's not bother accounting for overflow to Bloom filter + // (Includes NaN case) + if (!(max_slots < ConfigHelper::GetNumSlots(kMaxRibbonEntries))) { + return kMaxRibbonEntries; + } + + // Set up for short iteration + uint32_t slots = static_cast<uint32_t>(max_slots); + slots = SolnType::RoundUpNumSlots(slots); + + // Assert that we have a valid upper bound on slots + assert(SolnType::GetBytesForOneInFpRate( + SolnType::RoundUpNumSlots(slots + 1), desired_one_in_fp_rate_, + /*rounding*/ 0) > len_no_metadata); + + // Iterate up to a few times to rather precisely account for small effects + for (int i = 0; slots > 0; ++i) { + size_t reqd_bytes = + SolnType::GetBytesForOneInFpRate(slots, desired_one_in_fp_rate_, + /*rounding*/ 0); + if (reqd_bytes <= len_no_metadata) { + break; // done + } + if (i >= 2) { + // should have been enough iterations + assert(false); + break; + } + slots = SolnType::RoundDownNumSlots(slots - 1); + } + + uint32_t num_entries = ConfigHelper::GetNumToAdd(slots); + + // Consider possible Bloom fallback for small filters + if (slots < 1024) { + size_t bloom = bloom_fallback_.ApproximateNumEntries(bytes); + if (bloom > num_entries) { + return bloom; + } else { + return num_entries; + } + } else { + return std::min(num_entries, kMaxRibbonEntries); + } + } + + double EstimatedFpRate(size_t num_entries, + size_t len_with_metadata) override { + if (num_entries > kMaxRibbonEntries) { + // More entries than supported by this Ribbon + return bloom_fallback_.EstimatedFpRate(num_entries, len_with_metadata); + } + uint32_t num_slots = + NumEntriesToNumSlots(static_cast<uint32_t>(num_entries)); + SolnType fake_soln(nullptr, len_with_metadata); + fake_soln.ConfigureForNumSlots(num_slots); + return fake_soln.ExpectedFpRate(); + } + + Status MaybePostVerify(const Slice& filter_content) override { + bool fall_back = (bloom_fallback_.EstimateEntriesAdded() > 0); + return fall_back ? 
bloom_fallback_.MaybePostVerify(filter_content) + : XXPH3FilterBitsBuilder::MaybePostVerify(filter_content); + } + + protected: + size_t RoundDownUsableSpace(size_t available_size) override { + size_t rv = available_size - kMetadataLen; + + // round down to multiple of 16 (segment size) + rv &= ~size_t{15}; + + return rv + kMetadataLen; + } + + private: + using TS = Standard128RibbonTypesAndSettings; + using SolnType = ribbon::SerializableInterleavedSolution<TS>; + using BandingType = ribbon::StandardBanding<TS>; + using ConfigHelper = ribbon::BandingConfigHelper1TS<ribbon::kOneIn20, TS>; + + static uint32_t NumEntriesToNumSlots(uint32_t num_entries) { + uint32_t num_slots1 = ConfigHelper::GetNumSlots(num_entries); + return SolnType::RoundUpNumSlots(num_slots1); + } + + // Approximate num_entries to ensure number of bytes fits in 32 bits, which + // is not an inherent limitation but does ensure somewhat graceful Bloom + // fallback for crazy high number of entries, since the Bloom implementation + // does not support number of bytes bigger than fits in 32 bits. This is + // within an order of magnitude of implementation limit on num_slots + // fitting in 32 bits, and even closer for num_blocks fitting in 24 bits + // (for filter metadata). + static constexpr uint32_t kMaxRibbonEntries = 950000000; // ~ 1 billion + + // A desired value for 1/fp_rate. For example, 100 -> 1% fp rate. + double desired_one_in_fp_rate_; + + // For warnings, or can be nullptr + Logger* info_log_; + + // For falling back on Bloom filter in some exceptional cases and + // very small filter cases + FastLocalBloomBitsBuilder bloom_fallback_; +}; + +// for the linker, at least with DEBUG_LEVEL=2 +constexpr uint32_t Standard128RibbonBitsBuilder::kMaxRibbonEntries; + +class Standard128RibbonBitsReader : public BuiltinFilterBitsReader { + public: + Standard128RibbonBitsReader(const char* data, size_t len_bytes, + uint32_t num_blocks, uint32_t seed) + : soln_(const_cast<char*>(data), len_bytes) { + soln_.ConfigureForNumBlocks(num_blocks); + hasher_.SetOrdinalSeed(seed); + } + + // No Copy allowed + Standard128RibbonBitsReader(const Standard128RibbonBitsReader&) = delete; + void operator=(const Standard128RibbonBitsReader&) = delete; + + ~Standard128RibbonBitsReader() override {} + + bool MayMatch(const Slice& key) override { + uint64_t h = GetSliceHash64(key); + return soln_.FilterQuery(h, hasher_); + } + + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { + struct SavedData { + uint64_t seeded_hash; + uint32_t segment_num; + uint32_t num_columns; + uint32_t start_bits; + }; + std::array<SavedData, MultiGetContext::MAX_BATCH_SIZE> saved; + for (int i = 0; i < num_keys; ++i) { + ribbon::InterleavedPrepareQuery( + GetSliceHash64(*keys[i]), hasher_, soln_, &saved[i].seeded_hash, + &saved[i].segment_num, &saved[i].num_columns, &saved[i].start_bits); + } + for (int i = 0; i < num_keys; ++i) { + may_match[i] = ribbon::InterleavedFilterQuery( + saved[i].seeded_hash, saved[i].segment_num, saved[i].num_columns, + saved[i].start_bits, hasher_, soln_); + } + } + + bool HashMayMatch(const uint64_t h) override { + return soln_.FilterQuery(h, hasher_); + } + + private: + using TS = Standard128RibbonTypesAndSettings; + ribbon::SerializableInterleavedSolution<TS> soln_; + ribbon::StandardHasher<TS> hasher_; +}; + +// ##################### Legacy Bloom implementation ################### // + +using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>; + +class LegacyBloomBitsBuilder : public 
BuiltinFilterBitsBuilder { + public: + explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log); + + // No Copy allowed + LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete; + void operator=(const LegacyBloomBitsBuilder&) = delete; + + ~LegacyBloomBitsBuilder() override; + + void AddKey(const Slice& key) override; + + virtual size_t EstimateEntriesAdded() override { + return hash_entries_.size(); + } + + using FilterBitsBuilder::Finish; + + Slice Finish(std::unique_ptr<const char[]>* buf) override; + + size_t CalculateSpace(size_t num_entries) override { + uint32_t dont_care1; + uint32_t dont_care2; + return CalculateSpace(num_entries, &dont_care1, &dont_care2); + } + + double EstimatedFpRate(size_t keys, size_t bytes) override { + return LegacyBloomImpl::EstimatedFpRate(keys, bytes - kMetadataLen, + num_probes_); + } + + size_t ApproximateNumEntries(size_t bytes) override; + + private: + int bits_per_key_; + int num_probes_; + std::vector<uint32_t> hash_entries_; + Logger* info_log_; + + // Get total bits, optimized for CPU cache line + uint32_t GetTotalBitsForLocality(uint32_t total_bits); + + // Reserve space for new filter + char* ReserveSpace(size_t num_entries, uint32_t* total_bits, + uint32_t* num_lines); + + // Implementation-specific variant of public CalculateSpace + uint32_t CalculateSpace(size_t num_entries, uint32_t* total_bits, + uint32_t* num_lines); + + // Assuming single-threaded access to this function. + void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits); +}; + +LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key, + Logger* info_log) + : bits_per_key_(bits_per_key), + num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)), + info_log_(info_log) { + assert(bits_per_key_); +} + +LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {} + +void LegacyBloomBitsBuilder::AddKey(const Slice& key) { + uint32_t hash = BloomHash(key); + if (hash_entries_.size() == 0 || hash != hash_entries_.back()) { + hash_entries_.push_back(hash); + } +} + +Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) { + uint32_t total_bits, num_lines; + size_t num_entries = hash_entries_.size(); + char* data = + ReserveSpace(static_cast<int>(num_entries), &total_bits, &num_lines); + assert(data); + + if (total_bits != 0 && num_lines != 0) { + for (auto h : hash_entries_) { + AddHash(h, data, num_lines, total_bits); + } + + // Check for excessive entries for 32-bit hash function + if (num_entries >= /* minimum of 3 million */ 3000000U) { + // More specifically, we can detect that the 32-bit hash function + // is causing a significant increase in FP rate by comparing current + // estimated FP rate to what we would get with a normal number of + // keys at same memory ratio. + double est_fp_rate = LegacyBloomImpl::EstimatedFpRate( + num_entries, total_bits / 8, num_probes_); + double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate( + 1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_); + + if (est_fp_rate >= 1.50 * vs_fp_rate) { + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN( + info_log_, + "Using legacy SST/BBT Bloom filter with excessive key count " + "(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. 
" + "Consider using new Bloom with format_version>=5, smaller SST " + "file size, or partitioned filters.", + num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate); + } + } + } + // See BloomFilterPolicy::GetFilterBitsReader for metadata + data[total_bits / 8] = static_cast<char>(num_probes_); + EncodeFixed32(data + total_bits / 8 + 1, static_cast<uint32_t>(num_lines)); + + const char* const_data = data; + buf->reset(const_data); + hash_entries_.clear(); + + return Slice(data, total_bits / 8 + kMetadataLen); +} + +size_t LegacyBloomBitsBuilder::ApproximateNumEntries(size_t bytes) { + assert(bits_per_key_); + assert(bytes > 0); + + uint64_t total_bits_tmp = bytes * 8; + // total bits, including temporary computations, cannot exceed 2^32 + // for compatibility + total_bits_tmp = std::min(total_bits_tmp, uint64_t{0xffff0000}); + + uint32_t high = static_cast<uint32_t>(total_bits_tmp) / + static_cast<uint32_t>(bits_per_key_) + + 1; + uint32_t low = 1; + uint32_t n = high; + for (; n >= low; n--) { + if (CalculateSpace(n) <= bytes) { + break; + } + } + return n; +} + +uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) { + uint32_t num_lines = + (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); + + // Make num_lines an odd number to make sure more bits are involved + // when determining which block. + if (num_lines % 2 == 0) { + num_lines++; + } + return num_lines * (CACHE_LINE_SIZE * 8); +} + +uint32_t LegacyBloomBitsBuilder::CalculateSpace(size_t num_entries, + uint32_t* total_bits, + uint32_t* num_lines) { + assert(bits_per_key_); + if (num_entries != 0) { + size_t total_bits_tmp = num_entries * bits_per_key_; + // total bits, including temporary computations, cannot exceed 2^32 + // for compatibility + total_bits_tmp = std::min(total_bits_tmp, size_t{0xffff0000}); + + *total_bits = + GetTotalBitsForLocality(static_cast<uint32_t>(total_bits_tmp)); + *num_lines = *total_bits / (CACHE_LINE_SIZE * 8); + assert(*total_bits > 0 && *total_bits % 8 == 0); + } else { + // filter is empty, just leave space for metadata + *total_bits = 0; + *num_lines = 0; + } + + // Reserve space for Filter + uint32_t sz = *total_bits / 8; + sz += kMetadataLen; // 4 bytes for num_lines, 1 byte for num_probes + return sz; +} + +char* LegacyBloomBitsBuilder::ReserveSpace(size_t num_entries, + uint32_t* total_bits, + uint32_t* num_lines) { + uint32_t sz = CalculateSpace(num_entries, total_bits, num_lines); + char* data = new char[sz]; + memset(data, 0, sz); + return data; +} + +inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data, + uint32_t num_lines, + uint32_t total_bits) { +#ifdef NDEBUG + static_cast<void>(total_bits); +#endif + assert(num_lines > 0 && total_bits > 0); + + LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data, + ConstexprFloorLog2(CACHE_LINE_SIZE)); +} + +class LegacyBloomBitsReader : public BuiltinFilterBitsReader { + public: + LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines, + uint32_t log2_cache_line_size) + : data_(data), + num_probes_(num_probes), + num_lines_(num_lines), + log2_cache_line_size_(log2_cache_line_size) {} + + // No Copy allowed + LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete; + void operator=(const LegacyBloomBitsReader&) = delete; + + ~LegacyBloomBitsReader() override {} + + // "contents" contains the data built by a preceding call to + // FilterBitsBuilder::Finish. MayMatch must return true if the key was + // passed to FilterBitsBuilder::AddKey. 
This method may return true or false + if the key was not on the list, but it should aim to return false with a + high probability. + bool MayMatch(const Slice& key) override { + uint32_t hash = BloomHash(key); + uint32_t byte_offset; + LegacyBloomImpl::PrepareHashMayMatch( + hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_); + return LegacyBloomImpl::HashMayMatchPrepared( + hash, num_probes_, data_ + byte_offset, log2_cache_line_size_); + } + + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { + std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes; + std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets; + for (int i = 0; i < num_keys; ++i) { + hashes[i] = BloomHash(*keys[i]); + LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_, + /*out*/ &byte_offsets[i], + log2_cache_line_size_); + } + for (int i = 0; i < num_keys; ++i) { + may_match[i] = LegacyBloomImpl::HashMayMatchPrepared( + hashes[i], num_probes_, data_ + byte_offsets[i], + log2_cache_line_size_); + } + } + + bool HashMayMatch(const uint64_t /* h */) override { return false; } + + private: + const char* data_; + const int num_probes_; + const uint32_t num_lines_; + const uint32_t log2_cache_line_size_; +}; + +class AlwaysTrueFilter : public BuiltinFilterBitsReader { + public: + bool MayMatch(const Slice&) override { return true; } + using FilterBitsReader::MayMatch; // inherit overload + bool HashMayMatch(const uint64_t) override { return true; } + using BuiltinFilterBitsReader::HashMayMatch; // inherit overload +}; + +class AlwaysFalseFilter : public BuiltinFilterBitsReader { + public: + bool MayMatch(const Slice&) override { return false; } + using FilterBitsReader::MayMatch; // inherit overload + bool HashMayMatch(const uint64_t) override { return false; } + using BuiltinFilterBitsReader::HashMayMatch; // inherit overload +}; + +Status XXPH3FilterBitsBuilder::MaybePostVerify(const Slice& filter_content) { + Status s = Status::OK(); + + if (!detect_filter_construct_corruption_) { + return s; + } + + std::unique_ptr<BuiltinFilterBitsReader> bits_reader( + BuiltinFilterPolicy::GetBuiltinFilterBitsReader(filter_content)); + + for (uint64_t h : hash_entries_info_.entries) { + // The current approach will not detect corruption from XXPH3Filter to + // AlwaysTrueFilter, which can lead to a performance cost later due to + // AlwaysTrueFilter not filtering anything. But this cost is acceptable + // given the extra implementation complexity to detect such a case. 
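+ // A Bloom or Ribbon filter never reports "no match" for a key that was + // added, so a single non-match below proves the serialized bits are + // corrupt.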
+ bool may_match = bits_reader->HashMayMatch(h); + if (!may_match) { + s = Status::Corruption("Corrupted filter content"); + break; + } + } + + ResetEntries(); + return s; +} +} // namespace + +const char* BuiltinFilterPolicy::kClassName() { + return "rocksdb.internal.BuiltinFilter"; +} + +bool BuiltinFilterPolicy::IsInstanceOf(const std::string& name) const { + if (name == kClassName()) { + return true; + } else { + return FilterPolicy::IsInstanceOf(name); + } +} + +static const char* kBuiltinFilterMetadataName = "rocksdb.BuiltinBloomFilter"; + +const char* BuiltinFilterPolicy::kCompatibilityName() { + return kBuiltinFilterMetadataName; +} + +const char* BuiltinFilterPolicy::CompatibilityName() const { + return kBuiltinFilterMetadataName; +} + +BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key) + : warned_(false), aggregate_rounding_balance_(0) { + // Sanitize bits_per_key + if (bits_per_key < 0.5) { + // Round down to no filter + bits_per_key = 0; + } else if (bits_per_key < 1.0) { + // Minimum 1 bit per key (equiv) when creating filter + bits_per_key = 1.0; + } else if (!(bits_per_key < 100.0)) { // including NaN + bits_per_key = 100.0; + } + + // Includes a nudge toward rounding up, to ensure on all platforms + // that doubles specified with three decimal digits after the decimal + // point are interpreted accurately. + millibits_per_key_ = static_cast<int>(bits_per_key * 1000.0 + 0.500001); + + // For now configure Ribbon filter to match Bloom FP rate and save + // memory. (Ribbon bits per key will be ~30% less than Bloom bits per key + // for same FP rate.) + desired_one_in_fp_rate_ = + 1.0 / BloomMath::CacheLocalFpRate( + bits_per_key, + FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_), + /*cache_line_bits*/ 512); + + // For better or worse, this is a rounding up of a nudged rounding up, + // e.g. 7.4999999999999 will round up to 8, but that provides more + // predictability against small arithmetic errors in floating point. 
+ whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; +} + +BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {} +const char* BloomLikeFilterPolicy::kClassName() { + return "rocksdb.internal.BloomLikeFilter"; +} + +bool BloomLikeFilterPolicy::IsInstanceOf(const std::string& name) const { + if (name == kClassName()) { + return true; + } else { + return BuiltinFilterPolicy::IsInstanceOf(name); + } +} + +const char* ReadOnlyBuiltinFilterPolicy::kClassName() { + return kBuiltinFilterMetadataName; +} + +std::string BloomLikeFilterPolicy::GetId() const { + return Name() + GetBitsPerKeySuffix(); +} + +BloomFilterPolicy::BloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + +FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } else if (context.table_options.format_version < 5) { + return GetLegacyBloomBuilderWithContext(context); + } else { + return GetFastLocalBloomBuilderWithContext(context); + } +} + +const char* BloomFilterPolicy::kClassName() { return "bloomfilter"; } +const char* BloomFilterPolicy::kNickName() { return "rocksdb.BloomFilter"; } + +std::string BloomFilterPolicy::GetId() const { + // Including ":false" for better forward-compatibility with 6.29 and earlier + // which required a boolean `use_block_based_builder` parameter + return BloomLikeFilterPolicy::GetId() + ":false"; +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetFastLocalBloomBuilderWithContext( + const FilterBuildingContext& context) const { + bool offm = context.table_options.optimize_filters_for_memory; + const auto options_overrides_iter = + context.table_options.cache_usage_options.options_overrides.find( + CacheEntryRole::kFilterConstruction); + const auto filter_construction_charged = + options_overrides_iter != + context.table_options.cache_usage_options.options_overrides.end() + ? options_overrides_iter->second.charged + : context.table_options.cache_usage_options.options.charged; + + std::shared_ptr<CacheReservationManager> cache_res_mgr; + if (context.table_options.block_cache && + filter_construction_charged == + CacheEntryRoleOptions::Decision::kEnabled) { + cache_res_mgr = std::make_shared< + CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>>( + context.table_options.block_cache); + } + return new FastLocalBloomBitsBuilder( + millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr, + cache_res_mgr, context.table_options.detect_filter_construct_corruption); +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetLegacyBloomBuilderWithContext( + const FilterBuildingContext& context) const { + if (whole_bits_per_key_ >= 14 && context.info_log && + !warned_.load(std::memory_order_relaxed)) { + warned_ = true; + const char* adjective; + if (whole_bits_per_key_ >= 20) { + adjective = "Dramatic"; + } else { + adjective = "Significant"; + } + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN(context.info_log, + "Using legacy Bloom filter with high (%d) bits/key. 
" + "%s filter space and/or accuracy improvement is available " + "with format_version>=5.", + whole_bits_per_key_, adjective); + } + return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log); +} + +FilterBitsBuilder* +BloomLikeFilterPolicy::GetStandard128RibbonBuilderWithContext( + const FilterBuildingContext& context) const { + // FIXME: code duplication with GetFastLocalBloomBuilderWithContext + bool offm = context.table_options.optimize_filters_for_memory; + const auto options_overrides_iter = + context.table_options.cache_usage_options.options_overrides.find( + CacheEntryRole::kFilterConstruction); + const auto filter_construction_charged = + options_overrides_iter != + context.table_options.cache_usage_options.options_overrides.end() + ? options_overrides_iter->second.charged + : context.table_options.cache_usage_options.options.charged; + + std::shared_ptr<CacheReservationManager> cache_res_mgr; + if (context.table_options.block_cache && + filter_construction_charged == + CacheEntryRoleOptions::Decision::kEnabled) { + cache_res_mgr = std::make_shared< + CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>>( + context.table_options.block_cache); + } + return new Standard128RibbonBitsBuilder( + desired_one_in_fp_rate_, millibits_per_key_, + offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr, + context.table_options.detect_filter_construct_corruption, + context.info_log); +} + +std::string BloomLikeFilterPolicy::GetBitsPerKeySuffix() const { + std::string rv = ":" + std::to_string(millibits_per_key_ / 1000); + int frac = millibits_per_key_ % 1000; + if (frac > 0) { + rv.push_back('.'); + rv.push_back(static_cast<char>('0' + (frac / 100))); + frac %= 100; + if (frac > 0) { + rv.push_back(static_cast<char>('0' + (frac / 10))); + frac %= 10; + if (frac > 0) { + rv.push_back(static_cast<char>('0' + frac)); + } + } + } + return rv; +} + +FilterBitsBuilder* BuiltinFilterPolicy::GetBuilderFromContext( + const FilterBuildingContext& context) { + if (context.table_options.filter_policy) { + return context.table_options.filter_policy->GetBuilderWithContext(context); + } else { + return nullptr; + } +} + +// For testing only, but always constructable with internal names +namespace test { + +const char* LegacyBloomFilterPolicy::kClassName() { + return "rocksdb.internal.LegacyBloomFilter"; +} + +FilterBitsBuilder* LegacyBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetLegacyBloomBuilderWithContext(context); +} + +const char* FastLocalBloomFilterPolicy::kClassName() { + return "rocksdb.internal.FastLocalBloomFilter"; +} + +FilterBitsBuilder* FastLocalBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetFastLocalBloomBuilderWithContext(context); +} + +const char* Standard128RibbonFilterPolicy::kClassName() { + return "rocksdb.internal.Standard128RibbonFilter"; +} + +FilterBitsBuilder* Standard128RibbonFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetStandard128RibbonBuilderWithContext(context); +} + +} // namespace test + +BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader( + const Slice& contents) { + uint32_t len_with_meta = 
static_cast<uint32_t>(contents.size()); + if (len_with_meta <= kMetadataLen) { + // filter is empty or broken. Treat like zero keys added. + return new AlwaysFalseFilter(); + } + + // Legacy Bloom filter data: + // 0 +-----------------------------------+ + // | Raw Bloom filter data | + // | ... | + // len +-----------------------------------+ + // | byte for num_probes or | + // | marker for new implementations | + // len+1 +-----------------------------------+ + // | four bytes for number of cache | + // | lines | + // len_with_meta +-----------------------------------+ + + int8_t raw_num_probes = + static_cast<int8_t>(contents.data()[len_with_meta - kMetadataLen]); + // NB: *num_probes > 30 and < 128 probably have not been used, because of + // BloomFilterPolicy::initialize, unless directly calling + // LegacyBloomBitsBuilder as an API, but we are leaving those cases in + // limbo with LegacyBloomBitsReader for now. + + if (raw_num_probes < 1) { + // Note: < 0 (or unsigned > 127) indicate special new implementations + // (or reserved for future use) + switch (raw_num_probes) { + case 0: + // Treat as zero probes (always FP) + return new AlwaysTrueFilter(); + case -1: + // Marker for newer Bloom implementations + return GetBloomBitsReader(contents); + case -2: + // Marker for Ribbon implementations + return GetRibbonBitsReader(contents); + default: + // Reserved (treat as zero probes, always FP, for now) + return new AlwaysTrueFilter(); + } + } + // else attempt decode for LegacyBloomBitsReader + + int num_probes = raw_num_probes; + assert(num_probes >= 1); + assert(num_probes <= 127); + + uint32_t len = len_with_meta - kMetadataLen; + assert(len > 0); + + uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4); + uint32_t log2_cache_line_size; + + if (num_lines * CACHE_LINE_SIZE == len) { + // Common case + log2_cache_line_size = ConstexprFloorLog2(CACHE_LINE_SIZE); + } else if (num_lines == 0 || len % num_lines != 0) { + // Invalid (no solution to num_lines * x == len) + // Treat as zero probes (always FP) for now. + return new AlwaysTrueFilter(); + } else { + // Determine the non-native cache line size (from another system) + log2_cache_line_size = 0; + while ((num_lines << log2_cache_line_size) < len) { + ++log2_cache_line_size; + } + if ((num_lines << log2_cache_line_size) != len) { + // Invalid (block size not a power of two) + // Treat as zero probes (always FP) for now. + return new AlwaysTrueFilter(); + } + } + // if not early return + return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines, + log2_cache_line_size); +} + +// Read metadata to determine what kind of FilterBitsReader is needed +// and return a new one. +FilterBitsReader* BuiltinFilterPolicy::GetFilterBitsReader( + const Slice& contents) const { + return BuiltinFilterPolicy::GetBuiltinFilterBitsReader(contents); +} + +BuiltinFilterBitsReader* BuiltinFilterPolicy::GetRibbonBitsReader( + const Slice& contents) { + uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); + uint32_t len = len_with_meta - kMetadataLen; + + assert(len > 0); // precondition + + uint32_t seed = static_cast<uint8_t>(contents.data()[len + 1]); + uint32_t num_blocks = static_cast<uint8_t>(contents.data()[len + 2]); + num_blocks |= static_cast<uint8_t>(contents.data()[len + 3]) << 8; + num_blocks |= static_cast<uint8_t>(contents.data()[len + 4]) << 16; + if (num_blocks < 2) { + // Not supported + // num_blocks == 1 is not used because num_starts == 1 is problematic + // for the hashing scheme. 
+
+// For newer Bloom filter implementations
+BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBloomBitsReader(
+    const Slice& contents) {
+  uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
+  uint32_t len = len_with_meta - kMetadataLen;
+
+  assert(len > 0);  // precondition
+
+  // New Bloom filter data:
+  //             0 +-----------------------------------+
+  //               | Raw Bloom filter data             |
+  //               | ...                               |
+  //           len +-----------------------------------+
+  //               | char{-1} byte -> new Bloom filter |
+  //         len+1 +-----------------------------------+
+  //               | byte for subimplementation        |
+  //               |   0: FastLocalBloom               |
+  //               |   other: reserved                 |
+  //         len+2 +-----------------------------------+
+  //               | byte for block_and_probes         |
+  //               |   0 in top 3 bits -> 6 -> 64-byte |
+  //               |   reserved:                       |
+  //               |   1 in top 3 bits -> 7 -> 128-byte|
+  //               |   2 in top 3 bits -> 8 -> 256-byte|
+  //               |   ...                             |
+  //               |   num_probes in bottom 5 bits,    |
+  //               |     except 0 and 31 reserved      |
+  //         len+3 +-----------------------------------+
+  //               | two bytes reserved                |
+  //               |   possibly for hash seed          |
+  // len_with_meta +-----------------------------------+
+
+  // Read more metadata (see above)
+  char sub_impl_val = contents.data()[len_with_meta - 4];
+  char block_and_probes = contents.data()[len_with_meta - 3];
+  int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6;
+
+  int num_probes = (block_and_probes & 31);
+  if (num_probes < 1 || num_probes > 30) {
+    // Reserved / future safe
+    return new AlwaysTrueFilter();
+  }
+
+  uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2);
+  if (rest != 0) {
+    // Reserved, possibly for hash seed
+    // Future safe
+    return new AlwaysTrueFilter();
+  }
+
+  if (sub_impl_val == 0) {        // FastLocalBloom
+    if (log2_block_bytes == 6) {  // Only block size supported for now
+      return new FastLocalBloomBitsReader(contents.data(), num_probes, len);
+    }
+  }
+  // otherwise
+  // Reserved / future safe
+  return new AlwaysTrueFilter();
+}
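+
+// Worked example of the new-format Bloom trailer above, with hypothetical
+// byte values: a trailer of {-1, 0, 6, 0, 0} selects FastLocalBloom
+// (sub_impl_val == 0) with block_and_probes = 6, giving log2_block_bytes =
+// ((6 >> 5) & 7) + 6 = 6 (64-byte blocks) and num_probes = 6 & 31 = 6.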
+
+const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
+                                         bool /*use_block_based_builder*/) {
+  // NOTE: use_block_based_builder now ignored so block-based filter is no
+  // longer accessible in public API.
+  return new BloomFilterPolicy(bits_per_key);
+}
+
+RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
+                                       int bloom_before_level)
+    : BloomLikeFilterPolicy(bloom_equivalent_bits_per_key),
+      bloom_before_level_(bloom_before_level) {}
+
+FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext(
+    const FilterBuildingContext& context) const {
+  if (GetMillibitsPerKey() == 0) {
+    // "No filter" special case
+    return nullptr;
+  }
+  // Treat unknown same as bottommost
+  int levelish = INT_MAX;
+
+  switch (context.compaction_style) {
+    case kCompactionStyleLevel:
+    case kCompactionStyleUniversal: {
+      if (context.reason == TableFileCreationReason::kFlush) {
+        // Treat flush as level -1
+        assert(context.level_at_creation == 0);
+        levelish = -1;
+      } else if (context.level_at_creation == -1) {
+        // Unknown level
+        assert(levelish == INT_MAX);
+      } else {
+        levelish = context.level_at_creation;
+      }
+      break;
+    }
+    case kCompactionStyleFIFO:
+    case kCompactionStyleNone:
+      // Treat as bottommost
+      assert(levelish == INT_MAX);
+      break;
+  }
+  if (levelish < bloom_before_level_) {
+    return GetFastLocalBloomBuilderWithContext(context);
+  } else {
+    return GetStandard128RibbonBuilderWithContext(context);
+  }
+}
+
+const char* RibbonFilterPolicy::kClassName() { return "ribbonfilter"; }
+const char* RibbonFilterPolicy::kNickName() { return "rocksdb.RibbonFilter"; }
+
+std::string RibbonFilterPolicy::GetId() const {
+  return BloomLikeFilterPolicy::GetId() + ":" +
+         std::to_string(bloom_before_level_);
+}
+
+const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key,
+                                          int bloom_before_level) {
+  return new RibbonFilterPolicy(bloom_equivalent_bits_per_key,
+                                bloom_before_level);
+}
+
+FilterBuildingContext::FilterBuildingContext(
+    const BlockBasedTableOptions& _table_options)
+    : table_options(_table_options) {}
+
+FilterPolicy::~FilterPolicy() {}
+
+std::shared_ptr<const FilterPolicy> BloomLikeFilterPolicy::Create(
+    const std::string& name, double bits_per_key) {
+  if (name == test::LegacyBloomFilterPolicy::kClassName()) {
+    return std::make_shared<test::LegacyBloomFilterPolicy>(bits_per_key);
+  } else if (name == test::FastLocalBloomFilterPolicy::kClassName()) {
+    return std::make_shared<test::FastLocalBloomFilterPolicy>(bits_per_key);
+  } else if (name == test::Standard128RibbonFilterPolicy::kClassName()) {
+    return std::make_shared<test::Standard128RibbonFilterPolicy>(bits_per_key);
+  } else if (name == BloomFilterPolicy::kClassName()) {
+    // For testing
+    return std::make_shared<BloomFilterPolicy>(bits_per_key);
+  } else if (name == RibbonFilterPolicy::kClassName()) {
+    // For testing
+    return std::make_shared<RibbonFilterPolicy>(bits_per_key,
+                                                /*bloom_before_level*/ 0);
+  } else {
+    return nullptr;
+  }
+}
+
+#ifndef ROCKSDB_LITE
+namespace {
+static ObjectLibrary::PatternEntry FilterPatternEntryWithBits(
+    const char* name) {
+  return ObjectLibrary::PatternEntry(name, false).AddNumber(":", false);
+}
+
+template <typename T>
+T* NewBuiltinFilterPolicyWithBits(const std::string& uri) {
+  const std::vector<std::string> vals = StringSplit(uri, ':');
+  double bits_per_key = ParseDouble(vals[1]);
+  return new T(bits_per_key);
+}
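+
+// For example, the helpers above turn an id like "ribbonfilter:10" into
+// StringSplit -> {"ribbonfilter", "10"} and bits_per_key = 10; an id like
+// "ribbonfilter:10:1" additionally carries bloom_before_level = 1 in
+// vals[2], parsed by the Ribbon factory registered below.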
+
+static int RegisterBuiltinFilterPolicies(ObjectLibrary& library,
+                                         const std::string& /*arg*/) {
+  library.AddFactory<const FilterPolicy>(
+      ReadOnlyBuiltinFilterPolicy::kClassName(),
+      [](const std::string& /*uri*/, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(new ReadOnlyBuiltinFilterPolicy());
+        return guard->get();
+      });
+
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(BloomFilterPolicy::kClassName())
+          .AnotherName(BloomFilterPolicy::kNickName()),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(NewBuiltinFilterPolicyWithBits<BloomFilterPolicy>(uri));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(BloomFilterPolicy::kClassName())
+          .AnotherName(BloomFilterPolicy::kNickName())
+          .AddSuffix(":false"),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(NewBuiltinFilterPolicyWithBits<BloomFilterPolicy>(uri));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(BloomFilterPolicy::kClassName())
+          .AnotherName(BloomFilterPolicy::kNickName())
+          .AddSuffix(":true"),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        const std::vector<std::string> vals = StringSplit(uri, ':');
+        double bits_per_key = ParseDouble(vals[1]);
+        // NOTE: This case previously configured the deprecated block-based
+        // filter, but old ways of configuring that now map to full filter. We
+        // defer to the corresponding API to ensure consistency in case that
+        // change is reverted.
+        guard->reset(NewBloomFilterPolicy(bits_per_key, true));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(RibbonFilterPolicy::kClassName())
+          .AnotherName(RibbonFilterPolicy::kNickName()),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        const std::vector<std::string> vals = StringSplit(uri, ':');
+        double bits_per_key = ParseDouble(vals[1]);
+        guard->reset(NewRibbonFilterPolicy(bits_per_key));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(RibbonFilterPolicy::kClassName())
+          .AnotherName(RibbonFilterPolicy::kNickName())
+          .AddNumber(":", true),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        const std::vector<std::string> vals = StringSplit(uri, ':');
+        double bits_per_key = ParseDouble(vals[1]);
+        int bloom_before_level = ParseInt(vals[2]);
+        guard->reset(NewRibbonFilterPolicy(bits_per_key, bloom_before_level));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(test::LegacyBloomFilterPolicy::kClassName()),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(
+            NewBuiltinFilterPolicyWithBits<test::LegacyBloomFilterPolicy>(uri));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(
+          test::FastLocalBloomFilterPolicy::kClassName()),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(
+            NewBuiltinFilterPolicyWithBits<test::FastLocalBloomFilterPolicy>(
+                uri));
+        return guard->get();
+      });
+  library.AddFactory<const FilterPolicy>(
+      FilterPatternEntryWithBits(
+          test::Standard128RibbonFilterPolicy::kClassName()),
+      [](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
+         std::string* /* errmsg */) {
+        guard->reset(
+            NewBuiltinFilterPolicyWithBits<test::Standard128RibbonFilterPolicy>(
+                uri));
+        return guard->get();
+      });
+  size_t num_types;
+  return static_cast<int>(library.GetFactoryCount(&num_types));
+}
+}  // namespace
+#endif  // ROCKSDB_LITE
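+
+// Hypothetical usage sketch (illustrative, not part of this file): with the
+// factories registered above, CreateFromString below can resolve a string
+// id through the object registry, e.g.
+//   std::shared_ptr<const FilterPolicy> policy;
+//   Status s = FilterPolicy::CreateFromString(
+//       ConfigOptions(), "ribbonfilter:10", &policy);
+// which resolves to NewRibbonFilterPolicy(10).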
+
+Status FilterPolicy::CreateFromString(
+    const ConfigOptions& options, const std::string& value,
+    std::shared_ptr<const FilterPolicy>* policy) {
+  if (value == kNullptrString || value.empty()) {
+    policy->reset();
+    return Status::OK();
+  } else if (value == ReadOnlyBuiltinFilterPolicy::kClassName()) {
+    *policy = std::make_shared<ReadOnlyBuiltinFilterPolicy>();
+    return Status::OK();
+  }
+
+  std::string id;
+  std::unordered_map<std::string, std::string> opt_map;
+  Status status =
+      Customizable::GetOptionsMap(options, policy->get(), value, &id, &opt_map);
+  if (!status.ok()) {  // GetOptionsMap failed
+    return status;
+  } else if (id.empty()) {  // We have no Id but have options. Not good
+    return Status::NotSupported("Cannot reset object ", id);
+  } else {
+#ifndef ROCKSDB_LITE
+    static std::once_flag loaded;
+    std::call_once(loaded, [&]() {
+      RegisterBuiltinFilterPolicies(*(ObjectLibrary::Default().get()), "");
+    });
+    status = options.registry->NewSharedObject(id, policy);
+#else
+    status =
+        Status::NotSupported("Cannot load filter policy in LITE mode ", value);
+#endif  // ROCKSDB_LITE
+  }
+  if (options.ignore_unsupported_options && status.IsNotSupported()) {
+    return Status::OK();
+  } else if (status.ok()) {
+    status = Customizable::ConfigureNewObject(
+        options, const_cast<FilterPolicy*>(policy->get()), opt_map);
+  }
+  return status;
+}
+
+const std::vector<std::string>& BloomLikeFilterPolicy::GetAllFixedImpls() {
+  STATIC_AVOID_DESTRUCTION(std::vector<std::string>, impls){
+      // Match filter_bench -impl=x ordering
+      test::LegacyBloomFilterPolicy::kClassName(),
+      test::FastLocalBloomFilterPolicy::kClassName(),
+      test::Standard128RibbonFilterPolicy::kClassName(),
+  };
+  return impls;
+}
+
+}  // namespace ROCKSDB_NAMESPACE