summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/table/block_based/filter_policy_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/table/block_based/filter_policy_internal.h')
-rw-r--r--src/rocksdb/table/block_based/filter_policy_internal.h340
1 files changed, 340 insertions, 0 deletions
diff --git a/src/rocksdb/table/block_based/filter_policy_internal.h b/src/rocksdb/table/block_based/filter_policy_internal.h
new file mode 100644
index 000000000..9bc3a2482
--- /dev/null
+++ b/src/rocksdb/table/block_based/filter_policy_internal.h
@@ -0,0 +1,340 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/table.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A class that takes a bunch of keys, then generates filter
+class FilterBitsBuilder {
+ public:
+ virtual ~FilterBitsBuilder() {}
+
+ // Add a key (or prefix) to the filter. Typically, a builder will keep
+ // a set of 64-bit key hashes and only build the filter in Finish
+ // when the final number of keys is known. Keys are added in sorted order
+ // and duplicated keys are possible, so typically, the builder will
+ // only add this key if its hash is different from the most recently
+ // added.
+ virtual void AddKey(const Slice& key) = 0;
+
+ // Called by RocksDB before Finish to populate
+ // TableProperties::num_filter_entries, so should represent the
+ // number of unique keys (and/or prefixes) added, but does not have
+ // to be exact. `return 0;` may be used to conspicuously indicate "unknown".
+ virtual size_t EstimateEntriesAdded() = 0;
+
+ // Generate the filter using the keys that are added
+ // The return value of this function would be the filter bits,
+ // The ownership of actual data is set to buf
+ virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
+
+ // Similar to Finish(std::unique_ptr<const char[]>* buf), except that
+ // for a non-null status pointer argument, it will point to
+ // Status::Corruption() when there is any corruption during filter
+ // construction or Status::OK() otherwise.
+ //
+ // WARNING: do not use a filter resulted from a corrupted construction
+ // TODO: refactor this to have a better signature, consolidate
+ virtual Slice Finish(std::unique_ptr<const char[]>* buf,
+ Status* /* status */) {
+ return Finish(buf);
+ }
+
+ // Verify the filter returned from calling FilterBitsBuilder::Finish.
+ // The function returns Status::Corruption() if there is any corruption in the
+ // constructed filter or Status::OK() otherwise.
+ //
+ // Implementations should normally consult
+ // FilterBuildingContext::table_options.detect_filter_construct_corruption
+ // to determine whether to perform verification or to skip by returning
+ // Status::OK(). The decision is left to the FilterBitsBuilder so that
+ // verification prerequisites before PostVerify can be skipped when not
+ // configured.
+ //
+ // RocksDB internal will always call MaybePostVerify() on the filter after
+ // it is returned from calling FilterBitsBuilder::Finish
+ // except for FilterBitsBuilder::Finish resulting a corruption
+ // status, which indicates the filter is already in a corrupted state and
+ // there is no need to post-verify
+ virtual Status MaybePostVerify(const Slice& /* filter_content */) {
+ return Status::OK();
+ }
+
+ // Approximate the number of keys that can be added and generate a filter
+ // <= the specified number of bytes. Callers (including RocksDB) should
+ // only use this result for optimizing performance and not as a guarantee.
+ virtual size_t ApproximateNumEntries(size_t bytes) = 0;
+};
+
+// A class that checks if a key can be in filter
+// It should be initialized by Slice generated by BitsBuilder
+class FilterBitsReader {
+ public:
+ virtual ~FilterBitsReader() {}
+
+ // Check if the entry match the bits in filter
+ virtual bool MayMatch(const Slice& entry) = 0;
+
+ // Check if an array of entries match the bits in filter
+ virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
+ for (int i = 0; i < num_keys; ++i) {
+ may_match[i] = MayMatch(*keys[i]);
+ }
+ }
+};
+
+// Exposes any extra information needed for testing built-in
+// FilterBitsBuilders
+class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
+ public:
+ // Calculate number of bytes needed for a new filter, including
+ // metadata. Passing the result to ApproximateNumEntries should
+ // (ideally, usually) return >= the num_entry passed in.
+ // When optimize_filters_for_memory is enabled, this function
+ // is not authoritative but represents a target size that should
+ // be close to the average size.
+ virtual size_t CalculateSpace(size_t num_entries) = 0;
+
+ // Returns an estimate of the FP rate of the returned filter if
+ // `num_entries` keys are added and the filter returned by Finish
+ // is `bytes` bytes.
+ virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
+};
+
+// Base class for RocksDB built-in filter reader with
+// extra useful functionalities for inernal.
+class BuiltinFilterBitsReader : public FilterBitsReader {
+ public:
+ // Check if the hash of the entry match the bits in filter
+ virtual bool HashMayMatch(const uint64_t /* h */) { return true; }
+};
+
+// Base class for RocksDB built-in filter policies. This provides the
+// ability to read all kinds of built-in filters (so that old filters can
+// be used even when you change between built-in policies).
+class BuiltinFilterPolicy : public FilterPolicy {
+ public: // overrides
+ // Read metadata to determine what kind of FilterBitsReader is needed
+ // and return a new one. This must successfully process any filter data
+ // generated by a built-in FilterBitsBuilder, regardless of the impl
+ // chosen for this BloomFilterPolicy.
+ FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
+ static const char* kClassName();
+ bool IsInstanceOf(const std::string& id) const override;
+ // All variants of BuiltinFilterPolicy can read each others filters.
+ const char* CompatibilityName() const override;
+ static const char* kCompatibilityName();
+
+ public: // new
+ // An internal function for the implementation of
+ // BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance
+ // or working around potential virtual overrides.
+ static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
+ const Slice& contents);
+
+ // Returns a new FilterBitsBuilder from the filter_policy in
+ // table_options of a context, or nullptr if not applicable.
+ // (An internal convenience function to save boilerplate.)
+ static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
+
+ private:
+ // For Bloom filter implementation(s)
+ static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents);
+
+ // For Ribbon filter implementation(s)
+ static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents);
+};
+
+// A "read only" filter policy used for backward compatibility with old
+// OPTIONS files, which did not specifying a Bloom configuration, just
+// "rocksdb.BuiltinBloomFilter". Although this can read existing filters,
+// this policy does not build new filters, so new SST files generated
+// under the policy will get no filters (like nullptr FilterPolicy).
+// This class is considered internal API and subject to change.
+class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy {
+ public:
+ const char* Name() const override { return kClassName(); }
+ static const char* kClassName();
+
+ // Does not write filters.
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext&) const override {
+ return nullptr;
+ }
+};
+
+// RocksDB built-in filter policy for Bloom or Bloom-like filters including
+// Ribbon filters.
+// This class is considered internal API and subject to change.
+// See NewBloomFilterPolicy and NewRibbonFilterPolicy.
+class BloomLikeFilterPolicy : public BuiltinFilterPolicy {
+ public:
+ explicit BloomLikeFilterPolicy(double bits_per_key);
+
+ ~BloomLikeFilterPolicy() override;
+ static const char* kClassName();
+ bool IsInstanceOf(const std::string& id) const override;
+
+ std::string GetId() const override;
+
+ // Essentially for testing only: configured millibits/key
+ int GetMillibitsPerKey() const { return millibits_per_key_; }
+ // Essentially for testing only: legacy whole bits/key
+ int GetWholeBitsPerKey() const { return whole_bits_per_key_; }
+
+ // All the different underlying implementations that a BloomLikeFilterPolicy
+ // might use, as a configuration string name for a testing mode for
+ // "always use this implementation." Only appropriate for unit tests.
+ static const std::vector<std::string>& GetAllFixedImpls();
+
+ // Convenience function for creating by name for fixed impls
+ static std::shared_ptr<const FilterPolicy> Create(const std::string& name,
+ double bits_per_key);
+
+ protected:
+ // Some implementations used by aggregating policies
+ FilterBitsBuilder* GetLegacyBloomBuilderWithContext(
+ const FilterBuildingContext& context) const;
+ FilterBitsBuilder* GetFastLocalBloomBuilderWithContext(
+ const FilterBuildingContext& context) const;
+ FilterBitsBuilder* GetStandard128RibbonBuilderWithContext(
+ const FilterBuildingContext& context) const;
+
+ std::string GetBitsPerKeySuffix() const;
+
+ private:
+ // Bits per key settings are for configuring Bloom filters.
+
+ // Newer filters support fractional bits per key. For predictable behavior
+ // of 0.001-precision values across floating point implementations, we
+ // round to thousandths of a bit (on average) per key.
+ int millibits_per_key_;
+
+ // Older filters round to whole number bits per key. (There *should* be no
+ // compatibility issue with fractional bits per key, but preserving old
+ // behavior with format_version < 5 just in case.)
+ int whole_bits_per_key_;
+
+ // For configuring Ribbon filter: a desired value for 1/fp_rate. For
+ // example, 100 -> 1% fp rate.
+ double desired_one_in_fp_rate_;
+
+ // Whether relevant warnings have been logged already. (Remember so we
+ // only report once per BloomFilterPolicy instance, to keep the noise down.)
+ mutable std::atomic<bool> warned_;
+
+ // State for implementing optimize_filters_for_memory. Essentially, this
+ // tracks a surplus or deficit in total FP rate of filters generated by
+ // builders under this policy vs. what would have been generated without
+ // optimize_filters_for_memory.
+ //
+ // To avoid floating point weirdness, the actual value is
+ // Sum over all generated filters f:
+ // (predicted_fp_rate(f) - predicted_fp_rate(f|o_f_f_m=false)) * 2^32
+ mutable std::atomic<int64_t> aggregate_rounding_balance_;
+};
+
+// For NewBloomFilterPolicy
+//
+// This is a user-facing policy that automatically choose between
+// LegacyBloom and FastLocalBloom based on context at build time,
+// including compatibility with format_version.
+class BloomFilterPolicy : public BloomLikeFilterPolicy {
+ public:
+ explicit BloomFilterPolicy(double bits_per_key);
+
+ // To use this function, call BuiltinFilterPolicy::GetBuilderFromContext().
+ //
+ // Neither the context nor any objects therein should be saved beyond
+ // the call to this function, unless it's shared_ptr.
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext&) const override;
+
+ static const char* kClassName();
+ const char* Name() const override { return kClassName(); }
+ static const char* kNickName();
+ const char* NickName() const override { return kNickName(); }
+ std::string GetId() const override;
+};
+
+// For NewRibbonFilterPolicy
+//
+// This is a user-facing policy that chooses between Standard128Ribbon
+// and FastLocalBloom based on context at build time (LSM level and other
+// factors in extreme cases).
+class RibbonFilterPolicy : public BloomLikeFilterPolicy {
+ public:
+ explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
+ int bloom_before_level);
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext&) const override;
+
+ int GetBloomBeforeLevel() const { return bloom_before_level_; }
+
+ static const char* kClassName();
+ const char* Name() const override { return kClassName(); }
+ static const char* kNickName();
+ const char* NickName() const override { return kNickName(); }
+ std::string GetId() const override;
+
+ private:
+ const int bloom_before_level_;
+};
+
+// For testing only, but always constructable with internal names
+namespace test {
+
+class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy {
+ public:
+ explicit LegacyBloomFilterPolicy(double bits_per_key)
+ : BloomLikeFilterPolicy(bits_per_key) {}
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext& context) const override;
+
+ static const char* kClassName();
+ const char* Name() const override { return kClassName(); }
+};
+
+class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy {
+ public:
+ explicit FastLocalBloomFilterPolicy(double bits_per_key)
+ : BloomLikeFilterPolicy(bits_per_key) {}
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext& context) const override;
+
+ static const char* kClassName();
+ const char* Name() const override { return kClassName(); }
+};
+
+class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy {
+ public:
+ explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key)
+ : BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {}
+
+ FilterBitsBuilder* GetBuilderWithContext(
+ const FilterBuildingContext& context) const override;
+
+ static const char* kClassName();
+ const char* Name() const override { return kClassName(); }
+};
+
+} // namespace test
+
+} // namespace ROCKSDB_NAMESPACE