summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/table/block_based/filter_policy_internal.h
blob: 2ca9dc8595fc263ac98f786b2315fc336287b019 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include <atomic>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/filter_policy.h"
#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {

class Slice;

// Exposes any extra information needed for testing built-in
// FilterBitsBuilders
class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
 public:
  // Calculate number of bytes needed for a new filter, including
  // metadata. Passing the result to CalculateNumEntry should
  // return >= the num_entry passed in.
  virtual uint32_t CalculateSpace(const int num_entry) = 0;

  // Returns an estimate of the FP rate of the returned filter if
  // `keys` keys are added and the filter returned by Finish is `bytes`
  // bytes.
  virtual double EstimatedFpRate(size_t keys, size_t bytes) = 0;
};

// RocksDB built-in filter policy for Bloom or Bloom-like filters.
// This class is considered internal API and subject to change.
// See NewBloomFilterPolicy.
class BloomFilterPolicy : public FilterPolicy {
 public:
  // An internal marker for operating modes of BloomFilterPolicy, in terms
  // of selecting an implementation. This makes it easier for tests to track
  // or to walk over the built-in set of Bloom filter implementations. The
  // only variance in BloomFilterPolicy by mode/implementation is in
  // GetFilterBitsBuilder(), so an enum is practical here vs. subclasses.
  //
  // This enum is essentially the union of all the different kinds of return
  // value from GetFilterBitsBuilder, or "underlying implementation", and
  // higher-level modes that choose an underlying implementation based on
  // context information.
  enum Mode {
    // Legacy implementation of Bloom filter for full and partitioned filters.
    // Set to 0 in case of value confusion with bool use_block_based_builder
    // NOTE: TESTING ONLY as this mode does not use best compatible
    // implementation
    kLegacyBloom = 0,
    // Deprecated block-based Bloom filter implementation.
    // Set to 1 in case of value confusion with bool use_block_based_builder
    // NOTE: DEPRECATED but user exposed
    kDeprecatedBlock = 1,
    // A fast, cache-local Bloom filter implementation. See description in
    // FastLocalBloomImpl.
    // NOTE: TESTING ONLY as this mode does not check format_version
    kFastLocalBloom = 2,
    // Automatically choose from the above (except kDeprecatedBlock) based on
    // context at build time, including compatibility with format_version.
    // NOTE: This is currently the only recommended mode that is user exposed.
    kAuto = 100,
  };
  // All the different underlying implementations that a BloomFilterPolicy
  // might use, as a mode that says "always use this implementation."
  // Only appropriate for unit tests.
  static const std::vector<Mode> kAllFixedImpls;

  // All the different modes of BloomFilterPolicy that are exposed from
  // user APIs. Only appropriate for higher-level unit tests. Integration
  // tests should prefer using NewBloomFilterPolicy (user-exposed).
  static const std::vector<Mode> kAllUserModes;

  explicit BloomFilterPolicy(double bits_per_key, Mode mode);

  ~BloomFilterPolicy() override;

  const char* Name() const override;

  // Deprecated block-based filter only
  void CreateFilter(const Slice* keys, int n, std::string* dst) const override;

  // Deprecated block-based filter only
  bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override;

  FilterBitsBuilder* GetFilterBitsBuilder() const override;

  // To use this function, call GetBuilderFromContext().
  //
  // Neither the context nor any objects therein should be saved beyond
  // the call to this function, unless it's shared_ptr.
  FilterBitsBuilder* GetBuilderWithContext(
      const FilterBuildingContext&) const override;

  // Returns a new FilterBitsBuilder from the filter_policy in
  // table_options of a context, or nullptr if not applicable.
  // (An internal convenience function to save boilerplate.)
  static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);

  // Read metadata to determine what kind of FilterBitsReader is needed
  // and return a new one. This must successfully process any filter data
  // generated by a built-in FilterBitsBuilder, regardless of the impl
  // chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
  FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;

  // Essentially for testing only: configured millibits/key
  int GetMillibitsPerKey() const { return millibits_per_key_; }
  // Essentially for testing only: legacy whole bits/key
  int GetWholeBitsPerKey() const { return whole_bits_per_key_; }

 private:
  // Newer filters support fractional bits per key. For predictable behavior
  // of 0.001-precision values across floating point implementations, we
  // round to thousandths of a bit (on average) per key.
  int millibits_per_key_;

  // Older filters round to whole number bits per key. (There *should* be no
  // compatibility issue with fractional bits per key, but preserving old
  // behavior with format_version < 5 just in case.)
  int whole_bits_per_key_;

  // Selected mode (a specific implementation or way of selecting an
  // implementation) for building new SST filters.
  Mode mode_;

  // Whether relevant warnings have been logged already. (Remember so we
  // only report once per BloomFilterPolicy instance, to keep the noise down.)
  mutable std::atomic<bool> warned_;

  // For newer Bloom filter implementation(s)
  FilterBitsReader* GetBloomBitsReader(const Slice& contents) const;
};

}  // namespace ROCKSDB_NAMESPACE