summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/table/block_based/partitioned_filter_block.h
blob: 955b50739636cf021ebc3c62810edd1359018dd2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <deque>
#include <list>
#include <string>
#include <unordered_map>

#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/block_based/block.h"
#include "table/block_based/filter_block_reader_common.h"
#include "table/block_based/full_filter_block.h"
#include "table/block_based/index_builder.h"
#include "util/autovector.h"
#include "util/hash_containers.h"

namespace ROCKSDB_NAMESPACE {
class InternalKeyComparator;

class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
 public:
  explicit PartitionedFilterBlockBuilder(
      const SliceTransform* prefix_extractor, bool whole_key_filtering,
      FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval,
      const bool use_value_delta_encoding,
      PartitionedIndexBuilder* const p_index_builder,
      const uint32_t partition_size);

  virtual ~PartitionedFilterBlockBuilder();

  void AddKey(const Slice& key) override;
  void Add(const Slice& key) override;
  size_t EstimateEntriesAdded() override;

  virtual Slice Finish(
      const BlockHandle& last_partition_block_handle, Status* status,
      std::unique_ptr<const char[]>* filter_data = nullptr) override;

  virtual void ResetFilterBitsBuilder() override {
    // Previously constructed partitioned filters by
    // this to-be-reset FiterBitsBuilder can also be
    // cleared
    filters.clear();
    FullFilterBlockBuilder::ResetFilterBitsBuilder();
  }

  // For PartitionFilter, optional post-verifing the filter is done
  // as part of PartitionFilterBlockBuilder::Finish
  // to avoid implementation complexity of doing it elsewhere.
  // Therefore we are skipping it in here.
  virtual Status MaybePostVerifyFilter(
      const Slice& /* filter_content */) override {
    return Status::OK();
  }

 private:
  // Filter data
  BlockBuilder index_on_filter_block_builder_;  // top-level index builder
  BlockBuilder
      index_on_filter_block_builder_without_seq_;  // same for user keys
  struct FilterEntry {
    std::string key;
    std::unique_ptr<const char[]> filter_data;
    Slice filter;
  };
  std::deque<FilterEntry> filters;  // list of partitioned filters and keys used
                                    // in building the index

  // Set to the first non-okay status if any of the filter
  // partitions experiences construction error.
  // If partitioned_filters_construction_status_ is non-okay,
  // then the whole partitioned filters should not be used.
  Status partitioned_filters_construction_status_;
  std::string last_filter_entry_key;
  std::unique_ptr<const char[]> last_filter_data;
  std::unique_ptr<IndexBuilder> value;
  bool finishing_filters =
      false;  // true if Finish is called once but not complete yet.
  // The policy of when cut a filter block and Finish it
  void MaybeCutAFilterBlock(const Slice* next_key);
  // Currently we keep the same number of partitions for filters and indexes.
  // This would allow for some potentioal optimizations in future. If such
  // optimizations did not realize we can use different number of partitions and
  // eliminate p_index_builder_
  PartitionedIndexBuilder* const p_index_builder_;
  // The desired number of keys per partition
  uint32_t keys_per_partition_;
  // The number of keys added to the last partition so far
  uint32_t keys_added_to_partition_;
  // According to the bits builders, how many keys/prefixes added
  // in all the filters we have fully built
  uint64_t total_added_in_built_;
  BlockHandle last_encoded_handle_;
};

class PartitionedFilterBlockReader : public FilterBlockReaderCommon<Block> {
 public:
  PartitionedFilterBlockReader(const BlockBasedTable* t,
                               CachableEntry<Block>&& filter_block);

  static std::unique_ptr<FilterBlockReader> Create(
      const BlockBasedTable* table, const ReadOptions& ro,
      FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
      bool pin, BlockCacheLookupContext* lookup_context);

  bool KeyMayMatch(const Slice& key, const bool no_io,
                   const Slice* const const_ikey_ptr, GetContext* get_context,
                   BlockCacheLookupContext* lookup_context,
                   Env::IOPriority rate_limiter_priority) override;
  void KeysMayMatch(MultiGetRange* range, const bool no_io,
                    BlockCacheLookupContext* lookup_context,
                    Env::IOPriority rate_limiter_priority) override;

  bool PrefixMayMatch(const Slice& prefix, const bool no_io,
                      const Slice* const const_ikey_ptr,
                      GetContext* get_context,
                      BlockCacheLookupContext* lookup_context,
                      Env::IOPriority rate_limiter_priority) override;
  void PrefixesMayMatch(MultiGetRange* range,
                        const SliceTransform* prefix_extractor,
                        const bool no_io,
                        BlockCacheLookupContext* lookup_context,
                        Env::IOPriority rate_limiter_priority) override;

  size_t ApproximateMemoryUsage() const override;

 private:
  BlockHandle GetFilterPartitionHandle(const CachableEntry<Block>& filter_block,
                                       const Slice& entry) const;
  Status GetFilterPartitionBlock(
      FilePrefetchBuffer* prefetch_buffer, const BlockHandle& handle,
      bool no_io, GetContext* get_context,
      BlockCacheLookupContext* lookup_context,
      Env::IOPriority rate_limiter_priority,
      CachableEntry<ParsedFullFilterBlock>* filter_block) const;

  using FilterFunction = bool (FullFilterBlockReader::*)(
      const Slice& slice, const bool no_io, const Slice* const const_ikey_ptr,
      GetContext* get_context, BlockCacheLookupContext* lookup_context,
      Env::IOPriority rate_limiter_priority);
  bool MayMatch(const Slice& slice, bool no_io, const Slice* const_ikey_ptr,
                GetContext* get_context,
                BlockCacheLookupContext* lookup_context,
                Env::IOPriority rate_limiter_priority,
                FilterFunction filter_function) const;
  using FilterManyFunction = void (FullFilterBlockReader::*)(
      MultiGetRange* range, const SliceTransform* prefix_extractor,
      const bool no_io, BlockCacheLookupContext* lookup_context,
      Env::IOPriority rate_limiter_priority);
  void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor,
                bool no_io, BlockCacheLookupContext* lookup_context,
                Env::IOPriority rate_limiter_priority,
                FilterManyFunction filter_function) const;
  void MayMatchPartition(MultiGetRange* range,
                         const SliceTransform* prefix_extractor,
                         BlockHandle filter_handle, bool no_io,
                         BlockCacheLookupContext* lookup_context,
                         Env::IOPriority rate_limiter_priority,
                         FilterManyFunction filter_function) const;
  Status CacheDependencies(const ReadOptions& ro, bool pin) override;

  const InternalKeyComparator* internal_comparator() const;
  bool index_key_includes_seq() const;
  bool index_value_is_full() const;

 protected:
  // For partition blocks pinned in cache. Can be a subset of blocks
  // in case some fail insertion on attempt to pin.
  UnorderedMap<uint64_t, CachableEntry<ParsedFullFilterBlock>> filter_map_;
};

}  // namespace ROCKSDB_NAMESPACE