1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "table/block_based_filter_block.h"
#include "table/full_filter_bits_builder.h"
#include "table/full_filter_block.h"
#include "util/coding.h"
#include "util/hash.h"
namespace rocksdb {
class BlockBasedFilterBlockBuilder;
class FullFilterBlockBuilder;
FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key,
const size_t num_probes)
: bits_per_key_(bits_per_key), num_probes_(num_probes) {
assert(bits_per_key_);
}
FullFilterBitsBuilder::~FullFilterBitsBuilder() {}
void FullFilterBitsBuilder::AddKey(const Slice& key) {
uint32_t hash = BloomHash(key);
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
uint32_t total_bits, num_lines;
char* data = ReserveSpace(static_cast<int>(hash_entries_.size()),
&total_bits, &num_lines);
assert(data);
if (total_bits != 0 && num_lines != 0) {
for (auto h : hash_entries_) {
AddHash(h, data, num_lines, total_bits);
}
}
data[total_bits/8] = static_cast<char>(num_probes_);
EncodeFixed32(data + total_bits/8 + 1, static_cast<uint32_t>(num_lines));
const char* const_data = data;
buf->reset(const_data);
hash_entries_.clear();
return Slice(data, total_bits / 8 + 5);
}
uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
uint32_t num_lines =
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
// Make num_lines an odd number to make sure more bits are involved
// when determining which block.
if (num_lines % 2 == 0) {
num_lines++;
}
return num_lines * (CACHE_LINE_SIZE * 8);
}
uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
assert(bits_per_key_);
if (num_entry != 0) {
uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_);
*total_bits = GetTotalBitsForLocality(total_bits_tmp);
*num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
assert(*total_bits > 0 && *total_bits % 8 == 0);
} else {
// filter is empty, just leave space for metadata
*total_bits = 0;
*num_lines = 0;
}
// Reserve space for Filter
uint32_t sz = *total_bits / 8;
sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
return sz;
}
char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
char* data = new char[sz];
memset(data, 0, sz);
return data;
}
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
assert(bits_per_key_);
assert(space > 0);
uint32_t dont_care1, dont_care2;
int high = (int) (space * 8 / bits_per_key_ + 1);
int low = 1;
int n = high;
for (; n >= low; n--) {
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
if (sz <= space) {
break;
}
}
assert(n < high); // High should be an overestimation
return n;
}
inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
uint32_t num_lines, uint32_t total_bits) {
#ifdef NDEBUG
(void)total_bits;
#endif
assert(num_lines > 0 && total_bits > 0);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < num_probes_; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
}
namespace {
class FullFilterBitsReader : public FilterBitsReader {
public:
explicit FullFilterBitsReader(const Slice& contents)
: data_(const_cast<char*>(contents.data())),
data_len_(static_cast<uint32_t>(contents.size())),
num_probes_(0),
num_lines_(0),
log2_cache_line_size_(0) {
assert(data_);
GetFilterMeta(contents, &num_probes_, &num_lines_);
// Sanitize broken parameter
if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) {
num_lines_ = 0;
num_probes_ = 0;
} else if (num_lines_ != 0) {
while (true) {
uint32_t num_lines_at_curr_cache_size =
(data_len_ - 5) >> log2_cache_line_size_;
if (num_lines_at_curr_cache_size == 0) {
// The cache line size seems not a power of two. It's not supported
// and indicates a corruption so disable using this filter.
assert(false);
num_lines_ = 0;
num_probes_ = 0;
break;
}
if (num_lines_at_curr_cache_size == num_lines_) {
break;
}
++log2_cache_line_size_;
}
}
}
~FullFilterBitsReader() override {}
bool MayMatch(const Slice& entry) override {
if (data_len_ <= 5) { // remain same with original filter
return false;
}
// Other Error params, including a broken filter, regarded as match
if (num_probes_ == 0 || num_lines_ == 0) return true;
uint32_t hash = BloomHash(entry);
return HashMayMatch(hash, Slice(data_, data_len_),
num_probes_, num_lines_);
}
private:
// Filter meta data
char* data_;
uint32_t data_len_;
size_t num_probes_;
uint32_t num_lines_;
uint32_t log2_cache_line_size_;
// Get num_probes, and num_lines from filter
// If filter format broken, set both to 0.
void GetFilterMeta(const Slice& filter, size_t* num_probes,
uint32_t* num_lines);
// "filter" contains the data appended by a preceding call to
// FilterBitsBuilder::Finish. This method must return true if the key was
// passed to FilterBitsBuilder::AddKey. This method may return true or false
// if the key was not on the list, but it should aim to return false with a
// high probability.
//
// hash: target to be checked
// filter: the whole filter, including meta data bytes
// num_probes: number of probes, read before hand
// num_lines: filter metadata, read before hand
// Before calling this function, need to ensure the input meta data
// is valid.
bool HashMayMatch(const uint32_t& hash, const Slice& filter,
const size_t& num_probes, const uint32_t& num_lines);
// No Copy allowed
FullFilterBitsReader(const FullFilterBitsReader&);
void operator=(const FullFilterBitsReader&);
};
void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
size_t* num_probes, uint32_t* num_lines) {
uint32_t len = static_cast<uint32_t>(filter.size());
if (len <= 5) {
// filter is empty or broken
*num_probes = 0;
*num_lines = 0;
return;
}
*num_probes = filter.data()[len - 5];
*num_lines = DecodeFixed32(filter.data() + len - 4);
}
bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
const Slice& filter, const size_t& num_probes,
const uint32_t& num_lines) {
uint32_t len = static_cast<uint32_t>(filter.size());
if (len <= 5) return false; // remain the same with original filter
// It is ensured the params are valid before calling it
assert(num_probes != 0);
assert(num_lines != 0 && (len - 5) % num_lines == 0);
const char* data = filter.data();
uint32_t h = hash;
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
// Left shift by an extra 3 to convert bytes to bits
uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3);
PREFETCH(&data[b / 8], 0 /* rw */, 1 /* locality */);
PREFETCH(&data[b / 8 + (1 << log2_cache_line_size_) - 1], 0 /* rw */,
1 /* locality */);
for (uint32_t i = 0; i < num_probes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h & ((1 << (log2_cache_line_size_ + 3)) - 1));
if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
return true;
}
// An implementation of filter policy
class BloomFilterPolicy : public FilterPolicy {
public:
explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder)
: bits_per_key_(bits_per_key), hash_func_(BloomHash),
use_block_based_builder_(use_block_based_builder) {
initialize();
}
~BloomFilterPolicy() override {}
const char* Name() const override { return "rocksdb.BuiltinBloomFilter"; }
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
// Compute bloom filter size (in both bits and bytes)
size_t bits = n * bits_per_key_;
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
size_t bytes = (bits + 7) / 8;
bits = bytes * 8;
const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes
char* array = &(*dst)[init_size];
for (size_t i = 0; i < (size_t)n; i++) {
// Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006].
uint32_t h = hash_func_(keys[i]);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < num_probes_; j++) {
const uint32_t bitpos = h % bits;
array[bitpos/8] |= (1 << (bitpos % 8));
h += delta;
}
}
}
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
const size_t len = bloom_filter.size();
if (len < 2) return false;
const char* array = bloom_filter.data();
const size_t bits = (len - 1) * 8;
// Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters.
const size_t k = array[len-1];
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
uint32_t h = hash_func_(key);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k; j++) {
const uint32_t bitpos = h % bits;
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
h += delta;
}
return true;
}
FilterBitsBuilder* GetFilterBitsBuilder() const override {
if (use_block_based_builder_) {
return nullptr;
}
return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
}
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
return new FullFilterBitsReader(contents);
}
// If choose to use block based builder
bool UseBlockBasedBuilder() { return use_block_based_builder_; }
private:
size_t bits_per_key_;
size_t num_probes_;
uint32_t (*hash_func_)(const Slice& key);
const bool use_block_based_builder_;
void initialize() {
// We intentionally round down to reduce probing cost a little bit
num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
if (num_probes_ < 1) num_probes_ = 1;
if (num_probes_ > 30) num_probes_ = 30;
}
};
} // namespace
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
bool use_block_based_builder) {
return new BloomFilterPolicy(bits_per_key, use_block_based_builder);
}
} // namespace rocksdb
|