diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/rocksdb/util/bloom_test.cc | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/rocksdb/util/bloom_test.cc | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/src/rocksdb/util/bloom_test.cc b/src/rocksdb/util/bloom_test.cc new file mode 100644 index 00000000..4b25e9b6 --- /dev/null +++ b/src/rocksdb/util/bloom_test.cc @@ -0,0 +1,317 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef GFLAGS +#include <cstdio> +int main() { + fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); + return 0; +} +#else + +#include <vector> + +#include "rocksdb/filter_policy.h" +#include "table/full_filter_bits_builder.h" +#include "util/arena.h" +#include "util/gflags_compat.h" +#include "util/logging.h" +#include "util/testharness.h" +#include "util/testutil.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; + +DEFINE_int32(bits_per_key, 10, ""); + +namespace rocksdb { + +static const int kVerbose = 1; + +static Slice Key(int i, char* buffer) { + std::string s; + PutFixed32(&s, static_cast<uint32_t>(i)); + memcpy(buffer, s.c_str(), sizeof(i)); + return Slice(buffer, sizeof(i)); +} + +static int NextLength(int length) { + if (length < 10) { + length += 1; + } else if (length < 100) { + length += 10; + } else if (length < 1000) { + length += 100; + } else { + length += 1000; + } + return length; +} + +class BloomTest : public testing::Test { + private: + const FilterPolicy* policy_; + std::string filter_; + std::vector<std::string> keys_; + + public: + BloomTest() : policy_( + NewBloomFilterPolicy(FLAGS_bits_per_key)) {} + + ~BloomTest() override { delete policy_; } + + void Reset() { + keys_.clear(); + filter_.clear(); + } + + void Add(const Slice& s) { + keys_.push_back(s.ToString()); + } + + void Build() { + std::vector<Slice> key_slices; + for (size_t i = 0; i < keys_.size(); i++) { + key_slices.push_back(Slice(keys_[i])); + } + filter_.clear(); + policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()), + &filter_); + keys_.clear(); + if (kVerbose >= 2) DumpFilter(); + } + + size_t FilterSize() const { + return filter_.size(); + } + + void DumpFilter() { + fprintf(stderr, "F("); + for (size_t i = 0; i+1 < filter_.size(); i++) { + const unsigned int c = static_cast<unsigned int>(filter_[i]); + for (int j = 0; j < 8; j++) { + fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.'); + } + } + fprintf(stderr, ")\n"); + } + + bool Matches(const Slice& s) { + if (!keys_.empty()) { + Build(); + } + return policy_->KeyMayMatch(s, filter_); + } + + double FalsePositiveRate() { + char buffer[sizeof(int)]; + int result = 0; + for (int i = 0; i < 10000; i++) { + if (Matches(Key(i + 1000000000, buffer))) { + result++; + } + } + return result / 10000.0; + } +}; + +TEST_F(BloomTest, EmptyFilter) { + ASSERT_TRUE(! Matches("hello")); + ASSERT_TRUE(! Matches("world")); +} + +TEST_F(BloomTest, Small) { + Add("hello"); + Add("world"); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + ASSERT_TRUE(! Matches("x")); + ASSERT_TRUE(! Matches("foo")); +} + +TEST_F(BloomTest, VaryingLengths) { + char buffer[sizeof(int)]; + + // Count number of filters that significantly exceed the false positive rate + int mediocre_filters = 0; + int good_filters = 0; + + for (int length = 1; length <= 10000; length = NextLength(length)) { + Reset(); + for (int i = 0; i < length; i++) { + Add(Key(i, buffer)); + } + Build(); + + ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length; + + // All added keys must match + for (int i = 0; i < length; i++) { + ASSERT_TRUE(Matches(Key(i, buffer))) + << "Length " << length << "; key " << i; + } + + // Check false positive rate + double rate = FalsePositiveRate(); + if (kVerbose >= 1) { + fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", + rate*100.0, length, static_cast<int>(FilterSize())); + } + ASSERT_LE(rate, 0.02); // Must not be over 2% + if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often + else good_filters++; + } + if (kVerbose >= 1) { + fprintf(stderr, "Filters: %d good, %d mediocre\n", + good_filters, mediocre_filters); + } + ASSERT_LE(mediocre_filters, good_filters/5); +} + +// Different bits-per-byte + +class FullBloomTest : public testing::Test { + private: + const FilterPolicy* policy_; + std::unique_ptr<FilterBitsBuilder> bits_builder_; + std::unique_ptr<FilterBitsReader> bits_reader_; + std::unique_ptr<const char[]> buf_; + size_t filter_size_; + + public: + FullBloomTest() : + policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, false)), + filter_size_(0) { + Reset(); + } + + ~FullBloomTest() override { delete policy_; } + + FullFilterBitsBuilder* GetFullFilterBitsBuilder() { + return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get()); + } + + void Reset() { + bits_builder_.reset(policy_->GetFilterBitsBuilder()); + bits_reader_.reset(nullptr); + buf_.reset(nullptr); + filter_size_ = 0; + } + + void Add(const Slice& s) { + bits_builder_->AddKey(s); + } + + void Build() { + Slice filter = bits_builder_->Finish(&buf_); + bits_reader_.reset(policy_->GetFilterBitsReader(filter)); + filter_size_ = filter.size(); + } + + size_t FilterSize() const { + return filter_size_; + } + + bool Matches(const Slice& s) { + if (bits_reader_ == nullptr) { + Build(); + } + return bits_reader_->MayMatch(s); + } + + double FalsePositiveRate() { + char buffer[sizeof(int)]; + int result = 0; + for (int i = 0; i < 10000; i++) { + if (Matches(Key(i + 1000000000, buffer))) { + result++; + } + } + return result / 10000.0; + } +}; + +TEST_F(FullBloomTest, FilterSize) { + uint32_t dont_care1, dont_care2; + auto full_bits_builder = GetFullFilterBitsBuilder(); + for (int n = 1; n < 100; n++) { + auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2); + auto n2 = full_bits_builder->CalculateNumEntry(space); + ASSERT_GE(n2, n); + auto space2 = + full_bits_builder->CalculateSpace(n2, &dont_care1, &dont_care2); + ASSERT_EQ(space, space2); + } +} + +TEST_F(FullBloomTest, FullEmptyFilter) { + // Empty filter is not match, at this level + ASSERT_TRUE(!Matches("hello")); + ASSERT_TRUE(!Matches("world")); +} + +TEST_F(FullBloomTest, FullSmall) { + Add("hello"); + Add("world"); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + ASSERT_TRUE(!Matches("x")); + ASSERT_TRUE(!Matches("foo")); +} + +TEST_F(FullBloomTest, FullVaryingLengths) { + char buffer[sizeof(int)]; + + // Count number of filters that significantly exceed the false positive rate + int mediocre_filters = 0; + int good_filters = 0; + + for (int length = 1; length <= 10000; length = NextLength(length)) { + Reset(); + for (int i = 0; i < length; i++) { + Add(Key(i, buffer)); + } + Build(); + + ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 128 + 5)) << length; + + // All added keys must match + for (int i = 0; i < length; i++) { + ASSERT_TRUE(Matches(Key(i, buffer))) + << "Length " << length << "; key " << i; + } + + // Check false positive rate + double rate = FalsePositiveRate(); + if (kVerbose >= 1) { + fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", + rate*100.0, length, static_cast<int>(FilterSize())); + } + ASSERT_LE(rate, 0.02); // Must not be over 2% + if (rate > 0.0125) + mediocre_filters++; // Allowed, but not too often + else + good_filters++; + } + if (kVerbose >= 1) { + fprintf(stderr, "Filters: %d good, %d mediocre\n", + good_filters, mediocre_filters); + } + ASSERT_LE(mediocre_filters, good_filters/5); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} + +#endif // GFLAGS |