diff options
Diffstat (limited to 'src/rocksdb/table/block_test.cc')
-rw-r--r-- | src/rocksdb/table/block_test.cc | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/src/rocksdb/table/block_test.cc b/src/rocksdb/table/block_test.cc new file mode 100644 index 00000000..3e0ff3ea --- /dev/null +++ b/src/rocksdb/table/block_test.cc @@ -0,0 +1,609 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include <stdio.h> +#include <algorithm> +#include <set> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "db/dbformat.h" +#include "db/memtable.h" +#include "db/write_batch_internal.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/table.h" +#include "table/block.h" +#include "table/block_builder.h" +#include "table/format.h" +#include "util/random.h" +#include "util/testharness.h" +#include "util/testutil.h" + +namespace rocksdb { + +static std::string RandomString(Random *rnd, int len) { + std::string r; + test::RandomString(rnd, len, &r); + return r; +} +std::string GenerateKey(int primary_key, int secondary_key, int padding_size, + Random *rnd) { + char buf[50]; + char *p = &buf[0]; + snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); + std::string k(p); + if (padding_size) { + k += RandomString(rnd, padding_size); + } + + return k; +} + +// Generate random key value pairs. +// The generated key will be sorted. You can tune the parameters to generated +// different kinds of test key/value pairs for different scenario. +void GenerateRandomKVs(std::vector<std::string> *keys, + std::vector<std::string> *values, const int from, + const int len, const int step = 1, + const int padding_size = 0, + const int keys_share_prefix = 1) { + Random rnd(302); + + // generate different prefix + for (int i = from; i < from + len; i += step) { + // generating keys that shares the prefix + for (int j = 0; j < keys_share_prefix; ++j) { + keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); + + // 100 bytes values + values->emplace_back(RandomString(&rnd, 100)); + } + } +} + +// Same as GenerateRandomKVs but the values are BlockHandle +void GenerateRandomKBHs(std::vector<std::string> *keys, + std::vector<BlockHandle> *values, const int from, + const int len, const int step = 1, + const int padding_size = 0, + const int keys_share_prefix = 1) { + Random rnd(302); + uint64_t offset = 0; + + // generate different prefix + for (int i = from; i < from + len; i += step) { + // generate keys that shares the prefix + for (int j = 0; j < keys_share_prefix; ++j) { + keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); + + uint64_t size = rnd.Uniform(1024 * 16); + BlockHandle handle(offset, size); + offset += size + kBlockTrailerSize; + values->emplace_back(handle); + } + } +} + +class BlockTest : public testing::Test {}; + +// block test +TEST_F(BlockTest, SimpleTest) { + Random rnd(301); + Options options = Options(); + std::unique_ptr<InternalKeyComparator> ic; + ic.reset(new test::PlainInternalKeyComparator(options.comparator)); + + std::vector<std::string> keys; + std::vector<std::string> values; + BlockBuilder builder(16); + int num_records = 100000; + + GenerateRandomKVs(&keys, &values, 0, num_records); + // add a bunch of records to a block + for (int i = 0; i < num_records; i++) { + builder.Add(keys[i], values[i]); + } + + // read serialized contents of the block + Slice rawblock = builder.Finish(); + + // create block reader + BlockContents contents; + contents.data = rawblock; + Block reader(std::move(contents), kDisableGlobalSequenceNumber); + + // read contents of block sequentially + int count = 0; + InternalIterator *iter = + reader.NewIterator<DataBlockIter>(options.comparator, options.comparator); + for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) { + // read kv from block + Slice k = iter->key(); + Slice v = iter->value(); + + // compare with lookaside array + ASSERT_EQ(k.ToString().compare(keys[count]), 0); + ASSERT_EQ(v.ToString().compare(values[count]), 0); + } + delete iter; + + // read block contents randomly + iter = + reader.NewIterator<DataBlockIter>(options.comparator, options.comparator); + for (int i = 0; i < num_records; i++) { + // find a random key in the lookaside array + int index = rnd.Uniform(num_records); + Slice k(keys[index]); + + // search in block for this key + iter->Seek(k); + ASSERT_TRUE(iter->Valid()); + Slice v = iter->value(); + ASSERT_EQ(v.ToString().compare(values[index]), 0); + } + delete iter; +} + +TEST_F(BlockTest, ValueDeltaEncodingTest) { + Random rnd(301); + Options options = Options(); + std::unique_ptr<InternalKeyComparator> ic; + ic.reset(new test::PlainInternalKeyComparator(options.comparator)); + + std::vector<std::string> keys; + std::vector<BlockHandle> values; + const bool kUseDeltaEncoding = true; + const bool kUseValueDeltaEncoding = true; + BlockBuilder builder(16, kUseDeltaEncoding, kUseValueDeltaEncoding); + int num_records = 100; + + GenerateRandomKBHs(&keys, &values, 0, num_records); + // add a bunch of records to a block + BlockHandle last_encoded_handle; + for (int i = 0; i < num_records; i++) { + auto block_handle = values[i]; + std::string handle_encoding; + block_handle.EncodeTo(&handle_encoding); + std::string handle_delta_encoding; + PutVarsignedint64(&handle_delta_encoding, + block_handle.size() - last_encoded_handle.size()); + last_encoded_handle = block_handle; + const Slice handle_delta_encoding_slice(handle_delta_encoding); + builder.Add(keys[i], handle_encoding, &handle_delta_encoding_slice); + } + + // read serialized contents of the block + Slice rawblock = builder.Finish(); + + // create block reader + BlockContents contents; + contents.data = rawblock; + Block reader(std::move(contents), kDisableGlobalSequenceNumber); + + const bool kTotalOrderSeek = true; + const bool kIncludesSeq = true; + const bool kValueIsFull = !kUseValueDeltaEncoding; + IndexBlockIter *kNullIter = nullptr; + Statistics *kNullStats = nullptr; + // read contents of block sequentially + int count = 0; + InternalIteratorBase<BlockHandle> *iter = reader.NewIterator<IndexBlockIter>( + options.comparator, options.comparator, kNullIter, kNullStats, + kTotalOrderSeek, kIncludesSeq, kValueIsFull); + for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) { + // read kv from block + Slice k = iter->key(); + BlockHandle handle = iter->value(); + + // compare with lookaside array + ASSERT_EQ(k.ToString().compare(keys[count]), 0); + + ASSERT_EQ(values[count].offset(), handle.offset()); + ASSERT_EQ(values[count].size(), handle.size()); + } + delete iter; + + // read block contents randomly + iter = reader.NewIterator<IndexBlockIter>( + options.comparator, options.comparator, kNullIter, kNullStats, + kTotalOrderSeek, kIncludesSeq, kValueIsFull); + for (int i = 0; i < num_records; i++) { + // find a random key in the lookaside array + int index = rnd.Uniform(num_records); + Slice k(keys[index]); + + // search in block for this key + iter->Seek(k); + ASSERT_TRUE(iter->Valid()); + BlockHandle handle = iter->value(); + ASSERT_EQ(values[index].offset(), handle.offset()); + ASSERT_EQ(values[index].size(), handle.size()); + } + delete iter; +} +// return the block contents +BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder, + const std::vector<std::string> &keys, + const std::vector<std::string> &values, + const int /*prefix_group_size*/ = 1) { + builder->reset(new BlockBuilder(1 /* restart interval */)); + + // Add only half of the keys + for (size_t i = 0; i < keys.size(); ++i) { + (*builder)->Add(keys[i], values[i]); + } + Slice rawblock = (*builder)->Finish(); + + BlockContents contents; + contents.data = rawblock; + + return contents; +} + +void CheckBlockContents(BlockContents contents, const int max_key, + const std::vector<std::string> &keys, + const std::vector<std::string> &values) { + const size_t prefix_size = 6; + // create block reader + BlockContents contents_ref(contents.data); + Block reader1(std::move(contents), kDisableGlobalSequenceNumber); + Block reader2(std::move(contents_ref), kDisableGlobalSequenceNumber); + + std::unique_ptr<const SliceTransform> prefix_extractor( + NewFixedPrefixTransform(prefix_size)); + + std::unique_ptr<InternalIterator> regular_iter( + reader2.NewIterator<DataBlockIter>(BytewiseComparator(), + BytewiseComparator())); + + // Seek existent keys + for (size_t i = 0; i < keys.size(); i++) { + regular_iter->Seek(keys[i]); + ASSERT_OK(regular_iter->status()); + ASSERT_TRUE(regular_iter->Valid()); + + Slice v = regular_iter->value(); + ASSERT_EQ(v.ToString().compare(values[i]), 0); + } + + // Seek non-existent keys. + // For hash index, if no key with a given prefix is not found, iterator will + // simply be set as invalid; whereas the binary search based iterator will + // return the one that is closest. + for (int i = 1; i < max_key - 1; i += 2) { + auto key = GenerateKey(i, 0, 0, nullptr); + regular_iter->Seek(key); + ASSERT_TRUE(regular_iter->Valid()); + } +} + +// In this test case, no two key share same prefix. +TEST_F(BlockTest, SimpleIndexHash) { + const int kMaxKey = 100000; + std::vector<std::string> keys; + std::vector<std::string> values; + GenerateRandomKVs(&keys, &values, 0 /* first key id */, + kMaxKey /* last key id */, 2 /* step */, + 8 /* padding size (8 bytes randomly generated suffix) */); + + std::unique_ptr<BlockBuilder> builder; + auto contents = GetBlockContents(&builder, keys, values); + + CheckBlockContents(std::move(contents), kMaxKey, keys, values); +} + +TEST_F(BlockTest, IndexHashWithSharedPrefix) { + const int kMaxKey = 100000; + // for each prefix, there will be 5 keys starts with it. + const int kPrefixGroup = 5; + std::vector<std::string> keys; + std::vector<std::string> values; + // Generate keys with same prefix. + GenerateRandomKVs(&keys, &values, 0, // first key id + kMaxKey, // last key id + 2, // step + 10, // padding size, + kPrefixGroup); + + std::unique_ptr<BlockBuilder> builder; + auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup); + + CheckBlockContents(std::move(contents), kMaxKey, keys, values); +} + +// A slow and accurate version of BlockReadAmpBitmap that simply store +// all the marked ranges in a set. +class BlockReadAmpBitmapSlowAndAccurate { + public: + void Mark(size_t start_offset, size_t end_offset) { + assert(end_offset >= start_offset); + marked_ranges_.emplace(end_offset, start_offset); + } + + void ResetCheckSequence() { iter_valid_ = false; } + + // Return true if any byte in this range was Marked + // This does linear search from the previous position. When calling + // multiple times, `offset` needs to be incremental to get correct results. + // Call ResetCheckSequence() to reset it. + bool IsPinMarked(size_t offset) { + if (iter_valid_) { + // Has existing iterator, try linear search from + // the iterator. + for (int i = 0; i < 64; i++) { + if (offset < iter_->second) { + return false; + } + if (offset <= iter_->first) { + return true; + } + + iter_++; + if (iter_ == marked_ranges_.end()) { + iter_valid_ = false; + return false; + } + } + } + // Initial call or have linear searched too many times. + // Do binary search. + iter_ = marked_ranges_.lower_bound( + std::make_pair(offset, static_cast<size_t>(0))); + if (iter_ == marked_ranges_.end()) { + iter_valid_ = false; + return false; + } + iter_valid_ = true; + return offset <= iter_->first && offset >= iter_->second; + } + + private: + std::set<std::pair<size_t, size_t>> marked_ranges_; + std::set<std::pair<size_t, size_t>>::iterator iter_; + bool iter_valid_ = false; +}; + +TEST_F(BlockTest, BlockReadAmpBitmap) { + uint32_t pin_offset = 0; + SyncPoint::GetInstance()->SetCallBack( + "BlockReadAmpBitmap:rnd", [&pin_offset](void *arg) { + pin_offset = *(static_cast<uint32_t *>(arg)); + }); + SyncPoint::GetInstance()->EnableProcessing(); + std::vector<size_t> block_sizes = { + 1, // 1 byte + 32, // 32 bytes + 61, // 61 bytes + 64, // 64 bytes + 512, // 0.5 KB + 1024, // 1 KB + 1024 * 4, // 4 KB + 1024 * 10, // 10 KB + 1024 * 50, // 50 KB + 1024 * 1024 * 4, // 5 MB + 777, + 124653, + }; + const size_t kBytesPerBit = 64; + + Random rnd(301); + for (size_t block_size : block_sizes) { + std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics(); + BlockReadAmpBitmap read_amp_bitmap(block_size, kBytesPerBit, stats.get()); + BlockReadAmpBitmapSlowAndAccurate read_amp_slow_and_accurate; + + size_t needed_bits = (block_size / kBytesPerBit); + if (block_size % kBytesPerBit != 0) { + needed_bits++; + } + + ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES), block_size); + + // Generate some random entries + std::vector<size_t> random_entry_offsets; + for (int i = 0; i < 1000; i++) { + random_entry_offsets.push_back(rnd.Next() % block_size); + } + std::sort(random_entry_offsets.begin(), random_entry_offsets.end()); + auto it = + std::unique(random_entry_offsets.begin(), random_entry_offsets.end()); + random_entry_offsets.resize( + std::distance(random_entry_offsets.begin(), it)); + + std::vector<std::pair<size_t, size_t>> random_entries; + for (size_t i = 0; i < random_entry_offsets.size(); i++) { + size_t entry_start = random_entry_offsets[i]; + size_t entry_end; + if (i + 1 < random_entry_offsets.size()) { + entry_end = random_entry_offsets[i + 1] - 1; + } else { + entry_end = block_size - 1; + } + random_entries.emplace_back(entry_start, entry_end); + } + + for (size_t i = 0; i < random_entries.size(); i++) { + read_amp_slow_and_accurate.ResetCheckSequence(); + auto ¤t_entry = random_entries[rnd.Next() % random_entries.size()]; + + read_amp_bitmap.Mark(static_cast<uint32_t>(current_entry.first), + static_cast<uint32_t>(current_entry.second)); + read_amp_slow_and_accurate.Mark(current_entry.first, + current_entry.second); + + size_t total_bits = 0; + for (size_t bit_idx = 0; bit_idx < needed_bits; bit_idx++) { + total_bits += read_amp_slow_and_accurate.IsPinMarked( + bit_idx * kBytesPerBit + pin_offset); + } + size_t expected_estimate_useful = total_bits * kBytesPerBit; + size_t got_estimate_useful = + stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES); + ASSERT_EQ(expected_estimate_useful, got_estimate_useful); + } + } + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(BlockTest, BlockWithReadAmpBitmap) { + Random rnd(301); + Options options = Options(); + std::unique_ptr<InternalKeyComparator> ic; + ic.reset(new test::PlainInternalKeyComparator(options.comparator)); + + std::vector<std::string> keys; + std::vector<std::string> values; + BlockBuilder builder(16); + int num_records = 10000; + + GenerateRandomKVs(&keys, &values, 0, num_records, 1); + // add a bunch of records to a block + for (int i = 0; i < num_records; i++) { + builder.Add(keys[i], values[i]); + } + + Slice rawblock = builder.Finish(); + const size_t kBytesPerBit = 8; + + // Read the block sequentially using Next() + { + std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics(); + + // create block reader + BlockContents contents; + contents.data = rawblock; + Block reader(std::move(contents), kDisableGlobalSequenceNumber, + kBytesPerBit, stats.get()); + + // read contents of block sequentially + size_t read_bytes = 0; + DataBlockIter *iter = + static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>( + options.comparator, options.comparator, nullptr, stats.get())); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + iter->value(); + read_bytes += iter->TEST_CurrentEntrySize(); + + double semi_acc_read_amp = + static_cast<double>(read_bytes) / rawblock.size(); + double read_amp = static_cast<double>(stats->getTickerCount( + READ_AMP_ESTIMATE_USEFUL_BYTES)) / + stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); + + // Error in read amplification will be less than 1% if we are reading + // sequentially + double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; + EXPECT_LT(error_pct, 1); + } + + delete iter; + } + + // Read the block sequentially using Seek() + { + std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics(); + + // create block reader + BlockContents contents; + contents.data = rawblock; + Block reader(std::move(contents), kDisableGlobalSequenceNumber, + kBytesPerBit, stats.get()); + + size_t read_bytes = 0; + DataBlockIter *iter = + static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>( + options.comparator, options.comparator, nullptr, stats.get())); + for (int i = 0; i < num_records; i++) { + Slice k(keys[i]); + + // search in block for this key + iter->Seek(k); + iter->value(); + read_bytes += iter->TEST_CurrentEntrySize(); + + double semi_acc_read_amp = + static_cast<double>(read_bytes) / rawblock.size(); + double read_amp = static_cast<double>(stats->getTickerCount( + READ_AMP_ESTIMATE_USEFUL_BYTES)) / + stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); + + // Error in read amplification will be less than 1% if we are reading + // sequentially + double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; + EXPECT_LT(error_pct, 1); + } + delete iter; + } + + // Read the block randomly + { + std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics(); + + // create block reader + BlockContents contents; + contents.data = rawblock; + Block reader(std::move(contents), kDisableGlobalSequenceNumber, + kBytesPerBit, stats.get()); + + size_t read_bytes = 0; + DataBlockIter *iter = + static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>( + options.comparator, options.comparator, nullptr, stats.get())); + std::unordered_set<int> read_keys; + for (int i = 0; i < num_records; i++) { + int index = rnd.Uniform(num_records); + Slice k(keys[index]); + + iter->Seek(k); + iter->value(); + if (read_keys.find(index) == read_keys.end()) { + read_keys.insert(index); + read_bytes += iter->TEST_CurrentEntrySize(); + } + + double semi_acc_read_amp = + static_cast<double>(read_bytes) / rawblock.size(); + double read_amp = static_cast<double>(stats->getTickerCount( + READ_AMP_ESTIMATE_USEFUL_BYTES)) / + stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); + + double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; + // Error in read amplification will be less than 2% if we are reading + // randomly + EXPECT_LT(error_pct, 2); + } + delete iter; + } +} + +TEST_F(BlockTest, ReadAmpBitmapPow2) { + std::shared_ptr<Statistics> stats = rocksdb::CreateDBStatistics(); + ASSERT_EQ(BlockReadAmpBitmap(100, 1, stats.get()).GetBytesPerBit(), 1); + ASSERT_EQ(BlockReadAmpBitmap(100, 2, stats.get()).GetBytesPerBit(), 2); + ASSERT_EQ(BlockReadAmpBitmap(100, 4, stats.get()).GetBytesPerBit(), 4); + ASSERT_EQ(BlockReadAmpBitmap(100, 8, stats.get()).GetBytesPerBit(), 8); + ASSERT_EQ(BlockReadAmpBitmap(100, 16, stats.get()).GetBytesPerBit(), 16); + ASSERT_EQ(BlockReadAmpBitmap(100, 32, stats.get()).GetBytesPerBit(), 32); + + ASSERT_EQ(BlockReadAmpBitmap(100, 3, stats.get()).GetBytesPerBit(), 2); + ASSERT_EQ(BlockReadAmpBitmap(100, 7, stats.get()).GetBytesPerBit(), 4); + ASSERT_EQ(BlockReadAmpBitmap(100, 11, stats.get()).GetBytesPerBit(), 8); + ASSERT_EQ(BlockReadAmpBitmap(100, 17, stats.get()).GetBytesPerBit(), 16); + ASSERT_EQ(BlockReadAmpBitmap(100, 33, stats.get()).GetBytesPerBit(), 32); + ASSERT_EQ(BlockReadAmpBitmap(100, 35, stats.get()).GetBytesPerBit(), 32); +} + +} // namespace rocksdb + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} |