diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/table/table_test.cc | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/table/table_test.cc')
-rw-r--r-- | src/rocksdb/table/table_test.cc | 5596 |
1 files changed, 5596 insertions, 0 deletions
diff --git a/src/rocksdb/table/table_test.cc b/src/rocksdb/table/table_test.cc new file mode 100644 index 000000000..af9c177e8 --- /dev/null +++ b/src/rocksdb/table/table_test.cc @@ -0,0 +1,5596 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/table.h" + +#include <gtest/gtest.h> +#include <stddef.h> +#include <stdio.h> + +#include <algorithm> +#include <iostream> +#include <map> +#include <memory> +#include <string> +#include <unordered_set> +#include <vector> + +#include "cache/lru_cache.h" +#include "db/db_test_util.h" +#include "db/dbformat.h" +#include "db/memtable.h" +#include "db/write_batch_internal.h" +#include "memtable/stl_wrappers.h" +#include "monitoring/statistics.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/cache.h" +#include "rocksdb/compression_type.h" +#include "rocksdb/convenience.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/file_checksum.h" +#include "rocksdb/file_system.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/iterator.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/options.h" +#include "rocksdb/perf_context.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/statistics.h" +#include "rocksdb/table_properties.h" +#include "rocksdb/trace_record.h" +#include "rocksdb/unique_id.h" +#include "rocksdb/write_buffer_manager.h" +#include "table/block_based/block.h" +#include "table/block_based/block_based_table_builder.h" +#include "table/block_based/block_based_table_factory.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/block_builder.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/block_based/flush_block_policy.h" +#include "table/block_fetcher.h" +#include "table/format.h" +#include "table/get_context.h" +#include "table/internal_iterator.h" +#include "table/meta_blocks.h" +#include "table/plain/plain_table_factory.h" +#include "table/scoped_arena_iterator.h" +#include "table/sst_file_writer_collectors.h" +#include "table/unique_id_impl.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/coding_lean.h" +#include "util/compression.h" +#include "util/file_checksum_helper.h" +#include "util/random.h" +#include "util/string_util.h" +#include "utilities/memory_allocators.h" +#include "utilities/merge_operators.h" + +namespace ROCKSDB_NAMESPACE { + +extern const uint64_t kLegacyBlockBasedTableMagicNumber; +extern const uint64_t kLegacyPlainTableMagicNumber; +extern const uint64_t kBlockBasedTableMagicNumber; +extern const uint64_t kPlainTableMagicNumber; + +namespace { + +const std::string kDummyValue(10000, 'o'); + +// DummyPropertiesCollector used to test BlockBasedTableProperties +class DummyPropertiesCollector : public TablePropertiesCollector { + public: + const char* Name() const override { return "DummyPropertiesCollector"; } + + Status Finish(UserCollectedProperties* /*properties*/) override { + return Status::OK(); + } + + Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override { + return Status::OK(); + } + + UserCollectedProperties GetReadableProperties() const override { + return UserCollectedProperties{}; + } +}; + +class DummyPropertiesCollectorFactory1 + : public TablePropertiesCollectorFactory { + public: + TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /*context*/) override { + return new DummyPropertiesCollector(); + } + const char* Name() const override { + return "DummyPropertiesCollectorFactory1"; + } +}; + +class DummyPropertiesCollectorFactory2 + : public TablePropertiesCollectorFactory { + public: + TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /*context*/) override { + return new DummyPropertiesCollector(); + } + const char* Name() const override { + return "DummyPropertiesCollectorFactory2"; + } +}; + +// Return reverse of "key". +// Used to test non-lexicographic comparators. +std::string Reverse(const Slice& key) { + auto rev = key.ToString(); + std::reverse(rev.begin(), rev.end()); + return rev; +} + +class ReverseKeyComparator : public Comparator { + public: + const char* Name() const override { + return "rocksdb.ReverseBytewiseComparator"; + } + + int Compare(const Slice& a, const Slice& b) const override { + return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); + } + + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + std::string s = Reverse(*start); + std::string l = Reverse(limit); + BytewiseComparator()->FindShortestSeparator(&s, l); + *start = Reverse(s); + } + + void FindShortSuccessor(std::string* key) const override { + std::string s = Reverse(*key); + BytewiseComparator()->FindShortSuccessor(&s); + *key = Reverse(s); + } +}; + +ReverseKeyComparator reverse_key_comparator; + +void Increment(const Comparator* cmp, std::string* key) { + if (cmp == BytewiseComparator()) { + key->push_back('\0'); + } else { + assert(cmp == &reverse_key_comparator); + std::string rev = Reverse(*key); + rev.push_back('\0'); + *key = Reverse(rev); + } +} + +const auto kUnknownColumnFamily = + TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; + +} // namespace + +// Helper class for tests to unify the interface between +// BlockBuilder/TableBuilder and Block/Table. +class Constructor { + public: + explicit Constructor(const Comparator* cmp) + : data_(stl_wrappers::LessOfComparator(cmp)) {} + virtual ~Constructor() {} + + void Add(const std::string& key, const Slice& value) { + data_[key] = value.ToString(); + } + + // Finish constructing the data structure with all the keys that have + // been added so far. Returns the keys in sorted order in "*keys" + // and stores the key/value pairs in "*kvmap" + void Finish(const Options& options, const ImmutableOptions& ioptions, + const MutableCFOptions& moptions, + const BlockBasedTableOptions& table_options, + const InternalKeyComparator& internal_comparator, + std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) { + last_internal_comparator_ = &internal_comparator; + *kvmap = data_; + keys->clear(); + for (const auto& kv : data_) { + keys->push_back(kv.first); + } + data_.clear(); + Status s = FinishImpl(options, ioptions, moptions, table_options, + internal_comparator, *kvmap); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + // Construct the data structure from the data in "data" + virtual Status FinishImpl(const Options& options, + const ImmutableOptions& ioptions, + const MutableCFOptions& moptions, + const BlockBasedTableOptions& table_options, + const InternalKeyComparator& internal_comparator, + const stl_wrappers::KVMap& data) = 0; + + virtual InternalIterator* NewIterator( + const SliceTransform* prefix_extractor = nullptr) const = 0; + + virtual const stl_wrappers::KVMap& data() { return data_; } + + virtual bool IsArenaMode() const { return false; } + + virtual DB* db() const { return nullptr; } // Overridden in DBConstructor + + virtual bool AnywayDeleteIterator() const { return false; } + + protected: + const InternalKeyComparator* last_internal_comparator_; + + private: + stl_wrappers::KVMap data_; +}; + +// A helper class that converts internal format keys into user keys +class KeyConvertingIterator : public InternalIterator { + public: + explicit KeyConvertingIterator(InternalIterator* iter, + bool arena_mode = false) + : iter_(iter), arena_mode_(arena_mode) {} + ~KeyConvertingIterator() override { + if (arena_mode_) { + iter_->~InternalIterator(); + } else { + delete iter_; + } + } + bool Valid() const override { return iter_->Valid() && status_.ok(); } + void Seek(const Slice& target) override { + ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); + std::string encoded; + AppendInternalKey(&encoded, ikey); + iter_->Seek(encoded); + } + void SeekForPrev(const Slice& target) override { + ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); + std::string encoded; + AppendInternalKey(&encoded, ikey); + iter_->SeekForPrev(encoded); + } + void SeekToFirst() override { iter_->SeekToFirst(); } + void SeekToLast() override { iter_->SeekToLast(); } + void Next() override { iter_->Next(); } + void Prev() override { iter_->Prev(); } + IterBoundCheck UpperBoundCheckResult() override { + return iter_->UpperBoundCheckResult(); + } + + Slice key() const override { + assert(Valid()); + ParsedInternalKey parsed_key; + Status pik_status = + ParseInternalKey(iter_->key(), &parsed_key, true /* log_err_key */); + if (!pik_status.ok()) { + status_ = pik_status; + return Slice(status_.getState()); + } + return parsed_key.user_key; + } + + Slice value() const override { return iter_->value(); } + Status status() const override { + return status_.ok() ? iter_->status() : status_; + } + + private: + mutable Status status_; + InternalIterator* iter_; + bool arena_mode_; + + // No copying allowed + KeyConvertingIterator(const KeyConvertingIterator&); + void operator=(const KeyConvertingIterator&); +}; + +// `BlockConstructor` APIs always accept/return user keys. +class BlockConstructor : public Constructor { + public: + explicit BlockConstructor(const Comparator* cmp) + : Constructor(cmp), comparator_(cmp), block_(nullptr) {} + ~BlockConstructor() override { delete block_; } + Status FinishImpl(const Options& /*options*/, + const ImmutableOptions& /*ioptions*/, + const MutableCFOptions& /*moptions*/, + const BlockBasedTableOptions& table_options, + const InternalKeyComparator& /*internal_comparator*/, + const stl_wrappers::KVMap& kv_map) override { + delete block_; + block_ = nullptr; + BlockBuilder builder(table_options.block_restart_interval); + + for (const auto& kv : kv_map) { + // `DataBlockIter` assumes it reads only internal keys. `BlockConstructor` + // clients provide user keys, so we need to convert to internal key format + // before writing the data block. + ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); + std::string encoded; + AppendInternalKey(&encoded, ikey); + builder.Add(encoded, kv.second); + } + // Open the block + data_ = builder.Finish().ToString(); + BlockContents contents; + contents.data = data_; + block_ = new Block(std::move(contents)); + return Status::OK(); + } + InternalIterator* NewIterator( + const SliceTransform* /*prefix_extractor*/) const override { + // `DataBlockIter` returns the internal keys it reads. + // `KeyConvertingIterator` converts them to user keys before they are + // exposed to the `BlockConstructor` clients. + return new KeyConvertingIterator( + block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber)); + } + + private: + const Comparator* comparator_; + std::string data_; + Block* block_; + + BlockConstructor(); +}; + +class TableConstructor : public Constructor { + public: + explicit TableConstructor(const Comparator* cmp, + bool convert_to_internal_key = false, + int level = -1, SequenceNumber largest_seqno = 0) + : Constructor(cmp), + largest_seqno_(largest_seqno), + convert_to_internal_key_(convert_to_internal_key), + level_(level) { + env_ = ROCKSDB_NAMESPACE::Env::Default(); + } + ~TableConstructor() override { Reset(); } + + Status FinishImpl(const Options& options, const ImmutableOptions& ioptions, + const MutableCFOptions& moptions, + const BlockBasedTableOptions& /*table_options*/, + const InternalKeyComparator& internal_comparator, + const stl_wrappers::KVMap& kv_map) override { + Reset(); + soptions.use_mmap_reads = ioptions.allow_mmap_reads; + std::unique_ptr<FSWritableFile> sink(new test::StringSink()); + file_writer_.reset(new WritableFileWriter( + std::move(sink), "" /* don't care */, FileOptions())); + std::unique_ptr<TableBuilder> builder; + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + + if (largest_seqno_ != 0) { + // Pretend that it's an external file written by SstFileWriter. + int_tbl_prop_collector_factories.emplace_back( + new SstFileWriterPropertiesCollectorFactory(2 /* version */, + 0 /* global_seqno*/)); + } + + std::string column_family_name; + builder.reset(ioptions.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, internal_comparator, + &int_tbl_prop_collector_factories, + options.compression, options.compression_opts, + kUnknownColumnFamily, column_family_name, level_), + file_writer_.get())); + + for (const auto& kv : kv_map) { + if (convert_to_internal_key_) { + ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); + std::string encoded; + AppendInternalKey(&encoded, ikey); + builder->Add(encoded, kv.second); + } else { + builder->Add(kv.first, kv.second); + } + EXPECT_OK(builder->status()); + } + Status s = builder->Finish(); + EXPECT_OK(file_writer_->Flush()); + EXPECT_TRUE(s.ok()) << s.ToString(); + + EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize()); + + // Open the table + file_num_ = cur_file_num_++; + + return Reopen(ioptions, moptions); + } + + InternalIterator* NewIterator( + const SliceTransform* prefix_extractor) const override { + InternalIterator* iter = table_reader_->NewIterator( + read_options_, prefix_extractor, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized); + if (convert_to_internal_key_) { + return new KeyConvertingIterator(iter); + } else { + return iter; + } + } + + uint64_t ApproximateOffsetOf(const Slice& key) const { + if (convert_to_internal_key_) { + InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); + const Slice skey = ikey.Encode(); + return table_reader_->ApproximateOffsetOf( + skey, TableReaderCaller::kUncategorized); + } + return table_reader_->ApproximateOffsetOf( + key, TableReaderCaller::kUncategorized); + } + + virtual Status Reopen(const ImmutableOptions& ioptions, + const MutableCFOptions& moptions) { + std::unique_ptr<FSRandomAccessFile> source(new test::StringSource( + TEST_GetSink()->contents(), file_num_, ioptions.allow_mmap_reads)); + + file_reader_.reset(new RandomAccessFileReader(std::move(source), "test")); + return ioptions.table_factory->NewTableReader( + TableReaderOptions(ioptions, moptions.prefix_extractor, soptions, + *last_internal_comparator_, /*skip_filters*/ false, + /*immortal*/ false, false, level_, + &block_cache_tracer_, moptions.write_buffer_size, "", + file_num_, kNullUniqueId64x2, largest_seqno_), + std::move(file_reader_), TEST_GetSink()->contents().size(), + &table_reader_); + } + + virtual TableReader* GetTableReader() { return table_reader_.get(); } + + bool AnywayDeleteIterator() const override { + return convert_to_internal_key_; + } + + void ResetTableReader() { table_reader_.reset(); } + + bool ConvertToInternalKey() { return convert_to_internal_key_; } + + test::StringSink* TEST_GetSink() { + return static_cast<test::StringSink*>(file_writer_->writable_file()); + } + + BlockCacheTracer block_cache_tracer_; + + private: + void Reset() { + file_num_ = 0; + table_reader_.reset(); + file_writer_.reset(); + file_reader_.reset(); + } + + const ReadOptions read_options_; + uint64_t file_num_; + std::unique_ptr<WritableFileWriter> file_writer_; + std::unique_ptr<RandomAccessFileReader> file_reader_; + std::unique_ptr<TableReader> table_reader_; + SequenceNumber largest_seqno_; + bool convert_to_internal_key_; + int level_; + + TableConstructor(); + + static uint64_t cur_file_num_; + EnvOptions soptions; + Env* env_; +}; +uint64_t TableConstructor::cur_file_num_ = 1; + +class MemTableConstructor : public Constructor { + public: + explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb) + : Constructor(cmp), + internal_comparator_(cmp), + write_buffer_manager_(wb), + table_factory_(new SkipListFactory) { + options_.memtable_factory = table_factory_; + ImmutableOptions ioptions(options_); + memtable_ = + new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_), + wb, kMaxSequenceNumber, 0 /* column_family_id */); + memtable_->Ref(); + } + ~MemTableConstructor() override { delete memtable_->Unref(); } + Status FinishImpl(const Options&, const ImmutableOptions& ioptions, + const MutableCFOptions& /*moptions*/, + const BlockBasedTableOptions& /*table_options*/, + const InternalKeyComparator& /*internal_comparator*/, + const stl_wrappers::KVMap& kv_map) override { + delete memtable_->Unref(); + ImmutableOptions mem_ioptions(ioptions); + memtable_ = new MemTable(internal_comparator_, mem_ioptions, + MutableCFOptions(options_), write_buffer_manager_, + kMaxSequenceNumber, 0 /* column_family_id */); + memtable_->Ref(); + int seq = 1; + for (const auto& kv : kv_map) { + Status s = memtable_->Add(seq, kTypeValue, kv.first, kv.second, + nullptr /* kv_prot_info */); + if (!s.ok()) { + return s; + } + seq++; + } + return Status::OK(); + } + InternalIterator* NewIterator( + const SliceTransform* /*prefix_extractor*/) const override { + return new KeyConvertingIterator( + memtable_->NewIterator(ReadOptions(), &arena_), true); + } + + bool AnywayDeleteIterator() const override { return true; } + + bool IsArenaMode() const override { return true; } + + private: + mutable Arena arena_; + InternalKeyComparator internal_comparator_; + Options options_; + WriteBufferManager* write_buffer_manager_; + MemTable* memtable_; + std::shared_ptr<SkipListFactory> table_factory_; +}; + +class InternalIteratorFromIterator : public InternalIterator { + public: + explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {} + bool Valid() const override { return it_->Valid(); } + void Seek(const Slice& target) override { it_->Seek(target); } + void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); } + void SeekToFirst() override { it_->SeekToFirst(); } + void SeekToLast() override { it_->SeekToLast(); } + void Next() override { it_->Next(); } + void Prev() override { it_->Prev(); } + Slice key() const override { return it_->key(); } + Slice value() const override { return it_->value(); } + Status status() const override { return it_->status(); } + + private: + std::unique_ptr<Iterator> it_; +}; + +class DBConstructor : public Constructor { + public: + explicit DBConstructor(const Comparator* cmp) + : Constructor(cmp), comparator_(cmp) { + db_ = nullptr; + NewDB(); + } + ~DBConstructor() override { delete db_; } + Status FinishImpl(const Options& /*options*/, + const ImmutableOptions& /*ioptions*/, + const MutableCFOptions& /*moptions*/, + const BlockBasedTableOptions& /*table_options*/, + const InternalKeyComparator& /*internal_comparator*/, + const stl_wrappers::KVMap& kv_map) override { + delete db_; + db_ = nullptr; + NewDB(); + for (const auto& kv : kv_map) { + WriteBatch batch; + EXPECT_OK(batch.Put(kv.first, kv.second)); + EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok()); + } + return Status::OK(); + } + + InternalIterator* NewIterator( + const SliceTransform* /*prefix_extractor*/) const override { + return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions())); + } + + DB* db() const override { return db_; } + + private: + void NewDB() { + std::string name = test::PerThreadDBPath("table_testdb"); + + Options options; + options.comparator = comparator_; + Status status = DestroyDB(name, options); + ASSERT_TRUE(status.ok()) << status.ToString(); + + options.create_if_missing = true; + options.error_if_exists = true; + options.write_buffer_size = 10000; // Something small to force merging + status = DB::Open(options, name, &db_); + ASSERT_TRUE(status.ok()) << status.ToString(); + } + + const Comparator* comparator_; + DB* db_; +}; + +enum TestType { + BLOCK_BASED_TABLE_TEST, +#ifndef ROCKSDB_LITE + PLAIN_TABLE_SEMI_FIXED_PREFIX, + PLAIN_TABLE_FULL_STR_PREFIX, + PLAIN_TABLE_TOTAL_ORDER, +#endif // !ROCKSDB_LITE + BLOCK_TEST, + MEMTABLE_TEST, + DB_TEST +}; + +struct TestArgs { + TestType type; + bool reverse_compare; + int restart_interval; + CompressionType compression; + uint32_t compression_parallel_threads; + uint32_t format_version; + bool use_mmap; +}; + +std::ostream& operator<<(std::ostream& os, const TestArgs& args) { + os << "type: " << args.type << " reverse_compare: " << args.reverse_compare + << " restart_interval: " << args.restart_interval + << " compression: " << args.compression + << " compression_parallel_threads: " << args.compression_parallel_threads + << " format_version: " << args.format_version + << " use_mmap: " << args.use_mmap; + + return os; +} + +static std::vector<TestArgs> GenerateArgList() { + std::vector<TestArgs> test_args; + std::vector<TestType> test_types = {BLOCK_BASED_TABLE_TEST, +#ifndef ROCKSDB_LITE + PLAIN_TABLE_SEMI_FIXED_PREFIX, + PLAIN_TABLE_FULL_STR_PREFIX, + PLAIN_TABLE_TOTAL_ORDER, +#endif // !ROCKSDB_LITE + BLOCK_TEST, + MEMTABLE_TEST, + DB_TEST}; + std::vector<bool> reverse_compare_types = {false, true}; + std::vector<int> restart_intervals = {16, 1, 1024}; + std::vector<uint32_t> compression_parallel_threads = {1, 4}; + + // Only add compression if it is supported + std::vector<std::pair<CompressionType, bool>> compression_types; + compression_types.emplace_back(kNoCompression, false); + if (Snappy_Supported()) { + compression_types.emplace_back(kSnappyCompression, false); + } + if (Zlib_Supported()) { + compression_types.emplace_back(kZlibCompression, false); + compression_types.emplace_back(kZlibCompression, true); + } + if (BZip2_Supported()) { + compression_types.emplace_back(kBZip2Compression, false); + compression_types.emplace_back(kBZip2Compression, true); + } + if (LZ4_Supported()) { + compression_types.emplace_back(kLZ4Compression, false); + compression_types.emplace_back(kLZ4Compression, true); + compression_types.emplace_back(kLZ4HCCompression, false); + compression_types.emplace_back(kLZ4HCCompression, true); + } + if (XPRESS_Supported()) { + compression_types.emplace_back(kXpressCompression, false); + compression_types.emplace_back(kXpressCompression, true); + } + if (ZSTD_Supported()) { + compression_types.emplace_back(kZSTD, false); + compression_types.emplace_back(kZSTD, true); + } + + for (auto test_type : test_types) { + for (auto reverse_compare : reverse_compare_types) { +#ifndef ROCKSDB_LITE + if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX || + test_type == PLAIN_TABLE_FULL_STR_PREFIX || + test_type == PLAIN_TABLE_TOTAL_ORDER) { + // Plain table doesn't use restart index or compression. + TestArgs one_arg; + one_arg.type = test_type; + one_arg.reverse_compare = reverse_compare; + one_arg.restart_interval = restart_intervals[0]; + one_arg.compression = compression_types[0].first; + one_arg.compression_parallel_threads = 1; + one_arg.format_version = 0; + one_arg.use_mmap = true; + test_args.push_back(one_arg); + one_arg.use_mmap = false; + test_args.push_back(one_arg); + continue; + } +#endif // !ROCKSDB_LITE + + for (auto restart_interval : restart_intervals) { + for (auto compression_type : compression_types) { + for (auto num_threads : compression_parallel_threads) { + TestArgs one_arg; + one_arg.type = test_type; + one_arg.reverse_compare = reverse_compare; + one_arg.restart_interval = restart_interval; + one_arg.compression = compression_type.first; + one_arg.compression_parallel_threads = num_threads; + one_arg.format_version = compression_type.second ? 2 : 1; + one_arg.use_mmap = false; + test_args.push_back(one_arg); + } + } + } + } + } + return test_args; +} + +// In order to make all tests run for plain table format, including +// those operating on empty keys, create a new prefix transformer which +// return fixed prefix if the slice is not shorter than the prefix length, +// and the full slice if it is shorter. +class FixedOrLessPrefixTransform : public SliceTransform { + private: + const size_t prefix_len_; + + public: + explicit FixedOrLessPrefixTransform(size_t prefix_len) + : prefix_len_(prefix_len) {} + + const char* Name() const override { return "rocksdb.FixedPrefix"; } + + Slice Transform(const Slice& src) const override { + assert(InDomain(src)); + if (src.size() < prefix_len_) { + return src; + } + return Slice(src.data(), prefix_len_); + } + + bool InDomain(const Slice& /*src*/) const override { return true; } + + bool InRange(const Slice& dst) const override { + return (dst.size() <= prefix_len_); + } + bool FullLengthEnabled(size_t* /*len*/) const override { return false; } +}; + +class HarnessTest : public testing::Test { + public: + explicit HarnessTest(const TestArgs& args) + : args_(args), + ioptions_(options_), + moptions_(options_), + write_buffer_(options_.db_write_buffer_size), + support_prev_(true), + only_support_prefix_seek_(false) { + options_.compression = args_.compression; + options_.compression_opts.parallel_threads = + args_.compression_parallel_threads; + // Use shorter block size for tests to exercise block boundary + // conditions more. + if (args_.reverse_compare) { + options_.comparator = &reverse_key_comparator; + } + + internal_comparator_.reset( + new test::PlainInternalKeyComparator(options_.comparator)); + + options_.allow_mmap_reads = args_.use_mmap; + switch (args_.type) { + case BLOCK_BASED_TABLE_TEST: + table_options_.flush_block_policy_factory.reset( + new FlushBlockBySizePolicyFactory()); + table_options_.block_size = 256; + table_options_.block_restart_interval = args_.restart_interval; + table_options_.index_block_restart_interval = args_.restart_interval; + table_options_.format_version = args_.format_version; + options_.table_factory.reset( + new BlockBasedTableFactory(table_options_)); + constructor_.reset(new TableConstructor( + options_.comparator, true /* convert_to_internal_key_ */)); + internal_comparator_.reset( + new InternalKeyComparator(options_.comparator)); + break; +// Plain table is not supported in ROCKSDB_LITE +#ifndef ROCKSDB_LITE + case PLAIN_TABLE_SEMI_FIXED_PREFIX: + support_prev_ = false; + only_support_prefix_seek_ = true; + options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2)); + options_.table_factory.reset(NewPlainTableFactory()); + constructor_.reset(new TableConstructor( + options_.comparator, true /* convert_to_internal_key_ */)); + internal_comparator_.reset( + new InternalKeyComparator(options_.comparator)); + break; + case PLAIN_TABLE_FULL_STR_PREFIX: + support_prev_ = false; + only_support_prefix_seek_ = true; + options_.prefix_extractor.reset(NewNoopTransform()); + options_.table_factory.reset(NewPlainTableFactory()); + constructor_.reset(new TableConstructor( + options_.comparator, true /* convert_to_internal_key_ */)); + internal_comparator_.reset( + new InternalKeyComparator(options_.comparator)); + break; + case PLAIN_TABLE_TOTAL_ORDER: + support_prev_ = false; + only_support_prefix_seek_ = false; + options_.prefix_extractor = nullptr; + + { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = kPlainTableVariableLength; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + + options_.table_factory.reset( + NewPlainTableFactory(plain_table_options)); + } + constructor_.reset(new TableConstructor( + options_.comparator, true /* convert_to_internal_key_ */)); + internal_comparator_.reset( + new InternalKeyComparator(options_.comparator)); + break; +#endif // !ROCKSDB_LITE + case BLOCK_TEST: + table_options_.block_size = 256; + options_.table_factory.reset( + new BlockBasedTableFactory(table_options_)); + constructor_.reset(new BlockConstructor(options_.comparator)); + break; + case MEMTABLE_TEST: + table_options_.block_size = 256; + options_.table_factory.reset( + new BlockBasedTableFactory(table_options_)); + constructor_.reset( + new MemTableConstructor(options_.comparator, &write_buffer_)); + break; + case DB_TEST: + table_options_.block_size = 256; + options_.table_factory.reset( + new BlockBasedTableFactory(table_options_)); + constructor_.reset(new DBConstructor(options_.comparator)); + break; + } + ioptions_ = ImmutableOptions(options_); + moptions_ = MutableCFOptions(options_); + } + + void Add(const std::string& key, const std::string& value) { + constructor_->Add(key, value); + } + + void Test(Random* rnd) { + std::vector<std::string> keys; + stl_wrappers::KVMap data; + constructor_->Finish(options_, ioptions_, moptions_, table_options_, + *internal_comparator_, &keys, &data); + + TestForwardScan(keys, data); + if (support_prev_) { + TestBackwardScan(keys, data); + } + TestRandomAccess(rnd, keys, data); + } + + void TestForwardScan(const std::vector<std::string>& /*keys*/, + const stl_wrappers::KVMap& data) { + InternalIterator* iter = constructor_->NewIterator(); + ASSERT_TRUE(!iter->Valid()); + iter->SeekToFirst(); + ASSERT_OK(iter->status()); + for (stl_wrappers::KVMap::const_iterator model_iter = data.begin(); + model_iter != data.end(); ++model_iter) { + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + iter->Next(); + ASSERT_OK(iter->status()); + } + ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); + if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { + iter->~InternalIterator(); + } else { + delete iter; + } + } + + void TestBackwardScan(const std::vector<std::string>& /*keys*/, + const stl_wrappers::KVMap& data) { + InternalIterator* iter = constructor_->NewIterator(); + ASSERT_TRUE(!iter->Valid()); + iter->SeekToLast(); + ASSERT_OK(iter->status()); + for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin(); + model_iter != data.rend(); ++model_iter) { + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + iter->Prev(); + ASSERT_OK(iter->status()); + } + ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); + if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { + iter->~InternalIterator(); + } else { + delete iter; + } + } + + void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys, + const stl_wrappers::KVMap& data) { + static const bool kVerbose = false; + InternalIterator* iter = constructor_->NewIterator(); + ASSERT_TRUE(!iter->Valid()); + stl_wrappers::KVMap::const_iterator model_iter = data.begin(); + if (kVerbose) fprintf(stderr, "---\n"); + for (int i = 0; i < 200; i++) { + const int toss = rnd->Uniform(support_prev_ ? 5 : 3); + switch (toss) { + case 0: { + if (iter->Valid()) { + if (kVerbose) fprintf(stderr, "Next\n"); + iter->Next(); + ASSERT_OK(iter->status()); + ++model_iter; + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + } + break; + } + + case 1: { + if (kVerbose) fprintf(stderr, "SeekToFirst\n"); + iter->SeekToFirst(); + ASSERT_OK(iter->status()); + model_iter = data.begin(); + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + break; + } + + case 2: { + std::string key = PickRandomKey(rnd, keys); + model_iter = data.lower_bound(key); + if (kVerbose) + fprintf(stderr, "Seek '%s'\n", EscapeString(key).c_str()); + iter->Seek(Slice(key)); + ASSERT_OK(iter->status()); + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + break; + } + + case 3: { + if (iter->Valid()) { + if (kVerbose) fprintf(stderr, "Prev\n"); + iter->Prev(); + ASSERT_OK(iter->status()); + if (model_iter == data.begin()) { + model_iter = data.end(); // Wrap around to invalid value + } else { + --model_iter; + } + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + } + break; + } + + case 4: { + if (kVerbose) fprintf(stderr, "SeekToLast\n"); + iter->SeekToLast(); + ASSERT_OK(iter->status()); + if (keys.empty()) { + model_iter = data.end(); + } else { + std::string last = data.rbegin()->first; + model_iter = data.lower_bound(last); + } + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + break; + } + } + } + if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { + iter->~InternalIterator(); + } else { + delete iter; + } + } + + std::string ToString(const stl_wrappers::KVMap& data, + const stl_wrappers::KVMap::const_iterator& it) { + if (it == data.end()) { + return "END"; + } else { + return "'" + it->first + "->" + it->second + "'"; + } + } + + std::string ToString(const stl_wrappers::KVMap& data, + const stl_wrappers::KVMap::const_reverse_iterator& it) { + if (it == data.rend()) { + return "END"; + } else { + return "'" + it->first + "->" + it->second + "'"; + } + } + + std::string ToString(const InternalIterator* it) { + if (!it->Valid()) { + return "END"; + } else { + return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; + } + } + + std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) { + if (keys.empty()) { + return "foo"; + } else { + const int index = rnd->Uniform(static_cast<int>(keys.size())); + std::string result = keys[index]; + switch (rnd->Uniform(support_prev_ ? 3 : 1)) { + case 0: + // Return an existing key + break; + case 1: { + // Attempt to return something smaller than an existing key + if (result.size() > 0 && result[result.size() - 1] > '\0' && + (!only_support_prefix_seek_ || + options_.prefix_extractor->Transform(result).size() < + result.size())) { + result[result.size() - 1]--; + } + break; + } + case 2: { + // Return something larger than an existing key + Increment(options_.comparator, &result); + break; + } + } + return result; + } + } + + // Returns nullptr if not running against a DB + DB* db() const { return constructor_->db(); } + + private: + TestArgs args_; + Options options_; + ImmutableOptions ioptions_; + MutableCFOptions moptions_; + BlockBasedTableOptions table_options_; + std::unique_ptr<Constructor> constructor_; + WriteBufferManager write_buffer_; + bool support_prev_; + bool only_support_prefix_seek_; + std::shared_ptr<InternalKeyComparator> internal_comparator_; +}; + +class ParameterizedHarnessTest : public HarnessTest, + public testing::WithParamInterface<TestArgs> { + public: + ParameterizedHarnessTest() : HarnessTest(GetParam()) {} +}; + +INSTANTIATE_TEST_CASE_P(TableTest, ParameterizedHarnessTest, + ::testing::ValuesIn(GenerateArgList())); + +class DBHarnessTest : public HarnessTest { + public: + DBHarnessTest() + : HarnessTest(TestArgs{DB_TEST, /* reverse_compare */ false, + /* restart_interval */ 16, kNoCompression, + /* compression_parallel_threads */ 1, + /* format_version */ 0, /* use_mmap */ false}) {} +}; + +static bool Between(uint64_t val, uint64_t low, uint64_t high) { + bool result = (val >= low) && (val <= high); + if (!result) { + fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", + (unsigned long long)(val), (unsigned long long)(low), + (unsigned long long)(high)); + } + return result; +} + +// Tests against all kinds of tables +class TableTest : public testing::Test { + public: + const InternalKeyComparator& GetPlainInternalComparator( + const Comparator* comp) { + if (!plain_internal_comparator) { + plain_internal_comparator.reset( + new test::PlainInternalKeyComparator(comp)); + } + return *plain_internal_comparator; + } + void IndexTest(BlockBasedTableOptions table_options); + + private: + std::unique_ptr<InternalKeyComparator> plain_internal_comparator; +}; + +class GeneralTableTest : public TableTest {}; +class BlockBasedTableTestBase : public TableTest {}; +class BlockBasedTableTest + : public BlockBasedTableTestBase, + virtual public ::testing::WithParamInterface<uint32_t> { + public: + BlockBasedTableTest() : format_(GetParam()) { + env_ = ROCKSDB_NAMESPACE::Env::Default(); + } + + BlockBasedTableOptions GetBlockBasedTableOptions() { + BlockBasedTableOptions options; + options.format_version = format_; + return options; + } + + void SetupTracingTest(TableConstructor* c) { + test_path_ = test::PerThreadDBPath("block_based_table_tracing_test"); + EXPECT_OK(env_->CreateDir(test_path_)); + trace_file_path_ = test_path_ + "/block_cache_trace_file"; + + BlockCacheTraceWriterOptions trace_writer_opt; + BlockCacheTraceOptions trace_opt; + std::unique_ptr<TraceWriter> trace_writer; + EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_, + &trace_writer)); + std::unique_ptr<BlockCacheTraceWriter> block_cache_trace_writer = + NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, + std::move(trace_writer)); + ASSERT_NE(block_cache_trace_writer, nullptr); + // Always return Status::OK(). + assert(c->block_cache_tracer_ + .StartTrace(trace_opt, std::move(block_cache_trace_writer)) + .ok()); + + { + std::string user_key = "k01"; + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + c->Add(encoded_key, kDummyValue); + } + { + std::string user_key = "k02"; + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + c->Add(encoded_key, kDummyValue); + } + } + + void VerifyBlockAccessTrace( + TableConstructor* c, + const std::vector<BlockCacheTraceRecord>& expected_records) { + c->block_cache_tracer_.EndTrace(); + + { + std::unique_ptr<TraceReader> trace_reader; + Status s = NewFileTraceReader(env_, EnvOptions(), trace_file_path_, + &trace_reader); + EXPECT_OK(s); + BlockCacheTraceReader reader(std::move(trace_reader)); + BlockCacheTraceHeader header; + EXPECT_OK(reader.ReadHeader(&header)); + uint32_t index = 0; + while (s.ok()) { + BlockCacheTraceRecord access; + s = reader.ReadAccess(&access); + if (!s.ok()) { + break; + } + ASSERT_LT(index, expected_records.size()); + EXPECT_NE("", access.block_key); + EXPECT_EQ(access.block_type, expected_records[index].block_type); + EXPECT_GT(access.block_size, 0); + EXPECT_EQ(access.caller, expected_records[index].caller); + EXPECT_EQ(access.no_insert, expected_records[index].no_insert); + EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit); + // Get + if (access.caller == TableReaderCaller::kUserGet) { + EXPECT_EQ(access.referenced_key, + expected_records[index].referenced_key); + EXPECT_EQ(access.get_id, expected_records[index].get_id); + EXPECT_EQ(access.get_from_user_specified_snapshot, + expected_records[index].get_from_user_specified_snapshot); + if (access.block_type == TraceType::kBlockTraceDataBlock) { + EXPECT_GT(access.referenced_data_size, 0); + EXPECT_GT(access.num_keys_in_block, 0); + EXPECT_EQ(access.referenced_key_exist_in_block, + expected_records[index].referenced_key_exist_in_block); + } + } else { + EXPECT_EQ(access.referenced_key, ""); + EXPECT_EQ(access.get_id, 0); + EXPECT_FALSE(access.get_from_user_specified_snapshot); + EXPECT_EQ(access.referenced_data_size, 0); + EXPECT_EQ(access.num_keys_in_block, 0); + EXPECT_FALSE(access.referenced_key_exist_in_block); + } + index++; + } + EXPECT_EQ(index, expected_records.size()); + } + EXPECT_OK(env_->DeleteFile(trace_file_path_)); + EXPECT_OK(env_->DeleteDir(test_path_)); + } + + protected: + uint64_t IndexUncompressedHelper(bool indexCompress); + + private: + uint32_t format_; + Env* env_; + std::string trace_file_path_; + std::string test_path_; +}; +class PlainTableTest : public TableTest {}; +class TablePropertyTest : public testing::Test {}; +class BBTTailPrefetchTest : public TableTest {}; + +// The helper class to test the file checksum +class FileChecksumTestHelper { + public: + FileChecksumTestHelper(bool convert_to_internal_key = false) + : convert_to_internal_key_(convert_to_internal_key) {} + ~FileChecksumTestHelper() {} + + void CreateWritableFile() { + sink_ = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink_); + file_writer_.reset(new WritableFileWriter( + std::move(holder), "" /* don't care */, FileOptions())); + } + + void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) { + if (file_writer_ != nullptr) { + file_writer_->TEST_SetFileChecksumGenerator(checksum_generator); + } else { + delete checksum_generator; + } + } + + WritableFileWriter* GetFileWriter() { return file_writer_.get(); } + + Status ResetTableBuilder(std::unique_ptr<TableBuilder>&& builder) { + assert(builder != nullptr); + table_builder_ = std::move(builder); + return Status::OK(); + } + + void AddKVtoKVMap(int num_entries) { + Random rnd(test::RandomSeed()); + for (int i = 0; i < num_entries; i++) { + std::string v = rnd.RandomString(100); + kv_map_[test::RandomKey(&rnd, 20)] = v; + } + } + + Status WriteKVAndFlushTable() { + for (const auto& kv : kv_map_) { + if (convert_to_internal_key_) { + ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); + std::string encoded; + AppendInternalKey(&encoded, ikey); + table_builder_->Add(encoded, kv.second); + } else { + table_builder_->Add(kv.first, kv.second); + } + EXPECT_TRUE(table_builder_->status().ok()); + } + Status s = table_builder_->Finish(); + EXPECT_OK(file_writer_->Flush()); + EXPECT_OK(s); + + EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize()); + return s; + } + + std::string GetFileChecksum() { + EXPECT_OK(file_writer_->Close()); + return table_builder_->GetFileChecksum(); + } + + const char* GetFileChecksumFuncName() { + return table_builder_->GetFileChecksumFuncName(); + } + + Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator, + std::string* checksum) { + assert(file_checksum_generator != nullptr); + cur_file_num_ = checksum_file_num_++; + test::StringSink* ss_rw = + static_cast<test::StringSink*>(file_writer_->writable_file()); + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(ss_rw->contents())); + file_reader_.reset(new RandomAccessFileReader(std::move(source), "test")); + + std::unique_ptr<char[]> scratch(new char[2048]); + Slice result; + uint64_t offset = 0; + Status s; + s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), + nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + while (result.size() != 0) { + file_checksum_generator->Update(scratch.get(), result.size()); + offset += static_cast<uint64_t>(result.size()); + s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), + nullptr, + Env::IO_TOTAL /* rate_limiter_priority */); + if (!s.ok()) { + return s; + } + } + EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize())); + file_checksum_generator->Finalize(); + *checksum = file_checksum_generator->GetChecksum(); + return Status::OK(); + } + + private: + bool convert_to_internal_key_; + uint64_t cur_file_num_; + std::unique_ptr<WritableFileWriter> file_writer_; + std::unique_ptr<RandomAccessFileReader> file_reader_; + std::unique_ptr<TableBuilder> table_builder_; + stl_wrappers::KVMap kv_map_; + test::StringSink* sink_ = nullptr; + + static uint64_t checksum_file_num_; +}; + +uint64_t FileChecksumTestHelper::checksum_file_num_ = 1; + +INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest, + testing::ValuesIn(test::kFooterFormatVersionsToTest)); + +// This test serves as the living tutorial for the prefix scan of user collected +// properties. +TEST_F(TablePropertyTest, PrefixScanTest) { + UserCollectedProperties props{ + {"num.111.1", "1"}, {"num.111.2", "2"}, {"num.111.3", "3"}, + {"num.333.1", "1"}, {"num.333.2", "2"}, {"num.333.3", "3"}, + {"num.555.1", "1"}, {"num.555.2", "2"}, {"num.555.3", "3"}, + }; + + // prefixes that exist + for (const std::string prefix : {"num.111", "num.333", "num.555"}) { + int num = 0; + for (auto pos = props.lower_bound(prefix); + pos != props.end() && + pos->first.compare(0, prefix.size(), prefix) == 0; + ++pos) { + ++num; + auto key = prefix + "." + std::to_string(num); + ASSERT_EQ(key, pos->first); + ASSERT_EQ(std::to_string(num), pos->second); + } + ASSERT_EQ(3, num); + } + + // prefixes that don't exist + for (const std::string prefix : + {"num.000", "num.222", "num.444", "num.666"}) { + auto pos = props.lower_bound(prefix); + ASSERT_TRUE(pos == props.end() || + pos->first.compare(0, prefix.size(), prefix) != 0); + } +} + +namespace { +struct TestIds { + UniqueId64x3 internal_id; + UniqueId64x3 external_id; +}; + +inline bool operator==(const TestIds& lhs, const TestIds& rhs) { + return lhs.internal_id == rhs.internal_id && + lhs.external_id == rhs.external_id; +} + +std::ostream& operator<<(std::ostream& os, const TestIds& ids) { + return os << std::hex << "{{{ 0x" << ids.internal_id[0] << "U, 0x" + << ids.internal_id[1] << "U, 0x" << ids.internal_id[2] + << "U }}, {{ 0x" << ids.external_id[0] << "U, 0x" + << ids.external_id[1] << "U, 0x" << ids.external_id[2] << "U }}}"; +} + +TestIds GetUniqueId(TableProperties* tp, std::unordered_set<uint64_t>* seen, + const std::string& db_id, const std::string& db_session_id, + uint64_t file_number) { + // First test session id logic + if (db_session_id.size() == 20) { + uint64_t upper; + uint64_t lower; + EXPECT_OK(DecodeSessionId(db_session_id, &upper, &lower)); + EXPECT_EQ(EncodeSessionId(upper, lower), db_session_id); + } + + // Get external using public API + tp->db_id = db_id; + tp->db_session_id = db_session_id; + tp->orig_file_number = file_number; + TestIds t; + { + std::string euid; + EXPECT_OK(GetExtendedUniqueIdFromTableProperties(*tp, &euid)); + EXPECT_EQ(euid.size(), 24U); + t.external_id[0] = DecodeFixed64(&euid[0]); + t.external_id[1] = DecodeFixed64(&euid[8]); + t.external_id[2] = DecodeFixed64(&euid[16]); + + std::string uid; + EXPECT_OK(GetUniqueIdFromTableProperties(*tp, &uid)); + EXPECT_EQ(uid.size(), 16U); + EXPECT_EQ(uid, euid.substr(0, 16)); + EXPECT_EQ(t.external_id[0], DecodeFixed64(&uid[0])); + EXPECT_EQ(t.external_id[1], DecodeFixed64(&uid[8])); + } + // All these should be effectively random + EXPECT_TRUE(seen->insert(t.external_id[0]).second); + EXPECT_TRUE(seen->insert(t.external_id[1]).second); + EXPECT_TRUE(seen->insert(t.external_id[2]).second); + + // Get internal with internal API + EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, + &t.internal_id)); + EXPECT_NE(t.internal_id, kNullUniqueId64x3); + + // Verify relationship + UniqueId64x3 tmp = t.internal_id; + InternalUniqueIdToExternal(&tmp); + EXPECT_EQ(tmp, t.external_id); + ExternalUniqueIdToInternal(&tmp); + EXPECT_EQ(tmp, t.internal_id); + + // And 128-bit internal version + UniqueId64x2 tmp2{}; + EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, &tmp2)); + EXPECT_NE(tmp2, kNullUniqueId64x2); + + EXPECT_EQ(tmp2[0], t.internal_id[0]); + EXPECT_EQ(tmp2[1], t.internal_id[1]); + InternalUniqueIdToExternal(&tmp2); + EXPECT_EQ(tmp2[0], t.external_id[0]); + EXPECT_EQ(tmp2[1], t.external_id[1]); + ExternalUniqueIdToInternal(&tmp2); + EXPECT_EQ(tmp2[0], t.internal_id[0]); + EXPECT_EQ(tmp2[1], t.internal_id[1]); + + return t; +} +} // namespace + +TEST_F(TablePropertyTest, UniqueIdsSchemaAndQuality) { + // To ensure the computation only depends on the expected entries, we set + // the rest randomly + TableProperties tp; + TEST_SetRandomTableProperties(&tp); + + // DB id is normally RFC-4122 + const std::string db_id1 = "7265b6eb-4e42-4aec-86a4-0dc5e73a228d"; + // Allow other forms of DB id + const std::string db_id2 = "1728000184588763620"; + const std::string db_id3 = "x"; + + // DB session id is normally 20 chars in base-36, but 13 to 24 chars + // is ok, roughly 64 to 128 bits. + const std::string ses_id1 = "ABCDEFGHIJ0123456789"; + // Same trailing 13 digits + const std::string ses_id2 = "HIJ0123456789"; + const std::string ses_id3 = "0123ABCDEFGHIJ0123456789"; + // Different trailing 12 digits + const std::string ses_id4 = "ABCDEFGH888888888888"; + // And change length + const std::string ses_id5 = "ABCDEFGHIJ012"; + const std::string ses_id6 = "ABCDEFGHIJ0123456789ABCD"; + + using T = TestIds; + std::unordered_set<uint64_t> seen; + // Establish a stable schema for the unique IDs. These values must not + // change for existing table files. + // (Note: parens needed for macro parsing, extra braces needed for some + // compilers.) + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id1, 1), + T({{{0x61d7dcf415d9cf19U, 0x160d77aae90757fdU, 0x907f41dfd90724ffU}}, + {{0xf0bd230365df7464U, 0xca089303f3648eb4U, 0x4b44f7e7324b2817U}}})); + // Only change internal_id[1] with file number + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id1, 2), + T({{{0x61d7dcf415d9cf19U, 0x160d77aae90757feU, 0x907f41dfd90724ffU}}, + {{0xf13fdf7adcfebb6dU, 0x97cd2226cc033ea2U, 0x198c438182091f0eU}}})); + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id1, 123456789), + T({{{0x61d7dcf415d9cf19U, 0x160d77aaee5c9ae9U, 0x907f41dfd90724ffU}}, + {{0x81fbcebe1ac6c4f0U, 0x6b14a64cfdc0f1c4U, 0x7d8fb6eaf18edbb3U}}})); + // Change internal_id[1] and internal_id[2] with db_id + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id2, ses_id1, 1), + T({{{0x61d7dcf415d9cf19U, 0xf89c471f572f0d25U, 0x1f0f2a5eb0e6257eU}}, + {{0x7f1d01d453616991U, 0x32ddf2afec804ab2U, 0xd10a1ee2f0c7d9c1U}}})); + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id3, ses_id1, 1), + T({{{0x61d7dcf415d9cf19U, 0xfed297a8154a57d0U, 0x8b931b9cdebd9e8U}}, + {{0x62b2f43183f6894bU, 0x897ff2b460eefad1U, 0xf4ec189fb2d15e04U}}})); + // Keeping same last 13 digits of ses_id keeps same internal_id[0] + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id2, 1), + T({{{0x61d7dcf415d9cf19U, 0x5f6cc4fa2d528c8U, 0x7b70845d5bfb5446U}}, + {{0x96d1c83ffcc94266U, 0x82663eac0ec6e14aU, 0x94a88b49678b77f6U}}})); + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id3, 1), + T({{{0x61d7dcf415d9cf19U, 0xfc7232879db37ea2U, 0xc0378d74ea4c89cdU}}, + {{0xdf2ef57e98776905U, 0xda5b31c987da833bU, 0x79c1b4bd0a9e760dU}}})); + // Changing last 12 digits of ses_id only changes internal_id[0] + // (vs. db_id1, ses_id1, 1) + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id4, 1), + T({{{0x4f07cc0d003a83a8U, 0x160d77aae90757fdU, 0x907f41dfd90724ffU}}, + {{0xbcf85336a9f71f04U, 0x4f2949e2f3adb60dU, 0x9ca0def976abfa10U}}})); + // ses_id can change everything. + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id5, 1), + T({{{0x94b8768e43f87ce6U, 0xc2559653ac4e7c93U, 0xde6dff6bbb1223U}}, + {{0x5a9537af681817fbU, 0x1afcd1fecaead5eaU, 0x767077ad9ebe0008U}}})); + EXPECT_EQ( + GetUniqueId(&tp, &seen, db_id1, ses_id6, 1), + T({{{0x43cfb0ffa3b710edU, 0x263c580426406a1bU, 0xfacc91379a80d29dU}}, + {{0xfa90547d84cb1cdbU, 0x2afe99c641992d4aU, 0x205b7f7b60e51cc2U}}})); + + // Now verify more thoroughly that any small change in inputs completely + // changes external unique id. + // (Relying on 'seen' checks etc. in GetUniqueId) + std::string db_id = "00000000-0000-0000-0000-000000000000"; + std::string ses_id = "000000000000000000000000"; + uint64_t file_num = 1; + // change db_id + for (size_t i = 0; i < db_id.size(); ++i) { + if (db_id[i] == '-') { + continue; + } + for (char alt : std::string("123456789abcdef")) { + db_id[i] = alt; + GetUniqueId(&tp, &seen, db_id, ses_id, file_num); + } + db_id[i] = '0'; + } + // change ses_id + for (size_t i = 0; i < ses_id.size(); ++i) { + for (char alt : std::string("123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")) { + ses_id[i] = alt; + GetUniqueId(&tp, &seen, db_id, ses_id, file_num); + } + ses_id[i] = '0'; + } + // change file_num + for (int i = 1; i < 64; ++i) { + GetUniqueId(&tp, &seen, db_id, ses_id, file_num << i); + } + + // Verify that "all zeros" in first 128 bits is equivalent for internal and + // external IDs. This way, as long as we avoid "all zeros" in internal IDs, + // we avoid it in external IDs. + { + UniqueId64x3 id1{{0, 0, Random::GetTLSInstance()->Next64()}}; + UniqueId64x3 id2 = id1; + InternalUniqueIdToExternal(&id1); + EXPECT_EQ(id1, id2); + ExternalUniqueIdToInternal(&id2); + EXPECT_EQ(id1, id2); + } +} + +namespace { +void SetGoodTableProperties(TableProperties* tp) { + // To ensure the computation only depends on the expected entries, we set + // the rest randomly + TEST_SetRandomTableProperties(tp); + tp->db_id = "7265b6eb-4e42-4aec-86a4-0dc5e73a228d"; + tp->db_session_id = "ABCDEFGHIJ0123456789"; + tp->orig_file_number = 1; +} +} // namespace + +TEST_F(TablePropertyTest, UniqueIdHumanStrings) { + TableProperties tp; + SetGoodTableProperties(&tp); + + std::string tmp; + EXPECT_OK(GetExtendedUniqueIdFromTableProperties(tp, &tmp)); + EXPECT_EQ(tmp, + (std::string{{'\x64', '\x74', '\xdf', '\x65', '\x03', '\x23', + '\xbd', '\xf0', '\xb4', '\x8e', '\x64', '\xf3', + '\x03', '\x93', '\x08', '\xca', '\x17', '\x28', + '\x4b', '\x32', '\xe7', '\xf7', '\x44', '\x4b'}})); + EXPECT_EQ(UniqueIdToHumanString(tmp), + "6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B"); + + EXPECT_OK(GetUniqueIdFromTableProperties(tp, &tmp)); + EXPECT_EQ(UniqueIdToHumanString(tmp), "6474DF650323BDF0-B48E64F3039308CA"); + + // including zero padding + tmp = std::string(24U, '\0'); + tmp[15] = '\x12'; + tmp[23] = '\xAB'; + EXPECT_EQ(UniqueIdToHumanString(tmp), + "0000000000000000-0000000000000012-00000000000000AB"); + + // And shortened + tmp = std::string(20U, '\0'); + tmp[5] = '\x12'; + tmp[10] = '\xAB'; + tmp[17] = '\xEF'; + EXPECT_EQ(UniqueIdToHumanString(tmp), + "0000000000120000-0000AB0000000000-00EF0000"); + + tmp.resize(16); + EXPECT_EQ(UniqueIdToHumanString(tmp), "0000000000120000-0000AB0000000000"); + + tmp.resize(11); + EXPECT_EQ(UniqueIdToHumanString(tmp), "0000000000120000-0000AB"); + + tmp.resize(6); + EXPECT_EQ(UniqueIdToHumanString(tmp), "000000000012"); + + // Also internal IDs to human string + UniqueId64x3 euid = {12345, 678, 9}; + EXPECT_EQ(InternalUniqueIdToHumanString(&euid), "{12345,678,9}"); + + UniqueId64x2 uid = {1234, 567890}; + EXPECT_EQ(InternalUniqueIdToHumanString(&uid), "{1234,567890}"); +} + +TEST_F(TablePropertyTest, UniqueIdsFailure) { + TableProperties tp; + std::string tmp; + + // Missing DB id + SetGoodTableProperties(&tp); + tp.db_id = ""; + EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + + // Missing session id + SetGoodTableProperties(&tp); + tp.db_session_id = ""; + EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + + // Missing file number + SetGoodTableProperties(&tp); + tp.orig_file_number = 0; + EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); +} + +// This test include all the basic checks except those for index size and block +// size, which will be conducted in separated unit tests. +TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + + c.Add("a1", "val1"); + c.Add("b2", "val2"); + c.Add("c3", "val3"); + c.Add("d4", "val4"); + c.Add("e5", "val5"); + c.Add("f6", "val6"); + c.Add("g7", "val7"); + c.Add("h8", "val8"); + c.Add("j9", "val9"); + uint64_t diff_internal_user_bytes = 9 * 8; // 8 is seq size, 9 k-v totally + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + options.compression = kNoCompression; + options.statistics = CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kAll); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_restart_interval = 1; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0); + + auto& props = *c.GetTableReader()->GetTableProperties(); + ASSERT_EQ(kvmap.size(), props.num_entries); + + auto raw_key_size = kvmap.size() * 2ul; + auto raw_value_size = kvmap.size() * 4ul; + + ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size); + ASSERT_EQ(raw_value_size, props.raw_value_size); + ASSERT_EQ(1ul, props.num_data_blocks); + ASSERT_EQ("", props.filter_policy_name); // no filter policy is used + + // Verify data size. + BlockBuilder block_builder(1); + for (const auto& item : kvmap) { + block_builder.Add(item.first, item.second); + } + Slice content = block_builder.Finish(); + ASSERT_EQ(content.size() + BlockBasedTable::kBlockTrailerSize + + diff_internal_user_bytes, + props.data_size); + c.ResetTableReader(); +} + +#ifdef SNAPPY +uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + constexpr size_t kNumKeys = 10000; + + for (size_t k = 0; k < kNumKeys; ++k) { + c.Add("key" + std::to_string(k), "val" + std::to_string(k)); + } + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + options.compression = kSnappyCompression; + options.statistics = CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kAll); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_restart_interval = 1; + table_options.enable_index_compression = compressed; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + c.ResetTableReader(); + return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); +} +TEST_P(BlockBasedTableTest, IndexUncompressed) { + uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true); + uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false); + // tbl1_compressed_cnt should include 1 index block + EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt); +} +#endif // SNAPPY + +TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) { + TableConstructor c(&reverse_key_comparator); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + + { + Options options; + options.compression = CompressionType::kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + auto& props = *c.GetTableReader()->GetTableProperties(); + + // Default comparator + ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name); + // No merge operator + ASSERT_EQ("nullptr", props.merge_operator_name); + // No prefix extractor + ASSERT_EQ("nullptr", props.prefix_extractor_name); + // No property collectors + ASSERT_EQ("[]", props.property_collectors_names); + // No filter policy is used + ASSERT_EQ("", props.filter_policy_name); + // Compression type == that set: + ASSERT_EQ("NoCompression", props.compression_name); + c.ResetTableReader(); + } + + { + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.comparator = &reverse_key_comparator; + options.merge_operator = MergeOperators::CreateUInt64AddOperator(); + options.prefix_extractor.reset(NewNoopTransform()); + options.table_properties_collector_factories.emplace_back( + new DummyPropertiesCollectorFactory1()); + options.table_properties_collector_factories.emplace_back( + new DummyPropertiesCollectorFactory2()); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + auto& props = *c.GetTableReader()->GetTableProperties(); + + ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name); + ASSERT_EQ("UInt64AddOperator", props.merge_operator_name); + ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name); + ASSERT_EQ( + "[DummyPropertiesCollectorFactory1,DummyPropertiesCollectorFactory2]", + props.property_collectors_names); + ASSERT_EQ("", props.filter_policy_name); // no filter policy is used + c.ResetTableReader(); + } +} + +TEST_P(BlockBasedTableTest, RangeDelBlock) { + TableConstructor c(BytewiseComparator()); + std::vector<std::string> keys = {"1pika", "2chu"}; + std::vector<std::string> vals = {"p", "c"}; + + std::vector<RangeTombstone> expected_tombstones = { + {"1pika", "2chu", 0}, + {"2chu", "c", 1}, + {"2chu", "c", 0}, + {"c", "p", 0}, + }; + + for (int i = 0; i < 2; i++) { + RangeTombstone t(keys[i], vals[i], i); + std::pair<InternalKey, Slice> p = t.Serialize(); + c.Add(p.first.Encode().ToString(), p.second); + } + + std::vector<std::string> sorted_keys; + stl_wrappers::KVMap kvmap; + Options options; + options.compression = kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_restart_interval = 1; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + std::unique_ptr<InternalKeyComparator> internal_cmp( + new InternalKeyComparator(options.comparator)); + c.Finish(options, ioptions, moptions, table_options, *internal_cmp, + &sorted_keys, &kvmap); + + for (int j = 0; j < 2; ++j) { + std::unique_ptr<InternalIterator> iter( + c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions())); + if (j > 0) { + // For second iteration, delete the table reader object and verify the + // iterator can still access its metablock's range tombstones. + c.ResetTableReader(); + } + ASSERT_FALSE(iter->Valid()); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + for (size_t i = 0; i < expected_tombstones.size(); i++) { + ASSERT_TRUE(iter->Valid()); + ParsedInternalKey parsed_key; + ASSERT_OK( + ParseInternalKey(iter->key(), &parsed_key, true /* log_err_key */)); + RangeTombstone t(parsed_key, iter->value()); + const auto& expected_t = expected_tombstones[i]; + ASSERT_EQ(t.start_key_, expected_t.start_key_); + ASSERT_EQ(t.end_key_, expected_t.end_key_); + ASSERT_EQ(t.seq_, expected_t.seq_); + iter->Next(); + } + ASSERT_TRUE(!iter->Valid()); + } +} + +TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("a1", "val1"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + Options options; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + auto& props = *c.GetTableReader()->GetTableProperties(); + ASSERT_EQ(table_options.filter_policy->Name(), props.filter_policy_name); + c.ResetTableReader(); +} + +// +// BlockBasedTableTest::PrefetchTest +// +void AssertKeysInCache(BlockBasedTable* table_reader, + const std::vector<std::string>& keys_in_cache, + const std::vector<std::string>& keys_not_in_cache, + bool convert = false) { + if (convert) { + for (auto key : keys_in_cache) { + InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); + ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); + } + for (auto key : keys_not_in_cache) { + InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); + ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); + } + } else { + for (auto key : keys_in_cache) { + ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key)); + } + for (auto key : keys_not_in_cache) { + ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key)); + } + } +} + +void PrefetchRange(TableConstructor* c, Options* opt, + BlockBasedTableOptions* table_options, const char* key_begin, + const char* key_end, + const std::vector<std::string>& keys_in_cache, + const std::vector<std::string>& keys_not_in_cache, + const Status expected_status = Status::OK()) { + // reset the cache and reopen the table + table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4); + opt->table_factory.reset(NewBlockBasedTableFactory(*table_options)); + const ImmutableOptions ioptions2(*opt); + const MutableCFOptions moptions(*opt); + ASSERT_OK(c->Reopen(ioptions2, moptions)); + + // prefetch + auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader()); + Status s; + std::unique_ptr<Slice> begin, end; + std::unique_ptr<InternalKey> i_begin, i_end; + if (key_begin != nullptr) { + if (c->ConvertToInternalKey()) { + i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue)); + begin.reset(new Slice(i_begin->Encode())); + } else { + begin.reset(new Slice(key_begin)); + } + } + if (key_end != nullptr) { + if (c->ConvertToInternalKey()) { + i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue)); + end.reset(new Slice(i_end->Encode())); + } else { + end.reset(new Slice(key_end)); + } + } + s = table_reader->Prefetch(begin.get(), end.get()); + + ASSERT_TRUE(s.code() == expected_status.code()); + + // assert our expectation in cache warmup + AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache, + c->ConvertToInternalKey()); + c->ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, PrefetchTest) { + // The purpose of this test is to test the prefetching operation built into + // BlockBasedTable. + Options opt; + std::unique_ptr<InternalKeyComparator> ikc; + ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); + opt.compression = kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_size = 1024; + // big enough so we don't ever lose cached values. + table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); + opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(opt); + const MutableCFOptions moptions(opt); + c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); + c.ResetTableReader(); + + // We get the following data spread : + // + // Data block Index + // ======================== + // [ k01 k02 k03 ] k03 + // [ k04 ] k04 + // [ k05 ] k05 + // [ k06 k07 ] k07 + + // Simple + PrefetchRange(&c, &opt, &table_options, + /*key_range=*/"k01", "k05", + /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"}, + /*keys_not_in_cache=*/{"k06", "k07"}); + PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"}, + {"k04", "k05", "k06", "k07"}); + // odd + PrefetchRange(&c, &opt, &table_options, "a", "z", + {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); + PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"}, + {"k04", "k05", "k06", "k07"}); + // Edge cases + PrefetchRange(&c, &opt, &table_options, "k00", "k06", + {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); + PrefetchRange(&c, &opt, &table_options, "k00", "zzz", + {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); + // null keys + PrefetchRange(&c, &opt, &table_options, nullptr, nullptr, + {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); + PrefetchRange(&c, &opt, &table_options, "k04", nullptr, + {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"}); + PrefetchRange(&c, &opt, &table_options, nullptr, "k05", + {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"}); + // invalid + PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {}, + Status::InvalidArgument(Slice("k06 "), Slice("k07"))); + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + for (int i = 0; i <= 4; ++i) { + Options options; + // Make each key/value an individual block + table_options.block_size = 64; + switch (i) { + case 0: + // Binary search index + table_options.index_type = BlockBasedTableOptions::kBinarySearch; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + break; + case 1: + // Hash search index + table_options.index_type = BlockBasedTableOptions::kHashSearch; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(4)); + break; + case 2: + // Hash search index with filter policy + table_options.index_type = BlockBasedTableOptions::kHashSearch; + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(4)); + break; + case 3: + // Two-level index + table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + break; + case 4: + // Binary search with first key + table_options.index_type = + BlockBasedTableOptions::kBinarySearchWithFirstKey; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + break; + } + + TableConstructor c(BytewiseComparator(), + true /* convert_to_internal_key_ */); + c.Add("aaaa1", std::string('a', 56)); + c.Add("bbaa1", std::string('a', 56)); + c.Add("cccc1", std::string('a', 56)); + c.Add("bbbb1", std::string('a', 56)); + c.Add("baaa1", std::string('a', 56)); + c.Add("abbb1", std::string('a', 56)); + c.Add("cccc2", std::string('a', 56)); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + auto props = c.GetTableReader()->GetTableProperties(); + ASSERT_EQ(7u, props->num_data_blocks); + auto* reader = c.GetTableReader(); + ReadOptions ro; + ro.total_order_seek = true; + std::unique_ptr<InternalIterator> iter(reader->NewIterator( + ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + iter->Seek(InternalKey("b", 0, kTypeValue).Encode()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString()); + + iter->Seek(InternalKey("bb", 0, kTypeValue).Encode()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString()); + + iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString()); + iter->Next(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString()); + } +} + +TEST_P(BlockBasedTableTest, NoopTransformSeek) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + + Options options; + options.comparator = BytewiseComparator(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewNoopTransform()); + + TableConstructor c(options.comparator); + // To tickle the PrefixMayMatch bug it is important that the + // user-key is a single byte so that the index key exactly matches + // the user-key. + InternalKey key("a", 1, kTypeValue); + c.Add(key.Encode().ToString(), "b"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + auto* reader = c.GetTableReader(); + for (int i = 0; i < 2; ++i) { + ReadOptions ro; + ro.total_order_seek = (i == 0); + std::unique_ptr<InternalIterator> iter(reader->NewIterator( + ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + iter->Seek(key.Encode()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString()); + } +} + +TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) { + // if DB is opened with a prefix extractor of a different name, + // prefix bloom is skipped when read the file + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.filter_policy.reset(NewBloomFilterPolicy(2)); + table_options.whole_key_filtering = false; + + Options options; + options.comparator = BytewiseComparator(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + + TableConstructor c(options.comparator); + InternalKey key("abcdefghijk", 1, kTypeValue); + c.Add(key.Encode().ToString(), "test"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + // TODO(Zhongyi): update test to use MutableCFOptions + options.prefix_extractor.reset(NewFixedPrefixTransform(9)); + const ImmutableOptions new_ioptions(options); + const MutableCFOptions new_moptions(options); + ASSERT_OK(c.Reopen(new_ioptions, new_moptions)); + auto reader = c.GetTableReader(); + ReadOptions read_options; + std::unique_ptr<InternalIterator> db_iter(reader->NewIterator( + read_options, new_moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // Test point lookup + // only one kv + for (auto& kv : kvmap) { + db_iter->Seek(kv.first); + ASSERT_TRUE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); + ASSERT_EQ(db_iter->key(), kv.first); + ASSERT_EQ(db_iter->value(), kv.second); + } +} + +TEST_P(BlockBasedTableTest, BadChecksumType) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + + Options options; + options.comparator = BytewiseComparator(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(options.comparator); + InternalKey key("abc", 1, kTypeValue); + c.Add(key.Encode().ToString(), "test"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + // Corrupt checksum type (123 is invalid) + auto& sink = *c.TEST_GetSink(); + size_t len = sink.contents_.size(); + ASSERT_EQ(sink.contents_[len - Footer::kNewVersionsEncodedLength], + table_options.checksum); + sink.contents_[len - Footer::kNewVersionsEncodedLength] = char{123}; + + // (Re-)Open table file with bad checksum type + const ImmutableOptions new_ioptions(options); + const MutableCFOptions new_moptions(options); + Status s = c.Reopen(new_ioptions, new_moptions); + ASSERT_NOK(s); + ASSERT_EQ(s.ToString(), + "Corruption: Corrupt or unsupported checksum type: 123"); +} + +namespace { +std::string ChecksumAsString(const std::string& data, + ChecksumType checksum_type) { + uint32_t v = ComputeBuiltinChecksum(checksum_type, data.data(), data.size()); + + // Verify consistency with other function + if (data.size() >= 1) { + EXPECT_EQ(v, ComputeBuiltinChecksumWithLastByte( + checksum_type, data.data(), data.size() - 1, data.back())); + } + // Little endian as in file + std::array<char, 4> raw_bytes; + EncodeFixed32(raw_bytes.data(), v); + return Slice(raw_bytes.data(), raw_bytes.size()).ToString(/*hex*/ true); +} + +std::string ChecksumAsString(std::string* data, char new_last_byte, + ChecksumType checksum_type) { + data->back() = new_last_byte; + return ChecksumAsString(*data, checksum_type); +} +} // namespace + +// Make sure that checksum values don't change in later versions, even if +// consistent within current version. +TEST_P(BlockBasedTableTest, ChecksumSchemas) { + std::string b0 = "x"; + std::string b1 = "This is a short block!x"; + std::string b2; + for (int i = 0; i < 100; ++i) { + b2.append("This is a long block!"); + } + b2.append("x"); + // Trailing 'x' will be replaced by compression type + + std::string empty; + + char ct1 = kNoCompression; + char ct2 = kSnappyCompression; + char ct3 = kZSTD; + + // Note: first byte of trailer is compression type, last 4 are checksum + + for (ChecksumType t : GetSupportedChecksums()) { + switch (t) { + case kNoChecksum: + EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00000000"); + break; + case kCRC32c: + EXPECT_EQ(ChecksumAsString(empty, t), "D8EA82A2"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "D28F2549"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "052B2843"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "46F8F711"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "583F0355"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "2F9B0A57"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "ECE7DA1D"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "943EF0AB"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "43A2EDB1"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00E53D63"); + break; + case kxxHash: + EXPECT_EQ(ChecksumAsString(empty, t), "055DCC02"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "3EB065CF"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "31F79238"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "320D2E00"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "4A2E5FB0"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "0BD9F652"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "B4107E50"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "20F4D4BA"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "8F1A1F99"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "A191A338"); + break; + case kxxHash64: + EXPECT_EQ(ChecksumAsString(empty, t), "99E9D851"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "682705DB"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "30E7211B"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "B7BB58E8"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B74655EF"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "B6C8BBBE"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "AED9E3B4"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "0D4999FE"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "F5932423"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "6B31BAB1"); + break; + case kXXH3: + EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "C294D338"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "1B174353"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "2D0E20C8"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B37FB5E6"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "6AFC258D"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "5CE54616"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "FA2D482E"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "23AED845"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "15B7BBDE"); + break; + default: + // Force this test to be updated on new ChecksumTypes + assert(false); + break; + } + } +} + +void AddInternalKey(TableConstructor* c, const std::string& prefix, + std::string value = "v", int /*suffix_len*/ = 800) { + static Random rnd(1023); + InternalKey k(prefix + rnd.RandomString(800), 0, kTypeValue); + c->Add(k.Encode().ToString(), value); +} + +void TableTest::IndexTest(BlockBasedTableOptions table_options) { + TableConstructor c(BytewiseComparator()); + + // keys with prefix length 3, make sure the key/value is big enough to fill + // one block + AddInternalKey(&c, "0015"); + AddInternalKey(&c, "0035"); + + AddInternalKey(&c, "0054"); + AddInternalKey(&c, "0055"); + + AddInternalKey(&c, "0056"); + AddInternalKey(&c, "0057"); + + AddInternalKey(&c, "0058"); + AddInternalKey(&c, "0075"); + + AddInternalKey(&c, "0076"); + AddInternalKey(&c, "0095"); + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + options.prefix_extractor.reset(NewFixedPrefixTransform(3)); + table_options.block_size = 1700; + table_options.block_cache = NewLRUCache(1024, 4); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, + &kvmap); + auto reader = c.GetTableReader(); + + auto props = reader->GetTableProperties(); + ASSERT_EQ(5u, props->num_data_blocks); + + // TODO(Zhongyi): update test to use MutableCFOptions + ReadOptions read_options; + std::unique_ptr<InternalIterator> index_iter(reader->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // -- Find keys do not exist, but have common prefix. + std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"}; + std::vector<std::string> lower_bound = { + keys[0], keys[1], keys[2], keys[7], keys[9], + }; + + // find the lower bound of the prefix + for (size_t i = 0; i < prefixes.size(); ++i) { + index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode()); + ASSERT_OK(index_iter->status()); + ASSERT_TRUE(index_iter->Valid()); + + // seek the first element in the block + ASSERT_EQ(lower_bound[i], index_iter->key().ToString()); + ASSERT_EQ("v", index_iter->value().ToString()); + } + + // find the upper bound of prefixes + std::vector<std::string> upper_bound = { + keys[1], + keys[2], + keys[7], + keys[9], + }; + + // find existing keys + for (const auto& item : kvmap) { + auto ukey = ExtractUserKey(item.first).ToString(); + index_iter->Seek(ukey); + + // ASSERT_OK(regular_iter->status()); + ASSERT_OK(index_iter->status()); + + // ASSERT_TRUE(regular_iter->Valid()); + ASSERT_TRUE(index_iter->Valid()); + + ASSERT_EQ(item.first, index_iter->key().ToString()); + ASSERT_EQ(item.second, index_iter->value().ToString()); + } + + for (size_t i = 0; i < prefixes.size(); ++i) { + // the key is greater than any existing keys. + auto key = prefixes[i] + "9"; + index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode()); + + ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound()); + ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid()); + if (i == prefixes.size() - 1) { + // last key + ASSERT_TRUE(!index_iter->Valid()); + } else { + ASSERT_TRUE(index_iter->Valid()); + // seek the first element in the block + ASSERT_EQ(upper_bound[i], index_iter->key().ToString()); + ASSERT_EQ("v", index_iter->value().ToString()); + } + } + + // find keys with prefix that don't match any of the existing prefixes. + std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"}; + for (const auto& prefix : non_exist_prefixes) { + index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode()); + // regular_iter->Seek(prefix); + + ASSERT_OK(index_iter->status()); + // Seek to non-existing prefixes should yield either invalid, or a + // key with prefix greater than the target. + if (index_iter->Valid()) { + Slice ukey = ExtractUserKey(index_iter->key()); + Slice ukey_prefix = options.prefix_extractor->Transform(ukey); + ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0); + } + } + for (const auto& prefix : non_exist_prefixes) { + index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode()); + // regular_iter->Seek(prefix); + + ASSERT_OK(index_iter->status()); + // Seek to non-existing prefixes should yield either invalid, or a + // key with prefix greater than the target. + if (index_iter->Valid()) { + Slice ukey = ExtractUserKey(index_iter->key()); + Slice ukey_prefix = options.prefix_extractor->Transform(ukey); + ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0); + } + } + + { + // Test reseek case. It should impact partitioned index more. + ReadOptions ro; + ro.total_order_seek = true; + std::unique_ptr<InternalIterator> index_iter2(reader->NewIterator( + ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // Things to cover in partitioned index: + // 1. Both of Seek() and SeekToLast() has optimization to prevent + // rereek leaf index block if it remains to the same one, and + // they reuse the same variable. + // 2. When Next() or Prev() is called, the block moves, so the + // optimization should kick in only with the current one. + index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->SeekToLast(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->SeekToLast(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->SeekToLast(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + index_iter2->Prev(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode()); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->Next(); + ASSERT_TRUE(index_iter2->Valid()); + index_iter2->Next(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + + index_iter2->SeekToLast(); + ASSERT_TRUE(index_iter2->Valid()); + ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); + } + + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, BinaryIndexTest) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kBinarySearch; + IndexTest(table_options); +} + +TEST_P(BlockBasedTableTest, HashIndexTest) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kHashSearch; + IndexTest(table_options); +} + +TEST_P(BlockBasedTableTest, PartitionIndexTest) { + const int max_index_keys = 5; + const int est_max_index_key_value_size = 32; + const int est_max_index_size = max_index_keys * est_max_index_key_value_size; + for (int i = 1; i <= est_max_index_size + 1; i++) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; + table_options.metadata_block_size = i; + IndexTest(table_options); + } +} + +TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) { + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + Options options; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + + TableConstructor c(BytewiseComparator()); + AddInternalKey(&c, "pika"); + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, + &kvmap); + ASSERT_EQ(1, keys.size()); + + auto reader = c.GetTableReader(); + ReadOptions ropt; + ropt.read_tier = ReadTier::kBlockCacheTier; + std::unique_ptr<InternalIterator> iter(reader->NewIterator( + ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + auto ikey = [](Slice user_key) { + return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); + }; + + iter->Seek(ikey("pika")); + ASSERT_FALSE(iter->Valid()); + ASSERT_TRUE(iter->status().IsIncomplete()); + + // This used to crash at some point. + iter->Seek(ikey("pika")); + ASSERT_FALSE(iter->Valid()); + ASSERT_TRUE(iter->status().IsIncomplete()); +} + +TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; + IndexTest(table_options); +} + +class CustomFlushBlockPolicy : public FlushBlockPolicyFactory, + public FlushBlockPolicy { + public: + explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block) + : keys_per_block_(keys_per_block) {} + + const char* Name() const override { return "CustomFlushBlockPolicy"; } + + FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&, + const BlockBuilder&) const override { + return new CustomFlushBlockPolicy(keys_per_block_); + } + + bool Update(const Slice&, const Slice&) override { + if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) { + ++current_block_idx_; + keys_in_current_block_ = 1; + return true; + } + + ++keys_in_current_block_; + return false; + } + + std::vector<int> keys_per_block_; + + int current_block_idx_ = 0; + int keys_in_current_block_ = 0; +}; + +TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) { + for (int use_first_key = 0; use_first_key < 2; ++use_first_key) { + SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key)); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = + use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey + : BlockBasedTableOptions::kBinarySearch; + table_options.block_cache = NewLRUCache(10000); // fits all blocks + table_options.index_shortening = + BlockBasedTableOptions::IndexShorteningMode::kNoShortening; + table_options.flush_block_policy_factory = + std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2}); + Options options; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.statistics = CreateDBStatistics(); + Statistics* stats = options.statistics.get(); + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + + TableConstructor c(BytewiseComparator()); + + // Block 0. + AddInternalKey(&c, "aaaa", "v0"); + AddInternalKey(&c, "aaac", "v1"); + + // Block 1. + AddInternalKey(&c, "aaca", "v2"); + + // Block 2. + AddInternalKey(&c, "caaa", "v3"); + AddInternalKey(&c, "caac", "v4"); + AddInternalKey(&c, "caae", "v5"); + + // Block 3. + AddInternalKey(&c, "ccaa", "v6"); + AddInternalKey(&c, "ccac", "v7"); + + // Write the file. + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, + &kvmap); + ASSERT_EQ(8, keys.size()); + + auto reader = c.GetTableReader(); + auto props = reader->GetTableProperties(); + ASSERT_EQ(4u, props->num_data_blocks); + ReadOptions read_options; + std::unique_ptr<InternalIterator> iter(reader->NewIterator( + read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized, + /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); + + // Shouldn't have read data blocks before iterator is seeked. + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + auto ikey = [](Slice user_key) { + return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); + }; + + // Seek to a key between blocks. If index contains first key, we shouldn't + // read any data blocks until value is requested. + iter->Seek(ikey("aaba")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[2], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 0 : 1, + stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v2", iter->value().ToString()); + EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Seek to the middle of a block. The block should be read right away. + iter->Seek(ikey("caab")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[4], iter->key().ToString()); + EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v4", iter->value().ToString()); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Seek to just before the same block and don't access value. + // The iterator should keep pinning the block contents. + iter->Seek(ikey("baaa")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[3], iter->key().ToString()); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Seek to the same block again to check that the block is still pinned. + iter->Seek(ikey("caae")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[5], iter->key().ToString()); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v5", iter->value().ToString()); + EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Step forward and fall through to the next block. Don't access value. + iter->Next(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[6], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 2 : 3, + stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Step forward again. Block should be read. + iter->Next(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[7], iter->key().ToString()); + EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v7", iter->value().ToString()); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Step forward and reach the end. + iter->Next(); + EXPECT_FALSE(iter->Valid()); + EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Seek to a single-key block and step forward without accessing value. + iter->Seek(ikey("aaca")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[2], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 0 : 1, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[3], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 1 : 2, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v3", iter->value().ToString()); + EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + + // Seek between blocks and step back without accessing value. + iter->Seek(ikey("aaca")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[2], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 2 : 3, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + + iter->Prev(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[1], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 2 : 3, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + // All blocks are in cache now, there'll be no more misses ever. + EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v1", iter->value().ToString()); + + // Next into the next block again. + iter->Next(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[2], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 2 : 4, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Seek to first and step back without accessing value. + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[0], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 2 : 5, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + iter->Prev(); + EXPECT_FALSE(iter->Valid()); + EXPECT_EQ(use_first_key ? 2 : 5, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + // Do some SeekForPrev() and SeekToLast() just to cover all methods. + iter->SeekForPrev(ikey("caad")); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[4], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 3 : 6, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v4", iter->value().ToString()); + EXPECT_EQ(use_first_key ? 3 : 6, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + iter->SeekToLast(); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(keys[7], iter->key().ToString()); + EXPECT_EQ(use_first_key ? 4 : 7, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("v7", iter->value().ToString()); + EXPECT_EQ(use_first_key ? 4 : 7, + stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + + c.ResetTableReader(); + } +} + +TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) { + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; + table_options.block_cache = NewLRUCache(10000); + Options options; + options.statistics = CreateDBStatistics(); + Statistics* stats = options.statistics.get(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + + TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false, + /* level */ -1, /* largest_seqno */ 42); + + c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x"); + c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y"); + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, + &kvmap); + ASSERT_EQ(2, keys.size()); + + auto reader = c.GetTableReader(); + auto props = reader->GetTableProperties(); + ASSERT_EQ(1u, props->num_data_blocks); + ReadOptions read_options; + std::unique_ptr<InternalIterator> iter(reader->NewIterator( + read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized, + /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); + + iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString()); + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), + iter->key().ToString()); + EXPECT_NE(keys[0], iter->key().ToString()); + // Key should have been served from index, without reading data blocks. + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + + ASSERT_TRUE(iter->PrepareValue()); + EXPECT_EQ("x", iter->value().ToString()); + EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); + EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); + EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), + iter->key().ToString()); + + c.ResetTableReader(); +} + +// It's very hard to figure out the index block size of a block accurately. +// To make sure we get the index size, we just make sure as key number +// grows, the filter block size also grows. +TEST_P(BlockBasedTableTest, IndexSizeStat) { + uint64_t last_index_size = 0; + + // we need to use random keys since the pure human readable texts + // may be well compressed, resulting insignifcant change of index + // block size. + Random rnd(test::RandomSeed()); + std::vector<std::string> keys; + + for (int i = 0; i < 100; ++i) { + keys.push_back(rnd.RandomString(10000)); + } + + // Each time we load one more key to the table. the table index block + // size is expected to be larger than last time's. + for (size_t i = 1; i < keys.size(); ++i) { + TableConstructor c(BytewiseComparator(), + true /* convert_to_internal_key_ */); + for (size_t j = 0; j < i; ++j) { + c.Add(keys[j], "val"); + } + + std::vector<std::string> ks; + stl_wrappers::KVMap kvmap; + Options options; + options.compression = kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_restart_interval = 1; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &ks, &kvmap); + auto index_size = c.GetTableReader()->GetTableProperties()->index_size; + ASSERT_GT(index_size, last_index_size); + last_index_size = index_size; + c.ResetTableReader(); + } +} + +TEST_P(BlockBasedTableTest, NumBlockStat) { + Random rnd(test::RandomSeed()); + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + Options options; + options.compression = kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_restart_interval = 1; + table_options.block_size = 1000; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + for (int i = 0; i < 10; ++i) { + // the key/val are slightly smaller than block size, so that each block + // holds roughly one key/value pair. + c.Add(rnd.RandomString(900), "val"); + } + + std::vector<std::string> ks; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &ks, &kvmap); + ASSERT_EQ(kvmap.size(), + c.GetTableReader()->GetTableProperties()->num_data_blocks); + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, TracingGetTest) { + TableConstructor c(BytewiseComparator()); + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.create_if_missing = true; + table_options.block_cache = NewLRUCache(1024 * 1024, 0); + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + SetupTracingTest(&c); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + std::string user_key = "k01"; + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + for (uint32_t i = 1; i <= 2; i++) { + PinnableSlice value; + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, /*tracing_get_id=*/i); + get_perf_context()->Reset(); + ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context, + moptions.prefix_extractor.get())); + ASSERT_EQ(get_context.State(), GetContext::kFound); + ASSERT_EQ(value.ToString(), kDummyValue); + } + + // Verify traces. + std::vector<BlockCacheTraceRecord> expected_records; + // The first two records should be prefetching index and filter blocks. + BlockCacheTraceRecord record; + record.block_type = TraceType::kBlockTraceIndexBlock; + record.caller = TableReaderCaller::kPrefetch; + record.is_cache_hit = false; + record.no_insert = false; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceFilterBlock; + expected_records.push_back(record); + // Then we should have three records for one index, one filter, and one data + // block access. + record.get_id = 1; + record.block_type = TraceType::kBlockTraceFilterBlock; + record.caller = TableReaderCaller::kUserGet; + record.get_from_user_specified_snapshot = false; + record.referenced_key = encoded_key; + record.referenced_key_exist_in_block = true; + record.is_cache_hit = true; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceIndexBlock; + expected_records.push_back(record); + record.is_cache_hit = false; + record.block_type = TraceType::kBlockTraceDataBlock; + expected_records.push_back(record); + // The second get should all observe cache hits. + record.is_cache_hit = true; + record.get_id = 2; + record.block_type = TraceType::kBlockTraceFilterBlock; + record.caller = TableReaderCaller::kUserGet; + record.get_from_user_specified_snapshot = false; + record.referenced_key = encoded_key; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceIndexBlock; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceDataBlock; + expected_records.push_back(record); + VerifyBlockAccessTrace(&c, expected_records); + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) { + TableConstructor c(BytewiseComparator()); + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.create_if_missing = true; + table_options.block_cache = NewLRUCache(1024 * 1024, 0); + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + SetupTracingTest(&c); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + for (uint32_t i = 1; i <= 2; i++) { + std::string user_key = "k01"; + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + c.GetTableReader()->ApproximateOffsetOf( + encoded_key, TableReaderCaller::kUserApproximateSize); + } + // Verify traces. + std::vector<BlockCacheTraceRecord> expected_records; + // The first two records should be prefetching index and filter blocks. + BlockCacheTraceRecord record; + record.block_type = TraceType::kBlockTraceIndexBlock; + record.caller = TableReaderCaller::kPrefetch; + record.is_cache_hit = false; + record.no_insert = false; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceFilterBlock; + expected_records.push_back(record); + // Then we should have two records for only index blocks. + record.block_type = TraceType::kBlockTraceIndexBlock; + record.caller = TableReaderCaller::kUserApproximateSize; + record.is_cache_hit = true; + expected_records.push_back(record); + expected_records.push_back(record); + VerifyBlockAccessTrace(&c, expected_records); + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, TracingIterator) { + TableConstructor c(BytewiseComparator()); + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.create_if_missing = true; + table_options.block_cache = NewLRUCache(1024 * 1024, 0); + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + SetupTracingTest(&c); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + for (uint32_t i = 1; i <= 2; i++) { + ReadOptions read_options; + std::unique_ptr<InternalIterator> iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUserIterator)); + iter->SeekToFirst(); + while (iter->Valid()) { + iter->key(); + iter->value(); + iter->Next(); + } + ASSERT_OK(iter->status()); + iter.reset(); + } + + // Verify traces. + std::vector<BlockCacheTraceRecord> expected_records; + // The first two records should be prefetching index and filter blocks. + BlockCacheTraceRecord record; + record.block_type = TraceType::kBlockTraceIndexBlock; + record.caller = TableReaderCaller::kPrefetch; + record.is_cache_hit = false; + record.no_insert = false; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceFilterBlock; + expected_records.push_back(record); + // Then we should have three records for index and two data block access. + record.block_type = TraceType::kBlockTraceIndexBlock; + record.caller = TableReaderCaller::kUserIterator; + record.is_cache_hit = true; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceDataBlock; + record.is_cache_hit = false; + expected_records.push_back(record); + expected_records.push_back(record); + // When we iterate this file for the second time, we should observe all cache + // hits. + record.block_type = TraceType::kBlockTraceIndexBlock; + record.is_cache_hit = true; + expected_records.push_back(record); + record.block_type = TraceType::kBlockTraceDataBlock; + expected_records.push_back(record); + expected_records.push_back(record); + VerifyBlockAccessTrace(&c, expected_records); + c.ResetTableReader(); +} + +// A simple tool that takes the snapshot of block cache statistics. +class BlockCachePropertiesSnapshot { + public: + explicit BlockCachePropertiesSnapshot(Statistics* statistics) { + block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS); + block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT); + index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS); + index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT); + data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS); + data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT); + filter_block_cache_miss = + statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS); + filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT); + block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ); + block_cache_bytes_write = + statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE); + } + + void AssertIndexBlockStat(int64_t expected_index_block_cache_miss, + int64_t expected_index_block_cache_hit) { + ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss); + ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit); + } + + void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss, + int64_t expected_filter_block_cache_hit) { + ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss); + ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit); + } + + // Check if the fetched props matches the expected ones. + // TODO(kailiu) Use this only when you disabled filter policy! + void AssertEqual(int64_t expected_index_block_cache_miss, + int64_t expected_index_block_cache_hit, + int64_t expected_data_block_cache_miss, + int64_t expected_data_block_cache_hit) const { + ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss); + ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit); + ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss); + ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit); + ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss, + block_cache_miss); + ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit, + block_cache_hit); + } + + int64_t GetCacheBytesRead() { return block_cache_bytes_read; } + + int64_t GetCacheBytesWrite() { return block_cache_bytes_write; } + + private: + int64_t block_cache_miss = 0; + int64_t block_cache_hit = 0; + int64_t index_block_cache_miss = 0; + int64_t index_block_cache_hit = 0; + int64_t data_block_cache_miss = 0; + int64_t data_block_cache_hit = 0; + int64_t filter_block_cache_miss = 0; + int64_t filter_block_cache_hit = 0; + int64_t block_cache_bytes_read = 0; + int64_t block_cache_bytes_write = 0; +}; + +// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't +// use block cache to store them). +TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) { + Options options; + options.create_if_missing = true; + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_cache = NewLRUCache(1024, 4); + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("key", "value"); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + // preloading filter/index blocks is enabled. + auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader()); + ASSERT_FALSE(reader->TEST_FilterBlockInCache()); + ASSERT_FALSE(reader->TEST_IndexBlockInCache()); + + { + // nothing happens in the beginning + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } + + { + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, Slice(), nullptr, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + // a hack that just to trigger BlockBasedTable::GetFilter. + ASSERT_OK(reader->Get(ReadOptions(), "non-exist-key", &get_context, + moptions.prefix_extractor.get())); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } +} + +// Due to the difficulities of the intersaction between statistics, this test +// only tests the case when "index block is put to block cache" +TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) { + // -- Table construction + Options options; + options.create_if_missing = true; + options.statistics = CreateDBStatistics(); + + // Enable the cache for index/filter blocks + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + LRUCacheOptions co; + co.capacity = 2048; + co.num_shard_bits = 2; + co.metadata_charge_policy = kDontChargeCacheMetadata; + table_options.block_cache = NewLRUCache(co); + table_options.cache_index_and_filter_blocks = true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("key", "value"); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + // preloading filter/index blocks is prohibited. + auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader()); + ASSERT_FALSE(reader->TEST_FilterBlockInCache()); + ASSERT_TRUE(reader->TEST_IndexBlockInCache()); + + // -- PART 1: Open with regular block cache. + // Since block_cache is disabled, no cache activities will be involved. + std::unique_ptr<InternalIterator> iter; + + int64_t last_cache_bytes_read = 0; + // At first, no block will be accessed. + { + BlockCachePropertiesSnapshot props(options.statistics.get()); + // index will be added to block cache. + props.AssertEqual(1, // index block miss + 0, 0, 0); + ASSERT_EQ(props.GetCacheBytesRead(), 0); + ASSERT_EQ(props.GetCacheBytesWrite(), + static_cast<int64_t>(table_options.block_cache->GetUsage())); + last_cache_bytes_read = props.GetCacheBytesRead(); + } + + // Only index block will be accessed + { + iter.reset(c.NewIterator(moptions.prefix_extractor.get())); + BlockCachePropertiesSnapshot props(options.statistics.get()); + // NOTE: to help better highlight the "detla" of each ticker, I use + // <last_value> + <added_value> to indicate the increment of changed + // value; other numbers remain the same. + props.AssertEqual(1, 0 + 1, // index block hit + 0, 0); + // Cache hit, bytes read from cache should increase + ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); + ASSERT_EQ(props.GetCacheBytesWrite(), + static_cast<int64_t>(table_options.block_cache->GetUsage())); + last_cache_bytes_read = props.GetCacheBytesRead(); + } + + // Only data block will be accessed + { + iter->SeekToFirst(); + ASSERT_OK(iter->status()); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertEqual(1, 1, 0 + 1, // data block miss + 0); + // Cache miss, Bytes read from cache should not change + ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read); + ASSERT_EQ(props.GetCacheBytesWrite(), + static_cast<int64_t>(table_options.block_cache->GetUsage())); + last_cache_bytes_read = props.GetCacheBytesRead(); + } + + // Data block will be in cache + { + iter.reset(c.NewIterator(moptions.prefix_extractor.get())); + iter->SeekToFirst(); + ASSERT_OK(iter->status()); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertEqual(1, 1 + 1, /* index block hit */ + 1, 0 + 1 /* data block hit */); + // Cache hit, bytes read from cache should increase + ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); + ASSERT_EQ(props.GetCacheBytesWrite(), + static_cast<int64_t>(table_options.block_cache->GetUsage())); + } + // release the iterator so that the block cache can reset correctly. + iter.reset(); + + c.ResetTableReader(); + + // -- PART 2: Open with very small block cache + // In this test, no block will ever get hit since the block cache is + // too small to fit even one entry. + table_options.block_cache = NewLRUCache(1, 4); + options.statistics = CreateDBStatistics(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + const ImmutableOptions ioptions2(options); + const MutableCFOptions moptions2(options); + ASSERT_OK(c.Reopen(ioptions2, moptions2)); + { + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertEqual(1, // index block miss + 0, 0, 0); + // Cache miss, Bytes read from cache should not change + ASSERT_EQ(props.GetCacheBytesRead(), 0); + } + + { + // Both index and data block get accessed. + // It first cache index block then data block. But since the cache size + // is only 1, index block will be purged after data block is inserted. + iter.reset(c.NewIterator(moptions2.prefix_extractor.get())); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertEqual(1 + 1, // index block miss + 0, 0, // data block miss + 0); + // Cache hit, bytes read from cache should increase + ASSERT_EQ(props.GetCacheBytesRead(), 0); + } + + { + // SeekToFirst() accesses data block. With similar reason, we expect data + // block's cache miss. + iter->SeekToFirst(); + ASSERT_OK(iter->status()); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertEqual(2, 0, 0 + 1, // data block miss + 0); + // Cache miss, Bytes read from cache should not change + ASSERT_EQ(props.GetCacheBytesRead(), 0); + } + iter.reset(); + c.ResetTableReader(); + + // -- PART 3: Open table with bloom filter enabled but not in SST file + table_options.block_cache = NewLRUCache(4096, 4); + table_options.cache_index_and_filter_blocks = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c3(BytewiseComparator()); + std::string user_key = "k01"; + InternalKey internal_key(user_key, 0, kTypeValue); + c3.Add(internal_key.Encode().ToString(), "hello"); + ImmutableOptions ioptions3(options); + MutableCFOptions moptions3(options); + // Generate table without filter policy + c3.Finish(options, ioptions3, moptions3, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + c3.ResetTableReader(); + + // Open table with filter policy + table_options.filter_policy.reset(NewBloomFilterPolicy(1)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.statistics = CreateDBStatistics(); + ImmutableOptions ioptions4(options); + MutableCFOptions moptions4(options); + ASSERT_OK(c3.Reopen(ioptions4, moptions4)); + reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader()); + ASSERT_FALSE(reader->TEST_FilterBlockInCache()); + PinnableSlice value; + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context, + moptions4.prefix_extractor.get())); + ASSERT_STREQ(value.data(), "hello"); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertFilterBlockStat(0, 0); + c3.ResetTableReader(); +} + +void ValidateBlockSizeDeviation(int value, int expected) { + BlockBasedTableOptions table_options; + table_options.block_size_deviation = value; + BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options); + + const BlockBasedTableOptions* normalized_table_options = + factory->GetOptions<BlockBasedTableOptions>(); + ASSERT_EQ(normalized_table_options->block_size_deviation, expected); + + delete factory; +} + +void ValidateBlockRestartInterval(int value, int expected) { + BlockBasedTableOptions table_options; + table_options.block_restart_interval = value; + BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options); + + const BlockBasedTableOptions* normalized_table_options = + factory->GetOptions<BlockBasedTableOptions>(); + ASSERT_EQ(normalized_table_options->block_restart_interval, expected); + + delete factory; +} + +TEST_P(BlockBasedTableTest, InvalidOptions) { + // invalid values for block_size_deviation (<0 or >100) are silently set to 0 + ValidateBlockSizeDeviation(-10, 0); + ValidateBlockSizeDeviation(-1, 0); + ValidateBlockSizeDeviation(0, 0); + ValidateBlockSizeDeviation(1, 1); + ValidateBlockSizeDeviation(99, 99); + ValidateBlockSizeDeviation(100, 100); + ValidateBlockSizeDeviation(101, 0); + ValidateBlockSizeDeviation(1000, 0); + + // invalid values for block_restart_interval (<1) are silently set to 1 + ValidateBlockRestartInterval(-10, 1); + ValidateBlockRestartInterval(-1, 1); + ValidateBlockRestartInterval(0, 1); + ValidateBlockRestartInterval(1, 1); + ValidateBlockRestartInterval(2, 2); + ValidateBlockRestartInterval(1000, 1000); +} + +TEST_P(BlockBasedTableTest, BlockReadCountTest) { + // bloom_filter_type = 1 -- full filter using use_block_based_builder=false + // bloom_filter_type = 2 -- full filter using use_block_based_builder=true + // because of API change to hide block-based filter + for (int bloom_filter_type = 1; bloom_filter_type <= 2; ++bloom_filter_type) { + for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2; + ++index_and_filter_in_cache) { + Options options; + options.create_if_missing = true; + + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_cache = NewLRUCache(1, 0); + table_options.cache_index_and_filter_blocks = index_and_filter_in_cache; + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, bloom_filter_type == 2)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + + TableConstructor c(BytewiseComparator()); + std::string user_key = "k04"; + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + c.Add(encoded_key, "hello"); + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + // Generate table with filter policy + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + auto reader = c.GetTableReader(); + PinnableSlice value; + { + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + get_perf_context()->Reset(); + ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, + moptions.prefix_extractor.get())); + if (index_and_filter_in_cache) { + // data, index and filter block + ASSERT_EQ(get_perf_context()->block_read_count, 3); + ASSERT_EQ(get_perf_context()->index_block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); + } else { + // just the data block + ASSERT_EQ(get_perf_context()->block_read_count, 1); + } + ASSERT_EQ(get_context.State(), GetContext::kFound); + ASSERT_STREQ(value.data(), "hello"); + } + + // Get non-existing key + user_key = "does-not-exist"; + internal_key = InternalKey(user_key, 0, kTypeValue); + encoded_key = internal_key.Encode().ToString(); + + value.Reset(); + { + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + get_perf_context()->Reset(); + ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, + moptions.prefix_extractor.get())); + ASSERT_EQ(get_context.State(), GetContext::kNotFound); + } + + if (index_and_filter_in_cache) { + if (bloom_filter_type == 0) { + // with block-based, we read index and then the filter + ASSERT_EQ(get_perf_context()->block_read_count, 2); + ASSERT_EQ(get_perf_context()->index_block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); + } else { + // with full-filter, we read filter first and then we stop + ASSERT_EQ(get_perf_context()->block_read_count, 1); + ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); + } + } else { + // filter is already in memory and it figures out that the key doesn't + // exist + ASSERT_EQ(get_perf_context()->block_read_count, 0); + } + } + } +} + +TEST_P(BlockBasedTableTest, BlockCacheLeak) { + // Check that when we reopen a table we don't lose access to blocks already + // in the cache. This test checks whether the Table actually makes use of the + // unique ID from the file. + + Options opt; + std::unique_ptr<InternalKeyComparator> ikc; + ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); + opt.compression = kNoCompression; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.block_size = 1024; + // big enough so we don't ever lose cached values. + table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); + opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(opt); + const MutableCFOptions moptions(opt); + c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); + + std::unique_ptr<InternalIterator> iter( + c.NewIterator(moptions.prefix_extractor.get())); + iter->SeekToFirst(); + while (iter->Valid()) { + iter->key(); + iter->value(); + iter->Next(); + } + ASSERT_OK(iter->status()); + iter.reset(); + + const ImmutableOptions ioptions1(opt); + const MutableCFOptions moptions1(opt); + ASSERT_OK(c.Reopen(ioptions1, moptions1)); + auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader()); + for (const std::string& key : keys) { + InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); + ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); + } + c.ResetTableReader(); + + // rerun with different block cache + table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); + opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); + const ImmutableOptions ioptions2(opt); + const MutableCFOptions moptions2(opt); + ASSERT_OK(c.Reopen(ioptions2, moptions2)); + table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader()); + for (const std::string& key : keys) { + InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); + ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); + } + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, MemoryAllocator) { + auto default_memory_allocator = std::make_shared<DefaultMemoryAllocator>(); + auto custom_memory_allocator = + std::make_shared<CountedMemoryAllocator>(default_memory_allocator); + { + Options opt; + std::unique_ptr<InternalKeyComparator> ikc; + ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); + opt.compression = kNoCompression; + BlockBasedTableOptions table_options; + table_options.block_size = 1024; + LRUCacheOptions lruOptions; + lruOptions.memory_allocator = custom_memory_allocator; + lruOptions.capacity = 16 * 1024 * 1024; + lruOptions.num_shard_bits = 4; + table_options.block_cache = NewLRUCache(std::move(lruOptions)); + opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator(), + true /* convert_to_internal_key_ */); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(opt); + const MutableCFOptions moptions(opt); + c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); + + std::unique_ptr<InternalIterator> iter( + c.NewIterator(moptions.prefix_extractor.get())); + iter->SeekToFirst(); + while (iter->Valid()) { + iter->key(); + iter->value(); + iter->Next(); + } + ASSERT_OK(iter->status()); + } + + // out of scope, block cache should have been deleted, all allocations + // deallocated + EXPECT_EQ(custom_memory_allocator->GetNumAllocations(), + custom_memory_allocator->GetNumDeallocations()); + // make sure that allocations actually happened through the cache allocator + EXPECT_GT(custom_memory_allocator->GetNumAllocations(), 0); +} + +// Test the file checksum of block based table +TEST_P(BlockBasedTableTest, NoFileChecksum) { + Options options; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + int level = 0; + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + + FileChecksumTestHelper f(true); + f.CreateWritableFile(); + std::unique_ptr<TableBuilder> builder; + builder.reset(ioptions.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, *comparator, + &int_tbl_prop_collector_factories, + options.compression, options.compression_opts, + kUnknownColumnFamily, column_family_name, level), + f.GetFileWriter())); + ASSERT_OK(f.ResetTableBuilder(std::move(builder))); + f.AddKVtoKVMap(1000); + ASSERT_OK(f.WriteKVAndFlushTable()); + ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); + ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum); +} + +TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { + FileChecksumGenCrc32cFactory* file_checksum_gen_factory = + new FileChecksumGenCrc32cFactory(); + Options options; + options.file_checksum_gen_factory.reset(file_checksum_gen_factory); + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + int level = 0; + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + + FileChecksumGenContext gen_context; + gen_context.file_name = "db/tmp"; + std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + FileChecksumTestHelper f(true); + f.CreateWritableFile(); + f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); + std::unique_ptr<TableBuilder> builder; + builder.reset(ioptions.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, *comparator, + &int_tbl_prop_collector_factories, + options.compression, options.compression_opts, + kUnknownColumnFamily, column_family_name, level), + f.GetFileWriter())); + ASSERT_OK(f.ResetTableBuilder(std::move(builder))); + f.AddKVtoKVMap(1000); + ASSERT_OK(f.WriteKVAndFlushTable()); + ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); + + std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + std::string checksum; + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); + ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); + + // Unit test the generator itself for schema stability + std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen3 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + const char data[] = "here is some data"; + checksum_crc32c_gen3->Update(data, sizeof(data)); + checksum_crc32c_gen3->Finalize(); + checksum = checksum_crc32c_gen3->GetChecksum(); + ASSERT_STREQ(checksum.c_str(), "\345\245\277\110"); +} + +// Plain table is not supported in ROCKSDB_LITE +#ifndef ROCKSDB_LITE +TEST_F(PlainTableTest, BasicPlainTableProperties) { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 8; + plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + + PlainTableFactory factory(plain_table_options); + std::unique_ptr<FSWritableFile> sink(new test::StringSink()); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(sink), "" /* don't care */, FileOptions())); + Options options; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + int unknown_level = -1; + std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, unknown_level), + file_writer.get())); + + for (char c = 'a'; c <= 'z'; ++c) { + std::string key(8, c); + key.append("\1 "); // PlainTable expects internal key structure + std::string value(28, c + 42); + builder->Add(key, value); + } + ASSERT_OK(builder->Finish()); + ASSERT_OK(file_writer->Flush()); + + test::StringSink* ss = + static_cast<test::StringSink*>(file_writer->writable_file()); + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(ss->contents(), 72242, true)); + std::unique_ptr<RandomAccessFileReader> file_reader( + new RandomAccessFileReader(std::move(source), "test")); + + std::unique_ptr<TableProperties> props; + auto s = ReadTableProperties(file_reader.get(), ss->contents().size(), + kPlainTableMagicNumber, ioptions, &props); + ASSERT_OK(s); + + ASSERT_EQ(0ul, props->index_size); + ASSERT_EQ(0ul, props->filter_size); + ASSERT_EQ(16ul * 26, props->raw_key_size); + ASSERT_EQ(28ul * 26, props->raw_value_size); + ASSERT_EQ(26ul, props->num_entries); + ASSERT_EQ(1ul, props->num_data_blocks); +} + +TEST_F(PlainTableTest, NoFileChecksum) { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 20; + plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + PlainTableFactory factory(plain_table_options); + + Options options; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + int unknown_level = -1; + FileChecksumTestHelper f(true); + f.CreateWritableFile(); + + std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, unknown_level), + f.GetFileWriter())); + ASSERT_OK(f.ResetTableBuilder(std::move(builder))); + f.AddKVtoKVMap(1000); + ASSERT_OK(f.WriteKVAndFlushTable()); + ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); + EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum); +} + +TEST_F(PlainTableTest, Crc32cFileChecksum) { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 20; + plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + PlainTableFactory factory(plain_table_options); + + FileChecksumGenCrc32cFactory* file_checksum_gen_factory = + new FileChecksumGenCrc32cFactory(); + Options options; + options.file_checksum_gen_factory.reset(file_checksum_gen_factory); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + int unknown_level = -1; + + FileChecksumGenContext gen_context; + gen_context.file_name = "db/tmp"; + std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + FileChecksumTestHelper f(true); + f.CreateWritableFile(); + f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); + + std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, unknown_level), + f.GetFileWriter())); + ASSERT_OK(f.ResetTableBuilder(std::move(builder))); + f.AddKVtoKVMap(1000); + ASSERT_OK(f.WriteKVAndFlushTable()); + ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); + + std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + std::string checksum; + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); + EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); +} + +#endif // !ROCKSDB_LITE + +TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + options.db_host_id = ""; + test::PlainInternalKeyComparator internal_comparator(options.comparator); + options.compression = kNoCompression; + BlockBasedTableOptions table_options; + table_options.block_size = 1024; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); + // k04 and k05 will be in two consecutive blocks, the index is + // an arbitrary slice between k04 and k05, either before or after k04a + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); + c.ResetTableReader(); +} + +static void DoCompressionTest(CompressionType comp) { + Random rnd(301); + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + std::string tmp; + c.Add("k01", "hello"); + c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); + c.Add("k03", "hello3"); + c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + test::PlainInternalKeyComparator ikc(options.comparator); + options.compression = comp; + BlockBasedTableOptions table_options; + table_options.block_size = 1024; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap); + + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3525)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3525)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7075)); + c.ResetTableReader(); +} + +TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) { + std::vector<CompressionType> compression_state; + if (!Snappy_Supported()) { + fprintf(stderr, "skipping snappy compression tests\n"); + } else { + compression_state.push_back(kSnappyCompression); + } + + if (!Zlib_Supported()) { + fprintf(stderr, "skipping zlib compression tests\n"); + } else { + compression_state.push_back(kZlibCompression); + } + + // TODO(kailiu) DoCompressionTest() doesn't work with BZip2. + /* + if (!BZip2_Supported()) { + fprintf(stderr, "skipping bzip2 compression tests\n"); + } else { + compression_state.push_back(kBZip2Compression); + } + */ + + if (!LZ4_Supported()) { + fprintf(stderr, "skipping lz4 and lz4hc compression tests\n"); + } else { + compression_state.push_back(kLZ4Compression); + compression_state.push_back(kLZ4HCCompression); + } + + if (!XPRESS_Supported()) { + fprintf(stderr, "skipping xpress and xpress compression tests\n"); + } else { + compression_state.push_back(kXpressCompression); + } + + for (auto state : compression_state) { + DoCompressionTest(state); + } +} + +TEST_F(GeneralTableTest, ApproximateKeyAnchors) { + Random rnd(301); + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + std::string tmp; + for (int i = 1000; i < 9000; i++) { + c.Add(std::to_string(i), rnd.RandomString(2000)); + } + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + InternalKeyComparator ikc(options.comparator); + options.compression = kNoCompression; + BlockBasedTableOptions table_options; + table_options.block_size = 4096; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap); + + std::vector<TableReader::Anchor> anchors; + ASSERT_OK(c.GetTableReader()->ApproximateKeyAnchors(ReadOptions(), anchors)); + // The target is 128 anchors. But in reality it can be slightly more or fewer. + ASSERT_GT(anchors.size(), 120); + ASSERT_LT(anchors.size(), 140); + + // We have around 8000 keys. With 128 anchors, in average 62.5 keys per + // anchor. Here we take a rough range and estimate the distance between + // anchors is between 50 and 100. + // Total data size is about 18,000,000, so each anchor range is about + // 140,625. We also take a rough range. + int prev_num = 1000; + // Non-last anchor + for (size_t i = 0; i + 1 < anchors.size(); i++) { + auto& anchor = anchors[i]; + ASSERT_GT(anchor.range_size, 100000); + ASSERT_LT(anchor.range_size, 200000); + + // Key might be shortened, so fill 0 in the end if it is the case. + std::string key_cpy = anchor.user_key; + key_cpy.append(4 - key_cpy.size(), '0'); + int num = std::stoi(key_cpy); + ASSERT_GT(num - prev_num, 50); + ASSERT_LT(num - prev_num, 100); + prev_num = num; + } + + ASSERT_EQ("8999", anchors.back().user_key); + ASSERT_LT(anchors.back().range_size, 200000); + + c.ResetTableReader(); +} + +#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) +TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) { + Random rnd(test::RandomSeed() + 5); + for (int num_entries = 0; num_entries < 2000; + num_entries += (num_entries < 50 ? 1 : 200)) { + for (int e = 0; e < num_entries; e++) { + Add(test::RandomKey(&rnd, rnd.Skewed(4)), + rnd.RandomString(rnd.Skewed(5))); + } + Test(&rnd); + } +} + +#ifndef ROCKSDB_LITE +TEST_F(DBHarnessTest, RandomizedLongDB) { + Random rnd(test::RandomSeed()); + int num_entries = 100000; + for (int e = 0; e < num_entries; e++) { + std::string v; + Add(test::RandomKey(&rnd, rnd.Skewed(4)), rnd.RandomString(rnd.Skewed(5))); + } + Test(&rnd); + + // We must have created enough data to force merging + int files = 0; + for (int level = 0; level < db()->NumberLevels(); level++) { + std::string value; + char name[100]; + snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level); + ASSERT_TRUE(db()->GetProperty(name, &value)); + files += atoi(value.c_str()); + } + ASSERT_GT(files, 0); +} +#endif // ROCKSDB_LITE +#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) + +class MemTableTest : public testing::Test { + public: + MemTableTest() { + InternalKeyComparator cmp(BytewiseComparator()); + auto table_factory = std::make_shared<SkipListFactory>(); + options_.memtable_factory = table_factory; + ImmutableOptions ioptions(options_); + wb_ = new WriteBufferManager(options_.db_write_buffer_size); + memtable_ = new MemTable(cmp, ioptions, MutableCFOptions(options_), wb_, + kMaxSequenceNumber, 0 /* column_family_id */); + memtable_->Ref(); + } + + ~MemTableTest() { + delete memtable_->Unref(); + delete wb_; + } + + MemTable* GetMemTable() { return memtable_; } + + private: + MemTable* memtable_; + Options options_; + WriteBufferManager* wb_; +}; + +TEST_F(MemTableTest, Simple) { + WriteBatch batch; + WriteBatchInternal::SetSequence(&batch, 100); + ASSERT_OK(batch.Put(std::string("k1"), std::string("v1"))); + ASSERT_OK(batch.Put(std::string("k2"), std::string("v2"))); + ASSERT_OK(batch.Put(std::string("k3"), std::string("v3"))); + ASSERT_OK(batch.Put(std::string("largekey"), std::string("vlarge"))); + ASSERT_OK(batch.DeleteRange(std::string("chi"), std::string("xigua"))); + ASSERT_OK(batch.DeleteRange(std::string("begin"), std::string("end"))); + ColumnFamilyMemTablesDefault cf_mems_default(GetMemTable()); + ASSERT_TRUE( + WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr) + .ok()); + + for (int i = 0; i < 2; ++i) { + Arena arena; + ScopedArenaIterator arena_iter_guard; + std::unique_ptr<InternalIterator> iter_guard; + InternalIterator* iter; + if (i == 0) { + iter = GetMemTable()->NewIterator(ReadOptions(), &arena); + arena_iter_guard.set(iter); + } else { + iter = GetMemTable()->NewRangeTombstoneIterator( + ReadOptions(), kMaxSequenceNumber /* read_seq */, + false /* immutable_memtable */); + iter_guard.reset(iter); + } + if (iter == nullptr) { + continue; + } + iter->SeekToFirst(); + while (iter->Valid()) { + fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), + iter->value().ToString().c_str()); + iter->Next(); + } + } +} + +// Test the empty key +TEST_P(ParameterizedHarnessTest, SimpleEmptyKey) { + Random rnd(test::RandomSeed() + 1); + Add("", "v"); + Test(&rnd); +} + +TEST_P(ParameterizedHarnessTest, SimpleSingle) { + Random rnd(test::RandomSeed() + 2); + Add("abc", "v"); + Test(&rnd); +} + +TEST_P(ParameterizedHarnessTest, SimpleMulti) { + Random rnd(test::RandomSeed() + 3); + Add("abc", "v"); + Add("abcd", "v"); + Add("ac", "v2"); + Test(&rnd); +} + +TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) { + Random rnd(test::RandomSeed() + 4); + Add("\xff\xff", "v3"); + Test(&rnd); +} + +TEST(TableTest, FooterTests) { + Random* r = Random::GetTLSInstance(); + uint64_t data_size = (uint64_t{1} << r->Uniform(40)) + r->Uniform(100); + uint64_t index_size = r->Uniform(1000000000); + uint64_t metaindex_size = r->Uniform(1000000); + // 5 == block trailer size + BlockHandle index(data_size + 5, index_size); + BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size); + uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5; + { + // legacy block based + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0, + footer_offset, kCRC32c, meta_index, index); + Footer decoded_footer; + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); + ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); + ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.format_version(), 0U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); + // Ensure serialized with legacy magic + ASSERT_EQ( + DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), + kLegacyBlockBasedTableMagicNumber); + } + // block based, various checksums, various versions + for (auto t : GetSupportedChecksums()) { + for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) { + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t, + meta_index, index); + Footer decoded_footer; + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); + ASSERT_EQ(decoded_footer.table_magic_number(), + kBlockBasedTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum_type(), t); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), + meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); + ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.format_version(), fv); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); + } + } +// Plain table is not supported in ROCKSDB_LITE +#ifndef ROCKSDB_LITE + { + // legacy plain table + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 0, footer_offset, + kNoChecksum, meta_index); + Footer decoded_footer; + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); + ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); + ASSERT_EQ(decoded_footer.index_handle().size(), 0U); + ASSERT_EQ(decoded_footer.format_version(), 0U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); + // Ensure serialized with legacy magic + ASSERT_EQ( + DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), + kLegacyPlainTableMagicNumber); + } + { + // xxhash plain table (not currently used) + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 1, footer_offset, + kxxHash, meta_index); + Footer decoded_footer; + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); + ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum_type(), kxxHash); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); + ASSERT_EQ(decoded_footer.index_handle().size(), 0U); + ASSERT_EQ(decoded_footer.format_version(), 1U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); + } +#endif // !ROCKSDB_LITE +} + +class IndexBlockRestartIntervalTest + : public TableTest, + public ::testing::WithParamInterface<std::pair<int, bool>> { + public: + static std::vector<std::pair<int, bool>> GetRestartValues() { + return {{-1, false}, {0, false}, {1, false}, {8, false}, + {16, false}, {32, false}, {-1, true}, {0, true}, + {1, true}, {8, true}, {16, true}, {32, true}}; + } +}; + +INSTANTIATE_TEST_CASE_P( + IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest, + ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues())); + +TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { + const int kKeysInTable = 10000; + const int kKeySize = 100; + const int kValSize = 500; + + const int index_block_restart_interval = std::get<0>(GetParam()); + const bool value_delta_encoding = std::get<1>(GetParam()); + + Options options; + BlockBasedTableOptions table_options; + table_options.block_size = 64; // small block size to get big index block + table_options.index_block_restart_interval = index_block_restart_interval; + if (value_delta_encoding) { + table_options.format_version = 4; + } else { + table_options.format_version = 3; + } + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator()); + static Random rnd(301); + for (int i = 0; i < kKeysInTable; i++) { + InternalKey k(rnd.RandomString(kKeySize), 0, kTypeValue); + c.Add(k.Encode().ToString(), rnd.RandomString(kValSize)); + } + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + std::unique_ptr<InternalKeyComparator> comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, + &kvmap); + auto reader = c.GetTableReader(); + + ReadOptions read_options; + std::unique_ptr<InternalIterator> db_iter(reader->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // Test point lookup + for (auto& kv : kvmap) { + db_iter->Seek(kv.first); + + ASSERT_TRUE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); + ASSERT_EQ(db_iter->key(), kv.first); + ASSERT_EQ(db_iter->value(), kv.second); + } + + // Test iterating + auto kv_iter = kvmap.begin(); + for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { + ASSERT_EQ(db_iter->key(), kv_iter->first); + ASSERT_EQ(db_iter->value(), kv_iter->second); + kv_iter++; + } + ASSERT_EQ(kv_iter, kvmap.end()); + c.ResetTableReader(); +} + +class PrefixTest : public testing::Test { + public: + PrefixTest() : testing::Test() {} + ~PrefixTest() override {} +}; + +namespace { +// A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest +class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform { + public: + ~TestPrefixExtractor() override{}; + const char* Name() const override { return "TestPrefixExtractor"; } + + ROCKSDB_NAMESPACE::Slice Transform( + const ROCKSDB_NAMESPACE::Slice& src) const override { + assert(IsValid(src)); + return ROCKSDB_NAMESPACE::Slice(src.data(), 3); + } + + bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override { + return IsValid(src); + } + + bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override { + return true; + } + + bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const { + if (src.size() != 4) { + return false; + } + if (src[0] != '[') { + return false; + } + if (src[1] < '0' || src[1] > '9') { + return false; + } + if (src[2] != ']') { + return false; + } + if (src[3] < '0' || src[3] > '9') { + return false; + } + return true; + } +}; +} // namespace + +TEST_F(PrefixTest, PrefixAndWholeKeyTest) { + ROCKSDB_NAMESPACE::Options options; + options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal; + options.num_levels = 20; + options.create_if_missing = true; + options.optimize_filters_for_hits = false; + options.target_file_size_base = 268435456; + options.prefix_extractor = std::make_shared<TestPrefixExtractor>(); + ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto; + bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); + bbto.block_size = 262144; + bbto.whole_key_filtering = true; + + const std::string kDBPath = test::PerThreadDBPath("table_prefix_test"); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + ASSERT_OK(DestroyDB(kDBPath, options)); + ROCKSDB_NAMESPACE::DB* db; + ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); + + // Create a bunch of keys with 10 filters. + for (int i = 0; i < 10; i++) { + std::string prefix = "[" + std::to_string(i) + "]"; + for (int j = 0; j < 10; j++) { + std::string key = prefix + std::to_string(j); + ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1")); + } + } + + // Trigger compaction. + ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + delete db; + // In the second round, turn whole_key_filtering off and expect + // rocksdb still works. +} + +/* + * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in + * the SST file any more. Instead, RocksDB deduces global_seqno from the + * MANIFEST while reading from an SST. Therefore, it's not possible to test the + * functionality of global_seqno in a single, isolated unit test without the + * involvement of Version, VersionSet, etc. + */ +TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { + BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "" /* don't care */, FileOptions())); + Options options; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + int_tbl_prop_collector_factories.emplace_back( + new SstFileWriterPropertiesCollectorFactory(2 /* version */, + 0 /* global_seqno*/)); + std::string column_family_name; + std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, -1), + file_writer.get())); + + for (char c = 'a'; c <= 'z'; ++c) { + std::string key(8, c); + std::string value = key; + InternalKey ik(key, 0, kTypeValue); + + builder->Add(ik.Encode(), value); + } + ASSERT_OK(builder->Finish()); + ASSERT_OK(file_writer->Flush()); + + test::RandomRWStringSink ss_rw(sink); + uint32_t version; + uint64_t global_seqno; + uint64_t global_seqno_offset; + + // Helper function to get version, global_seqno, global_seqno_offset + std::function<void()> GetVersionAndGlobalSeqno = [&]() { + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(ss_rw.contents(), 73342, true)); + std::unique_ptr<RandomAccessFileReader> file_reader( + new RandomAccessFileReader(std::move(source), "")); + + std::unique_ptr<TableProperties> props; + ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(), + kBlockBasedTableMagicNumber, ioptions, + &props)); + + UserCollectedProperties user_props = props->user_collected_properties; + version = DecodeFixed32( + user_props[ExternalSstFilePropertyNames::kVersion].c_str()); + global_seqno = DecodeFixed64( + user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str()); + global_seqno_offset = props->external_sst_file_global_seqno_offset; + }; + + // Helper function to update the value of the global seqno in the file + std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) { + std::string new_global_seqno; + PutFixed64(&new_global_seqno, val); + + ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno, IOOptions(), + nullptr)); + }; + + // Helper function to get the contents of the table InternalIterator + std::unique_ptr<TableReader> table_reader; + const ReadOptions read_options; + std::function<InternalIterator*()> GetTableInternalIter = [&]() { + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(ss_rw.contents(), 73342, true)); + std::unique_ptr<RandomAccessFileReader> file_reader( + new RandomAccessFileReader(std::move(source), "")); + + options.table_factory->NewTableReader( + TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(), + ikc), + std::move(file_reader), ss_rw.contents().size(), &table_reader); + + return table_reader->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized); + }; + + GetVersionAndGlobalSeqno(); + ASSERT_EQ(2u, version); + ASSERT_EQ(0u, global_seqno); + + InternalIterator* iter = GetTableInternalIter(); + char current_c = 'a'; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedInternalKey pik; + ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); + + ASSERT_EQ(pik.type, ValueType::kTypeValue); + ASSERT_EQ(pik.sequence, 0); + ASSERT_EQ(pik.user_key, iter->value()); + ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); + current_c++; + } + ASSERT_EQ(current_c, 'z' + 1); + delete iter; + + // Update global sequence number to 10 + SetGlobalSeqno(10); + GetVersionAndGlobalSeqno(); + ASSERT_EQ(2u, version); + ASSERT_EQ(10u, global_seqno); + + iter = GetTableInternalIter(); + current_c = 'a'; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedInternalKey pik; + ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); + + ASSERT_EQ(pik.type, ValueType::kTypeValue); + ASSERT_EQ(pik.sequence, 10); + ASSERT_EQ(pik.user_key, iter->value()); + ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); + current_c++; + } + ASSERT_EQ(current_c, 'z' + 1); + + // Verify Seek + for (char c = 'a'; c <= 'z'; c++) { + std::string k = std::string(8, c); + InternalKey ik(k, 10, kValueTypeForSeek); + iter->Seek(ik.Encode()); + ASSERT_TRUE(iter->Valid()); + + ParsedInternalKey pik; + ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); + + ASSERT_EQ(pik.type, ValueType::kTypeValue); + ASSERT_EQ(pik.sequence, 10); + ASSERT_EQ(pik.user_key.ToString(), k); + ASSERT_EQ(iter->value().ToString(), k); + } + delete iter; + + // Update global sequence number to 3 + SetGlobalSeqno(3); + GetVersionAndGlobalSeqno(); + ASSERT_EQ(2u, version); + ASSERT_EQ(3u, global_seqno); + + iter = GetTableInternalIter(); + current_c = 'a'; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedInternalKey pik; + ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); + + ASSERT_EQ(pik.type, ValueType::kTypeValue); + ASSERT_EQ(pik.sequence, 3); + ASSERT_EQ(pik.user_key, iter->value()); + ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); + current_c++; + } + ASSERT_EQ(current_c, 'z' + 1); + + // Verify Seek + for (char c = 'a'; c <= 'z'; c++) { + std::string k = std::string(8, c); + // seqno=4 is less than 3 so we still should get our key + InternalKey ik(k, 4, kValueTypeForSeek); + iter->Seek(ik.Encode()); + ASSERT_TRUE(iter->Valid()); + + ParsedInternalKey pik; + ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); + + ASSERT_EQ(pik.type, ValueType::kTypeValue); + ASSERT_EQ(pik.sequence, 3); + ASSERT_EQ(pik.user_key.ToString(), k); + ASSERT_EQ(iter->value().ToString(), k); + } + + delete iter; +} + +TEST_P(BlockBasedTableTest, BlockAlignTest) { + BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); + bbto.block_align = true; + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "" /* don't care */, FileOptions())); + Options options; + options.compression = kNoCompression; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, -1), + file_writer.get())); + + for (int i = 1; i <= 10000; ++i) { + std::ostringstream ostr; + ostr << std::setfill('0') << std::setw(5) << i; + std::string key = ostr.str(); + std::string value = "val"; + InternalKey ik(key, 0, kTypeValue); + + builder->Add(ik.Encode(), value); + } + ASSERT_OK(builder->Finish()); + ASSERT_OK(file_writer->Flush()); + + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(sink->contents(), 73342, false)); + std::unique_ptr<RandomAccessFileReader> file_reader( + new RandomAccessFileReader(std::move(source), "test")); + // Helper function to get version, global_seqno, global_seqno_offset + std::function<void()> VerifyBlockAlignment = [&]() { + std::unique_ptr<TableProperties> props; + ASSERT_OK(ReadTableProperties(file_reader.get(), sink->contents().size(), + kBlockBasedTableMagicNumber, ioptions, + &props)); + + uint64_t data_block_size = props->data_size / props->num_data_blocks; + ASSERT_EQ(data_block_size, 4096); + ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks); + }; + + VerifyBlockAlignment(); + + // The below block of code verifies that we can read back the keys. Set + // block_align to false when creating the reader to ensure we can flip between + // the two modes without any issues + std::unique_ptr<TableReader> table_reader; + bbto.block_align = false; + Options options2; + options2.table_factory.reset(NewBlockBasedTableFactory(bbto)); + ImmutableOptions ioptions2(options2); + const MutableCFOptions moptions2(options2); + + ASSERT_OK(ioptions.table_factory->NewTableReader( + TableReaderOptions(ioptions2, moptions2.prefix_extractor, EnvOptions(), + GetPlainInternalComparator(options2.comparator)), + std::move(file_reader), sink->contents().size(), &table_reader)); + + ReadOptions read_options; + std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator( + read_options, moptions2.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + int expected_key = 1; + for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { + std::ostringstream ostr; + ostr << std::setfill('0') << std::setw(5) << expected_key++; + std::string key = ostr.str(); + std::string value = "val"; + + ASSERT_OK(db_iter->status()); + ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key); + ASSERT_EQ(db_iter->value().ToString(), value); + } + expected_key--; + ASSERT_EQ(expected_key, 10000); + table_reader.reset(); +} + +TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { + BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); + bbto.block_align = true; + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "" /* don't care */, FileOptions())); + + Options options; + options.compression = kNoCompression; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + std::string column_family_name; + + std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kNoCompression, + CompressionOptions(), kUnknownColumnFamily, + column_family_name, -1), + file_writer.get())); + + for (int i = 1; i <= 10000; ++i) { + std::ostringstream ostr; + ostr << std::setfill('0') << std::setw(5) << i; + std::string key = ostr.str(); + std::string value = "val"; + InternalKey ik(key, 0, kTypeValue); + + builder->Add(ik.Encode(), value); + } + ASSERT_OK(builder->Finish()); + ASSERT_OK(file_writer->Flush()); + + std::unique_ptr<FSRandomAccessFile> source( + new test::StringSource(sink->contents(), 73342, true)); + std::unique_ptr<RandomAccessFileReader> file_reader( + new RandomAccessFileReader(std::move(source), "test")); + + { + RandomAccessFileReader* file = file_reader.get(); + uint64_t file_size = sink->contents().size(); + + Footer footer; + IOOptions opts; + ASSERT_OK(ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */, + file_size, &footer, + kBlockBasedTableMagicNumber)); + + auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type, + BlockContents* contents) { + ReadOptions read_options; + read_options.verify_checksums = false; + PersistentCacheOptions cache_options; + + BlockFetcher block_fetcher( + file, nullptr /* prefetch_buffer */, footer, read_options, handle, + contents, ioptions, false /* decompress */, + false /*maybe_compressed*/, block_type, + UncompressionDict::GetEmptyDict(), cache_options); + + ASSERT_OK(block_fetcher.ReadBlockContents()); + }; + + // -- Read metaindex block + auto metaindex_handle = footer.metaindex_handle(); + BlockContents metaindex_contents; + + BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex, + &metaindex_contents); + Block metaindex_block(std::move(metaindex_contents)); + + std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator( + BytewiseComparator(), kDisableGlobalSequenceNumber)); + + // -- Read properties block + BlockHandle properties_handle; + ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlockName, + &properties_handle)); + ASSERT_FALSE(properties_handle.IsNull()); + BlockContents properties_contents; + BlockFetchHelper(properties_handle, BlockType::kProperties, + &properties_contents); + Block properties_block(std::move(properties_contents)); + + ASSERT_EQ(properties_block.NumRestarts(), 1u); + } +} + +TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { + // The properties meta-block should come at the end since we always need to + // read it when opening a file, unlike index/filter/other meta-blocks, which + // are sometimes read depending on the user's configuration. This ordering + // allows us to do a small readahead on the end of the file to read properties + // and meta-index blocks with one I/O. + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("a1", "val1"); + c.Add("b2", "val2"); + c.Add("c3", "val3"); + c.Add("d4", "val4"); + c.Add("e5", "val5"); + c.Add("f6", "val6"); + c.Add("g7", "val7"); + c.Add("h8", "val8"); + c.Add("j9", "val9"); + + // write an SST file + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.filter_policy.reset(NewBloomFilterPolicy( + 8 /* bits_per_key */, false /* use_block_based_filter */)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + // get file reader + test::StringSink* table_sink = c.TEST_GetSink(); + std::unique_ptr<FSRandomAccessFile> source(new test::StringSource( + table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */)); + + std::unique_ptr<RandomAccessFileReader> table_reader( + new RandomAccessFileReader(std::move(source), "test")); + size_t table_size = table_sink->contents().size(); + + // read footer + Footer footer; + IOOptions opts; + ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(), + nullptr /* prefetch_buffer */, table_size, + &footer, kBlockBasedTableMagicNumber)); + + // read metaindex + auto metaindex_handle = footer.metaindex_handle(); + BlockContents metaindex_contents; + PersistentCacheOptions pcache_opts; + BlockFetcher block_fetcher( + table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), + metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, + false /*maybe_compressed*/, BlockType::kMetaIndex, + UncompressionDict::GetEmptyDict(), pcache_opts, + nullptr /*memory_allocator*/); + ASSERT_OK(block_fetcher.ReadBlockContents()); + Block metaindex_block(std::move(metaindex_contents)); + + // verify properties block comes last + std::unique_ptr<InternalIterator> metaindex_iter{ + metaindex_block.NewMetaIterator()}; + uint64_t max_offset = 0; + std::string key_at_max_offset; + for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); + metaindex_iter->Next()) { + BlockHandle handle; + Slice value = metaindex_iter->value(); + ASSERT_OK(handle.DecodeFrom(&value)); + if (handle.offset() > max_offset) { + max_offset = handle.offset(); + key_at_max_offset = metaindex_iter->key().ToString(); + } + } + ASSERT_EQ(kPropertiesBlockName, key_at_max_offset); + // index handle is stored in footer rather than metaindex block, so need + // separate logic to verify it comes before properties block. + ASSERT_GT(max_offset, footer.index_handle().offset()); + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, SeekMetaBlocks) { + TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); + c.Add("foo_a1", "val1"); + c.Add("foo_b2", "val2"); + c.Add("foo_c3", "val3"); + c.Add("foo_d4", "val4"); + c.Add("foo_e5", "val5"); + c.Add("foo_f6", "val6"); + c.Add("foo_g7", "val7"); + c.Add("foo_h8", "val8"); + c.Add("foo_j9", "val9"); + + // write an SST file + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.index_type = BlockBasedTableOptions::kHashSearch; + table_options.filter_policy.reset(NewBloomFilterPolicy( + 8 /* bits_per_key */, false /* use_block_based_filter */)); + options.prefix_extractor.reset(NewFixedPrefixTransform(4)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + c.Finish(options, ioptions, moptions, table_options, + GetPlainInternalComparator(options.comparator), &keys, &kvmap); + + // get file reader + test::StringSink* table_sink = c.TEST_GetSink(); + std::unique_ptr<FSRandomAccessFile> source(new test::StringSource( + table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */)); + + std::unique_ptr<RandomAccessFileReader> table_reader( + new RandomAccessFileReader(std::move(source), "test")); + size_t table_size = table_sink->contents().size(); + + // read footer + Footer footer; + IOOptions opts; + ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(), + nullptr /* prefetch_buffer */, table_size, + &footer, kBlockBasedTableMagicNumber)); + + // read metaindex + auto metaindex_handle = footer.metaindex_handle(); + BlockContents metaindex_contents; + PersistentCacheOptions pcache_opts; + BlockFetcher block_fetcher( + table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), + metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, + false /*maybe_compressed*/, BlockType::kMetaIndex, + UncompressionDict::GetEmptyDict(), pcache_opts, + nullptr /*memory_allocator*/); + ASSERT_OK(block_fetcher.ReadBlockContents()); + Block metaindex_block(std::move(metaindex_contents)); + + // verify properties block comes last + std::unique_ptr<MetaBlockIter> metaindex_iter( + metaindex_block.NewMetaIterator()); + bool has_hash_prefixes = false; + bool has_hash_metadata = false; + for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); + metaindex_iter->Next()) { + if (metaindex_iter->key().ToString() == kHashIndexPrefixesBlock) { + has_hash_prefixes = true; + } else if (metaindex_iter->key().ToString() == + kHashIndexPrefixesMetadataBlock) { + has_hash_metadata = true; + } + } + if (has_hash_metadata) { + metaindex_iter->Seek(kHashIndexPrefixesMetadataBlock); + ASSERT_TRUE(metaindex_iter->Valid()); + ASSERT_EQ(kHashIndexPrefixesMetadataBlock, + metaindex_iter->key().ToString()); + } + if (has_hash_prefixes) { + metaindex_iter->Seek(kHashIndexPrefixesBlock); + ASSERT_TRUE(metaindex_iter->Valid()); + ASSERT_EQ(kHashIndexPrefixesBlock, metaindex_iter->key().ToString()); + } + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, BadOptions) { + ROCKSDB_NAMESPACE::Options options; + options.compression = kNoCompression; + BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); + bbto.block_size = 4000; + bbto.block_align = true; + + const std::string kDBPath = + test::PerThreadDBPath("block_based_table_bad_options_test"); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + ASSERT_OK(DestroyDB(kDBPath, options)); + ROCKSDB_NAMESPACE::DB* db; + ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); + + bbto.block_size = 4096; + options.compression = kSnappyCompression; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); +} + +TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) { + TailPrefetchStats tpstats; + ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize()); + tpstats.RecordEffectiveSize(size_t{1000}); + tpstats.RecordEffectiveSize(size_t{1005}); + tpstats.RecordEffectiveSize(size_t{1002}); + ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize()); + + // One single super large value shouldn't influence much + tpstats.RecordEffectiveSize(size_t{1002000}); + tpstats.RecordEffectiveSize(size_t{999}); + ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize()); + ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize()); + + // Only history of 32 is kept + for (int i = 0; i < 32; i++) { + tpstats.RecordEffectiveSize(size_t{100}); + } + ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize()); + + // 16 large values and 16 small values. The result should be closer + // to the small value as the algorithm. + for (int i = 0; i < 16; i++) { + tpstats.RecordEffectiveSize(size_t{1000}); + } + tpstats.RecordEffectiveSize(size_t{10}); + tpstats.RecordEffectiveSize(size_t{20}); + for (int i = 0; i < 6; i++) { + tpstats.RecordEffectiveSize(size_t{100}); + } + ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize()); + ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize()); +} + +TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) { + TailPrefetchStats tpstats; + FilePrefetchBuffer buffer(0 /* readahead_size */, 0 /* max_readahead_size */, + false /* enable */, true /* track_min_offset */); + IOOptions opts; + buffer.TryReadFromCache(opts, nullptr /* reader */, 500 /* offset */, + 10 /* n */, nullptr /* result */, + nullptr /* status */, + Env::IO_TOTAL /* rate_limiter_priority */); + buffer.TryReadFromCache(opts, nullptr /* reader */, 480 /* offset */, + 10 /* n */, nullptr /* result */, + nullptr /* status */, + Env::IO_TOTAL /* rate_limiter_priority */); + buffer.TryReadFromCache(opts, nullptr /* reader */, 490 /* offset */, + 10 /* n */, nullptr /* result */, + nullptr /* status */, + Env::IO_TOTAL /* rate_limiter_priority */); + ASSERT_EQ(480, buffer.min_offset_read()); +} + +TEST_P(BlockBasedTableTest, DataBlockHashIndex) { + const int kNumKeys = 500; + const int kKeySize = 8; + const int kValSize = 40; + + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + table_options.data_block_index_type = + BlockBasedTableOptions::kDataBlockBinaryAndHash; + + Options options; + options.comparator = BytewiseComparator(); + + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(options.comparator); + + static Random rnd(1048); + for (int i = 0; i < kNumKeys; i++) { + // padding one "0" to mark existent keys. + std::string random_key(rnd.RandomString(kKeySize - 1) + "1"); + InternalKey k(random_key, 0, kTypeValue); + c.Add(k.Encode().ToString(), rnd.RandomString(kValSize)); + } + + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + auto reader = c.GetTableReader(); + + std::unique_ptr<InternalIterator> seek_iter; + ReadOptions read_options; + seek_iter.reset(reader->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + for (int i = 0; i < 2; ++i) { + ReadOptions ro; + // for every kv, we seek using two method: Get() and Seek() + // Get() will use the SuffixIndexHash in Block. For non-existent key it + // will invalidate the iterator + // Seek() will use the default BinarySeek() in Block. So for non-existent + // key it will land at the closest key that is large than target. + + // Search for existent keys + for (auto& kv : kvmap) { + if (i == 0) { + // Search using Seek() + seek_iter->Seek(kv.first); + ASSERT_OK(seek_iter->status()); + ASSERT_TRUE(seek_iter->Valid()); + ASSERT_EQ(seek_iter->key(), kv.first); + ASSERT_EQ(seek_iter->value(), kv.second); + } else { + // Search using Get() + PinnableSlice value; + std::string user_key = ExtractUserKey(kv.first).ToString(); + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + ASSERT_OK(reader->Get(ro, kv.first, &get_context, + moptions.prefix_extractor.get())); + ASSERT_EQ(get_context.State(), GetContext::kFound); + ASSERT_EQ(value, Slice(kv.second)); + value.Reset(); + } + } + + // Search for non-existent keys + for (auto& kv : kvmap) { + std::string user_key = ExtractUserKey(kv.first).ToString(); + user_key.back() = '0'; // make it non-existent key + InternalKey internal_key(user_key, 0, kTypeValue); + std::string encoded_key = internal_key.Encode().ToString(); + if (i == 0) { // Search using Seek() + seek_iter->Seek(encoded_key); + ASSERT_OK(seek_iter->status()); + if (seek_iter->Valid()) { + ASSERT_TRUE(BytewiseComparator()->Compare( + user_key, ExtractUserKey(seek_iter->key())) < 0); + } + } else { // Search using Get() + PinnableSlice value; + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, user_key, &value, nullptr, + nullptr, nullptr, true, nullptr, nullptr); + ASSERT_OK(reader->Get(ro, encoded_key, &get_context, + moptions.prefix_extractor.get())); + ASSERT_EQ(get_context.State(), GetContext::kNotFound); + value.Reset(); + } + } + } +} + +// BlockBasedTableIterator should invalidate itself and return +// OutOfBound()=true immediately after Seek(), to allow LevelIterator +// filter out corresponding level. +TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) { + TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/); + c.Add("foo", "v1"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + BlockBasedTableOptions table_opt(GetBlockBasedTableOptions()); + options.table_factory.reset(NewBlockBasedTableFactory(table_opt)); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_opt, + GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap); + auto* reader = c.GetTableReader(); + ReadOptions read_opt; + std::string upper_bound = "bar"; + Slice upper_bound_slice(upper_bound); + read_opt.iterate_upper_bound = &upper_bound_slice; + std::unique_ptr<InternalIterator> iter; + iter.reset(new KeyConvertingIterator(reader->NewIterator( + read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized))); + iter->SeekToFirst(); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); + iter.reset(new KeyConvertingIterator(reader->NewIterator( + read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized))); + iter->Seek("foo"); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); +} + +// BlockBasedTableIterator should invalidate itself and return +// OutOfBound()=true after Next(), if it finds current index key is no smaller +// than upper bound, unless it is pointing to the last data block. +TEST_P(BlockBasedTableTest, OutOfBoundOnNext) { + TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/); + c.Add("bar", "v"); + c.Add("foo", "v"); + std::vector<std::string> keys; + stl_wrappers::KVMap kvmap; + Options options; + BlockBasedTableOptions table_opt(GetBlockBasedTableOptions()); + table_opt.flush_block_policy_factory = + std::make_shared<FlushBlockEveryKeyPolicyFactory>(); + options.table_factory.reset(NewBlockBasedTableFactory(table_opt)); + const ImmutableOptions ioptions(options); + const MutableCFOptions moptions(options); + c.Finish(options, ioptions, moptions, table_opt, + GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap); + auto* reader = c.GetTableReader(); + ReadOptions read_opt; + std::string ub1 = "bar_after"; + Slice ub_slice1(ub1); + read_opt.iterate_upper_bound = &ub_slice1; + std::unique_ptr<InternalIterator> iter; + iter.reset(new KeyConvertingIterator(reader->NewIterator( + read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized))); + iter->Seek("bar"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("bar", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); + std::string ub2 = "foo_after"; + Slice ub_slice2(ub2); + read_opt.iterate_upper_bound = &ub_slice2; + iter.reset(new KeyConvertingIterator(reader->NewIterator( + read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized))); + iter->Seek("foo"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("foo", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + ASSERT_FALSE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); +} + +class ChargeCompressionDictionaryBuildingBufferTest + : public BlockBasedTableTestBase {}; +TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) { + constexpr std::size_t kSizeDummyEntry = 256 * 1024; + constexpr std::size_t kMetaDataChargeOverhead = 10000; + constexpr std::size_t kCacheCapacity = 8 * 1024 * 1024; + constexpr std::size_t kMaxDictBytes = 1024; + constexpr std::size_t kMaxDictBufferBytes = 1024; + + for (CacheEntryRoleOptions::Decision + charge_compression_dictionary_building_buffer : + {CacheEntryRoleOptions::Decision::kEnabled, + CacheEntryRoleOptions::Decision::kDisabled}) { + BlockBasedTableOptions table_options; + LRUCacheOptions lo; + lo.capacity = kCacheCapacity; + lo.num_shard_bits = 0; // 2^0 shard + lo.strict_capacity_limit = true; + std::shared_ptr<Cache> cache(NewLRUCache(lo)); + table_options.block_cache = cache; + table_options.flush_block_policy_factory = + std::make_shared<FlushBlockEveryKeyPolicyFactory>(); + table_options.cache_usage_options.options_overrides.insert( + {CacheEntryRole::kCompressionDictionaryBuildingBuffer, + {/*.charged = */ charge_compression_dictionary_building_buffer}}); + Options options; + options.compression = kSnappyCompression; + options.compression_opts.max_dict_bytes = kMaxDictBytes; + options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "test_file_name", FileOptions())); + + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + + std::unique_ptr<TableBuilder> builder( + options.table_factory->NewTableBuilder( + TableBuilderOptions( + ioptions, moptions, ikc, &int_tbl_prop_collector_factories, + kSnappyCompression, options.compression_opts, + kUnknownColumnFamily, "test_cf", -1 /* level */), + file_writer.get())); + + std::string key1 = "key1"; + std::string value1 = "val1"; + InternalKey ik1(key1, 0 /* sequnce number */, kTypeValue); + // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy + // therefore won't trigger any data block's buffering + builder->Add(ik1.Encode(), value1); + ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + + std::string key2 = "key2"; + std::string value2 = "val2"; + InternalKey ik2(key2, 1 /* sequnce number */, kTypeValue); + // Adding the second key will trigger a flush of the last data block (the + // one containing key1 and value1) by FlushBlockEveryKeyPolicy and hence + // trigger buffering of that data block. + builder->Add(ik2.Encode(), value2); + // Cache charging will increase for last buffered data block (the one + // containing key1 and value1) since the buffer limit is not exceeded after + // that buffering and the cache will not be full after this reservation + if (charge_compression_dictionary_building_buffer == + CacheEntryRoleOptions::Decision::kEnabled) { + EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); + EXPECT_LT(cache->GetPinnedUsage(), + 1 * kSizeDummyEntry + kMetaDataChargeOverhead); + } else { + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + } + + ASSERT_OK(builder->Finish()); + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + } +} + +TEST_F(ChargeCompressionDictionaryBuildingBufferTest, + BasicWithBufferLimitExceed) { + constexpr std::size_t kSizeDummyEntry = 256 * 1024; + constexpr std::size_t kMetaDataChargeOverhead = 10000; + constexpr std::size_t kCacheCapacity = 8 * 1024 * 1024; + constexpr std::size_t kMaxDictBytes = 1024; + constexpr std::size_t kMaxDictBufferBytes = 2 * kSizeDummyEntry; + + // `CacheEntryRoleOptions::charged` is enabled by default for + // CacheEntryRole::kCompressionDictionaryBuildingBuffer + BlockBasedTableOptions table_options; + LRUCacheOptions lo; + lo.capacity = kCacheCapacity; + lo.num_shard_bits = 0; // 2^0 shard + lo.strict_capacity_limit = true; + std::shared_ptr<Cache> cache(NewLRUCache(lo)); + table_options.block_cache = cache; + table_options.flush_block_policy_factory = + std::make_shared<FlushBlockEveryKeyPolicyFactory>(); + + Options options; + options.compression = kSnappyCompression; + options.compression_opts.max_dict_bytes = kMaxDictBytes; + options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "test_file_name", FileOptions())); + + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + + std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kSnappyCompression, + options.compression_opts, kUnknownColumnFamily, + "test_cf", -1 /* level */), + file_writer.get())); + + std::string key1 = "key1"; + std::string value1(kSizeDummyEntry, '0'); + InternalKey ik1(key1, 0 /* sequnce number */, kTypeValue); + // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy + // therefore won't trigger any data block's buffering + builder->Add(ik1.Encode(), value1); + ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + + std::string key2 = "key2"; + std::string value2(kSizeDummyEntry, '0'); + InternalKey ik2(key2, 1 /* sequnce number */, kTypeValue); + // Adding the second key will trigger a flush of the last data block (the one + // containing key1 and value1) by FlushBlockEveryKeyPolicy and hence trigger + // buffering of the last data block. + builder->Add(ik2.Encode(), value2); + // Cache charging will increase for last buffered data block (the one + // containing key1 and value1) since the buffer limit is not exceeded after + // the buffering and the cache will not be full after this reservation + EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry); + EXPECT_LT(cache->GetPinnedUsage(), + 2 * kSizeDummyEntry + kMetaDataChargeOverhead); + + std::string key3 = "key3"; + std::string value3 = "val3"; + InternalKey ik3(key3, 2 /* sequnce number */, kTypeValue); + // Adding the third key will trigger a flush of the last data block (the one + // containing key2 and value2) by FlushBlockEveryKeyPolicy and hence trigger + // buffering of the last data block. + builder->Add(ik3.Encode(), value3); + // Cache charging will decrease since the buffer limit is now exceeded + // after the last buffering and EnterUnbuffered() is triggered + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + + ASSERT_OK(builder->Finish()); + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); +} + +TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) { + constexpr std::size_t kSizeDummyEntry = 256 * 1024; + constexpr std::size_t kMetaDataChargeOverhead = 10000; + // A small kCacheCapacity is chosen so that increase cache charging for + // buffering two data blocks, each containing key1/value1, key2/a big + // value2, will cause cache full + constexpr std::size_t kCacheCapacity = + 1 * kSizeDummyEntry + kSizeDummyEntry / 2; + constexpr std::size_t kMaxDictBytes = 1024; + // A big kMaxDictBufferBytes is chosen so that adding a big key value pair + // (key2, value2) won't exceed the buffer limit + constexpr std::size_t kMaxDictBufferBytes = 1024 * 1024 * 1024; + + // `CacheEntryRoleOptions::charged` is enabled by default for + // CacheEntryRole::kCompressionDictionaryBuildingBuffer + BlockBasedTableOptions table_options; + LRUCacheOptions lo; + lo.capacity = kCacheCapacity; + lo.num_shard_bits = 0; // 2^0 shard + lo.strict_capacity_limit = true; + std::shared_ptr<Cache> cache(NewLRUCache(lo)); + table_options.block_cache = cache; + table_options.flush_block_policy_factory = + std::make_shared<FlushBlockEveryKeyPolicyFactory>(); + + Options options; + options.compression = kSnappyCompression; + options.compression_opts.max_dict_bytes = kMaxDictBytes; + options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + test::StringSink* sink = new test::StringSink(); + std::unique_ptr<FSWritableFile> holder(sink); + std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( + std::move(holder), "test_file_name", FileOptions())); + + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + InternalKeyComparator ikc(options.comparator); + IntTblPropCollectorFactories int_tbl_prop_collector_factories; + + std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, ikc, + &int_tbl_prop_collector_factories, kSnappyCompression, + options.compression_opts, kUnknownColumnFamily, + "test_cf", -1 /* level */), + file_writer.get())); + + std::string key1 = "key1"; + std::string value1 = "val1"; + InternalKey ik1(key1, 0 /* sequnce number */, kTypeValue); + // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy + // therefore won't trigger any data block's buffering + builder->Add(ik1.Encode(), value1); + ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + + std::string key2 = "key2"; + std::string value2(kSizeDummyEntry, '0'); + InternalKey ik2(key2, 1 /* sequnce number */, kTypeValue); + // Adding the second key will trigger a flush of the last data block (the one + // containing key1 and value1) by FlushBlockEveryKeyPolicy and hence trigger + // buffering of the last data block. + builder->Add(ik2.Encode(), value2); + // Cache charging will increase for the last buffered data block (the one + // containing key1 and value1) since the buffer limit is not exceeded after + // the buffering and the cache will not be full after this reservation + EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); + EXPECT_LT(cache->GetPinnedUsage(), + 1 * kSizeDummyEntry + kMetaDataChargeOverhead); + + std::string key3 = "key3"; + std::string value3 = "value3"; + InternalKey ik3(key3, 2 /* sequnce number */, kTypeValue); + // Adding the third key will trigger a flush of the last data block (the one + // containing key2 and value2) by FlushBlockEveryKeyPolicy and hence trigger + // buffering of the last data block. + builder->Add(ik3.Encode(), value3); + // Cache charging will decrease since the cache is now full after + // increasing reservation for the last buffered block and EnterUnbuffered() is + // triggered + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); + + ASSERT_OK(builder->Finish()); + EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); +} + +class CacheUsageOptionsOverridesTest : public DBTestBase { + public: + CacheUsageOptionsOverridesTest() + : DBTestBase("cache_usage_options_overrides_test", + /*env_do_fsync=*/false) {} +}; + +TEST_F(CacheUsageOptionsOverridesTest, SanitizeAndValidateOptions) { + // To test `cache_usage_options.options_overrides` is sanitized + // where `cache_usage_options.options` is used when there is no entry in + // `cache_usage_options.options_overrides` + Options options; + options.create_if_missing = true; + BlockBasedTableOptions table_options = BlockBasedTableOptions(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Destroy(options); + Status s = TryReopen(options); + EXPECT_TRUE(s.ok()); + const auto* sanitized_table_options = + options.table_factory->GetOptions<BlockBasedTableOptions>(); + const auto sanitized_options_overrides = + sanitized_table_options->cache_usage_options.options_overrides; + EXPECT_EQ(sanitized_options_overrides.size(), kNumCacheEntryRoles); + for (auto options_overrides_iter = sanitized_options_overrides.cbegin(); + options_overrides_iter != sanitized_options_overrides.cend(); + ++options_overrides_iter) { + CacheEntryRoleOptions role_options = options_overrides_iter->second; + CacheEntryRoleOptions default_options = + sanitized_table_options->cache_usage_options.options; + EXPECT_TRUE(role_options == default_options); + } + Destroy(options); + + // To test option validation on unsupported CacheEntryRole + table_options = BlockBasedTableOptions(); + table_options.cache_usage_options.options_overrides.insert( + {CacheEntryRole::kDataBlock, + {/*.charged = */ CacheEntryRoleOptions::Decision::kDisabled}}); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Destroy(options); + s = TryReopen(options); + EXPECT_TRUE(s.IsNotSupported()); + EXPECT_TRUE( + s.ToString().find("Enable/Disable CacheEntryRoleOptions::charged") != + std::string::npos); + EXPECT_TRUE( + s.ToString().find(kCacheEntryRoleToCamelString[static_cast<uint32_t>( + CacheEntryRole::kDataBlock)]) != std::string::npos); + Destroy(options); + + // To test option validation on existence of block cache + table_options = BlockBasedTableOptions(); + table_options.no_block_cache = true; + table_options.cache_usage_options.options_overrides.insert( + {CacheEntryRole::kFilterConstruction, + {/*.charged = */ CacheEntryRoleOptions::Decision::kEnabled}}); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Destroy(options); + s = TryReopen(options); + EXPECT_TRUE(s.IsInvalidArgument()); + EXPECT_TRUE(s.ToString().find("Enable CacheEntryRoleOptions::charged") != + std::string::npos); + EXPECT_TRUE( + s.ToString().find(kCacheEntryRoleToCamelString[static_cast<std::size_t>( + CacheEntryRole::kFilterConstruction)]) != std::string::npos); + EXPECT_TRUE(s.ToString().find("block cache is disabled") != + std::string::npos); + Destroy(options); +} +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} |