From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/table/block_based/index_builder.cc | 282 +++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 src/rocksdb/table/block_based/index_builder.cc (limited to 'src/rocksdb/table/block_based/index_builder.cc') diff --git a/src/rocksdb/table/block_based/index_builder.cc b/src/rocksdb/table/block_based/index_builder.cc new file mode 100644 index 000000000..024730178 --- /dev/null +++ b/src/rocksdb/table/block_based/index_builder.cc @@ -0,0 +1,282 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/block_based/index_builder.h" + +#include + +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "rocksdb/flush_block_policy.h" +#include "table/block_based/partitioned_filter_block.h" +#include "table/format.h" + +namespace ROCKSDB_NAMESPACE { + +// Create a index builder based on its type. +IndexBuilder* IndexBuilder::CreateIndexBuilder( + BlockBasedTableOptions::IndexType index_type, + const InternalKeyComparator* comparator, + const InternalKeySliceTransform* int_key_slice_transform, + const bool use_value_delta_encoding, + const BlockBasedTableOptions& table_opt) { + IndexBuilder* result = nullptr; + switch (index_type) { + case BlockBasedTableOptions::kBinarySearch: { + result = new ShortenedIndexBuilder( + comparator, table_opt.index_block_restart_interval, + table_opt.format_version, use_value_delta_encoding, + table_opt.index_shortening, /* include_first_key */ false); + break; + } + case BlockBasedTableOptions::kHashSearch: { + // Currently kHashSearch is incompatible with index_block_restart_interval + // > 1 + assert(table_opt.index_block_restart_interval == 1); + result = new HashIndexBuilder( + comparator, int_key_slice_transform, + table_opt.index_block_restart_interval, table_opt.format_version, + use_value_delta_encoding, table_opt.index_shortening); + break; + } + case BlockBasedTableOptions::kTwoLevelIndexSearch: { + result = PartitionedIndexBuilder::CreateIndexBuilder( + comparator, use_value_delta_encoding, table_opt); + break; + } + case BlockBasedTableOptions::kBinarySearchWithFirstKey: { + result = new ShortenedIndexBuilder( + comparator, table_opt.index_block_restart_interval, + table_opt.format_version, use_value_delta_encoding, + table_opt.index_shortening, /* include_first_key */ true); + break; + } + default: { + assert(!"Do not recognize the index type "); + break; + } + } + return result; +} + +void ShortenedIndexBuilder::FindShortestInternalKeySeparator( + const Comparator& comparator, std::string* start, const Slice& limit) { + // Attempt to shorten the user portion of the key + Slice user_start = ExtractUserKey(*start); + Slice user_limit = ExtractUserKey(limit); + std::string tmp(user_start.data(), user_start.size()); + comparator.FindShortestSeparator(&tmp, user_limit); + if (tmp.size() <= user_start.size() && + comparator.Compare(user_start, tmp) < 0) { + // User key has become shorter physically, but larger logically. + // Tack on the earliest possible number to the shortened user key. + PutFixed64(&tmp, + PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); + assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0); + assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0); + start->swap(tmp); + } +} + +void ShortenedIndexBuilder::FindShortInternalKeySuccessor( + const Comparator& comparator, std::string* key) { + Slice user_key = ExtractUserKey(*key); + std::string tmp(user_key.data(), user_key.size()); + comparator.FindShortSuccessor(&tmp); + if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) { + // User key has become shorter physically, but larger logically. + // Tack on the earliest possible number to the shortened user key. + PutFixed64(&tmp, + PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); + assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0); + key->swap(tmp); + } +} + +PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder( + const InternalKeyComparator* comparator, + const bool use_value_delta_encoding, + const BlockBasedTableOptions& table_opt) { + return new PartitionedIndexBuilder(comparator, table_opt, + use_value_delta_encoding); +} + +PartitionedIndexBuilder::PartitionedIndexBuilder( + const InternalKeyComparator* comparator, + const BlockBasedTableOptions& table_opt, + const bool use_value_delta_encoding) + : IndexBuilder(comparator), + index_block_builder_(table_opt.index_block_restart_interval, + true /*use_delta_encoding*/, + use_value_delta_encoding), + index_block_builder_without_seq_(table_opt.index_block_restart_interval, + true /*use_delta_encoding*/, + use_value_delta_encoding), + sub_index_builder_(nullptr), + table_opt_(table_opt), + // We start by false. After each partition we revise the value based on + // what the sub_index_builder has decided. If the feature is disabled + // entirely, this will be set to true after switching the first + // sub_index_builder. Otherwise, it could be set to true even one of the + // sub_index_builders could not safely exclude seq from the keys, then it + // wil be enforced on all sub_index_builders on ::Finish. + seperator_is_key_plus_seq_(false), + use_value_delta_encoding_(use_value_delta_encoding) {} + +PartitionedIndexBuilder::~PartitionedIndexBuilder() { + delete sub_index_builder_; +} + +void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { + assert(sub_index_builder_ == nullptr); + sub_index_builder_ = new ShortenedIndexBuilder( + comparator_, table_opt_.index_block_restart_interval, + table_opt_.format_version, use_value_delta_encoding_, + table_opt_.index_shortening, /* include_first_key */ false); + + // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if + // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by + // default on Creation) so that flush policy can point to + // sub_index_builder_->index_block_builder_ + if (seperator_is_key_plus_seq_) { + sub_index_builder_->seperator_is_key_plus_seq_ = true; + } + + flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( + table_opt_.metadata_block_size, table_opt_.block_size_deviation, + // Note: this is sub-optimal since sub_index_builder_ could later reset + // seperator_is_key_plus_seq_ but the probability of that is low. + sub_index_builder_->seperator_is_key_plus_seq_ + ? sub_index_builder_->index_block_builder_ + : sub_index_builder_->index_block_builder_without_seq_)); + partition_cut_requested_ = false; +} + +void PartitionedIndexBuilder::RequestPartitionCut() { + partition_cut_requested_ = true; +} + +void PartitionedIndexBuilder::AddIndexEntry( + std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, const BlockHandle& block_handle) { + // Note: to avoid two consecuitive flush in the same method call, we do not + // check flush policy when adding the last key + if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys + if (sub_index_builder_ == nullptr) { + MakeNewSubIndexBuilder(); + } + sub_index_builder_->AddIndexEntry(last_key_in_current_block, + first_key_in_next_block, block_handle); + if (!seperator_is_key_plus_seq_ && + sub_index_builder_->seperator_is_key_plus_seq_) { + // then we need to apply it to all sub-index builders and reset + // flush_policy to point to Block Builder of sub_index_builder_ that store + // internal keys. + seperator_is_key_plus_seq_ = true; + flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( + table_opt_.metadata_block_size, table_opt_.block_size_deviation, + sub_index_builder_->index_block_builder_)); + } + sub_index_last_key_ = std::string(*last_key_in_current_block); + entries_.push_back( + {sub_index_last_key_, + std::unique_ptr(sub_index_builder_)}); + sub_index_builder_ = nullptr; + cut_filter_block = true; + } else { + // apply flush policy only to non-empty sub_index_builder_ + if (sub_index_builder_ != nullptr) { + std::string handle_encoding; + block_handle.EncodeTo(&handle_encoding); + bool do_flush = + partition_cut_requested_ || + flush_policy_->Update(*last_key_in_current_block, handle_encoding); + if (do_flush) { + entries_.push_back( + {sub_index_last_key_, + std::unique_ptr(sub_index_builder_)}); + cut_filter_block = true; + sub_index_builder_ = nullptr; + } + } + if (sub_index_builder_ == nullptr) { + MakeNewSubIndexBuilder(); + } + sub_index_builder_->AddIndexEntry(last_key_in_current_block, + first_key_in_next_block, block_handle); + sub_index_last_key_ = std::string(*last_key_in_current_block); + if (!seperator_is_key_plus_seq_ && + sub_index_builder_->seperator_is_key_plus_seq_) { + // then we need to apply it to all sub-index builders and reset + // flush_policy to point to Block Builder of sub_index_builder_ that store + // internal keys. + seperator_is_key_plus_seq_ = true; + flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( + table_opt_.metadata_block_size, table_opt_.block_size_deviation, + sub_index_builder_->index_block_builder_)); + } + } +} + +Status PartitionedIndexBuilder::Finish( + IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) { + if (partition_cnt_ == 0) { + partition_cnt_ = entries_.size(); + } + // It must be set to null after last key is added + assert(sub_index_builder_ == nullptr); + if (finishing_indexes == true) { + Entry& last_entry = entries_.front(); + std::string handle_encoding; + last_partition_block_handle.EncodeTo(&handle_encoding); + std::string handle_delta_encoding; + PutVarsignedint64( + &handle_delta_encoding, + last_partition_block_handle.size() - last_encoded_handle_.size()); + last_encoded_handle_ = last_partition_block_handle; + const Slice handle_delta_encoding_slice(handle_delta_encoding); + index_block_builder_.Add(last_entry.key, handle_encoding, + &handle_delta_encoding_slice); + if (!seperator_is_key_plus_seq_) { + index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key), + handle_encoding, + &handle_delta_encoding_slice); + } + entries_.pop_front(); + } + // If there is no sub_index left, then return the 2nd level index. + if (UNLIKELY(entries_.empty())) { + if (seperator_is_key_plus_seq_) { + index_blocks->index_block_contents = index_block_builder_.Finish(); + } else { + index_blocks->index_block_contents = + index_block_builder_without_seq_.Finish(); + } + top_level_index_size_ = index_blocks->index_block_contents.size(); + index_size_ += top_level_index_size_; + return Status::OK(); + } else { + // Finish the next partition index in line and Incomplete() to indicate we + // expect more calls to Finish + Entry& entry = entries_.front(); + // Apply the policy to all sub-indexes + entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_; + auto s = entry.value->Finish(index_blocks); + index_size_ += index_blocks->index_block_contents.size(); + finishing_indexes = true; + return s.ok() ? Status::Incomplete() : s; + } +} + +size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; } +} // namespace ROCKSDB_NAMESPACE -- cgit v1.2.3