From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 21 Apr 2024 13:54:28 +0200
Subject: Adding upstream version 18.2.2.

Signed-off-by: Daniel Baumann
---
 .../block_based/partitioned_index_iterator.cc | 163 +++++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 src/rocksdb/table/block_based/partitioned_index_iterator.cc

diff --git a/src/rocksdb/table/block_based/partitioned_index_iterator.cc b/src/rocksdb/table/block_based/partitioned_index_iterator.cc
new file mode 100644
index 000000000..b9bc2155a
--- /dev/null
+++ b/src/rocksdb/table/block_based/partitioned_index_iterator.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "table/block_based/partitioned_index_iterator.h"
+
+namespace ROCKSDB_NAMESPACE {
+void PartitionedIndexIterator::Seek(const Slice& target) { SeekImpl(&target); }
+
+void PartitionedIndexIterator::SeekToFirst() { SeekImpl(nullptr); }
+
+void PartitionedIndexIterator::SeekImpl(const Slice* target) {
+  SavePrevIndexValue();
+
+  if (target) {
+    index_iter_->Seek(*target);
+  } else {
+    index_iter_->SeekToFirst();
+  }
+
+  if (!index_iter_->Valid()) {
+    ResetPartitionedIndexIter();
+    return;
+  }
+
+  InitPartitionedIndexBlock();
+
+  if (target) {
+    block_iter_.Seek(*target);
+  } else {
+    block_iter_.SeekToFirst();
+  }
+  FindKeyForward();
+
+  // We could check upper bound here, but that would be too complicated
+  // and checking index upper bound is less useful than for data blocks.
+
+  if (target) {
+    assert(!Valid() || (table_->get_rep()->index_key_includes_seq
+                            ? (icomp_.Compare(*target, key()) <= 0)
+                            : (user_comparator_.Compare(ExtractUserKey(*target),
+                                                        key()) <= 0)));
+  }
+}
+
+void PartitionedIndexIterator::SeekToLast() {
+  SavePrevIndexValue();
+  index_iter_->SeekToLast();
+  if (!index_iter_->Valid()) {
+    ResetPartitionedIndexIter();
+    return;
+  }
+  InitPartitionedIndexBlock();
+  block_iter_.SeekToLast();
+  FindKeyBackward();
+}
+
+void PartitionedIndexIterator::Next() {
+  assert(block_iter_points_to_real_block_);
+  block_iter_.Next();
+  FindKeyForward();
+}
+
+void PartitionedIndexIterator::Prev() {
+  assert(block_iter_points_to_real_block_);
+  block_iter_.Prev();
+
+  FindKeyBackward();
+}
+
+void PartitionedIndexIterator::InitPartitionedIndexBlock() {
+  BlockHandle partitioned_index_handle = index_iter_->value().handle;
+  if (!block_iter_points_to_real_block_ ||
+      partitioned_index_handle.offset() != prev_block_offset_ ||
+      // if previous attempt of reading the block missed cache, try again
+      block_iter_.status().IsIncomplete()) {
+    if (block_iter_points_to_real_block_) {
+      ResetPartitionedIndexIter();
+    }
+    auto* rep = table_->get_rep();
+    bool is_for_compaction =
+        lookup_context_.caller == TableReaderCaller::kCompaction;
+    // Prefetch additional data for range scans (iterators).
+    // Implicit auto readahead:
+    //   Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
+    // Explicit user requested readahead:
+    //   Enabled from the very first IO when ReadOptions.readahead_size is
+    //   set.
+    block_prefetcher_.PrefetchIfNeeded(
+        rep, partitioned_index_handle, read_options_.readahead_size,
+        is_for_compaction, /*no_sequential_checking=*/false,
+        read_options_.rate_limiter_priority);
+    Status s;
+    table_->NewDataBlockIterator<IndexBlockIter>(
+        read_options_, partitioned_index_handle, &block_iter_,
+        BlockType::kIndex,
+        /*get_context=*/nullptr, &lookup_context_,
+        block_prefetcher_.prefetch_buffer(),
+        /*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
+    block_iter_points_to_real_block_ = true;
+    // We could check upper bound here but it is complicated to reason about
+    // upper bound in index iterator. On the other hand, in large scans, index
+    // iterators are moved much less frequently compared to data blocks. So
+    // the upper bound check is skipped for simplicity.
+  }
+}
+
+void PartitionedIndexIterator::FindKeyForward() {
+  // This method's code is kept short to make it likely to be inlined.
+
+  assert(block_iter_points_to_real_block_);
+
+  if (!block_iter_.Valid()) {
+    // This is the only call site of FindBlockForward(), but it's extracted
+    // into a separate method to keep FindKeyForward() short and likely to be
+    // inlined. When transitioning to a different block, we call
+    // FindBlockForward(), which is much longer and is probably not inlined.
+    FindBlockForward();
+  } else {
+    // This is the fast path that avoids a function call.
+  }
+}
+
+void PartitionedIndexIterator::FindBlockForward() {
+  // TODO: the while loop is inherited from two-level-iterator. We don't know
+  // whether a block can be empty, so it is unclear whether the loop can be
+  // replaced by an "if".
+  do {
+    if (!block_iter_.status().ok()) {
+      return;
+    }
+    ResetPartitionedIndexIter();
+    index_iter_->Next();
+
+    if (!index_iter_->Valid()) {
+      return;
+    }
+
+    InitPartitionedIndexBlock();
+    block_iter_.SeekToFirst();
+  } while (!block_iter_.Valid());
+}
+
+void PartitionedIndexIterator::FindKeyBackward() {
+  while (!block_iter_.Valid()) {
+    if (!block_iter_.status().ok()) {
+      return;
+    }
+
+    ResetPartitionedIndexIter();
+    index_iter_->Prev();
+
+    if (index_iter_->Valid()) {
+      InitPartitionedIndexBlock();
+      block_iter_.SeekToLast();
+    } else {
+      return;
+    }
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
-- 
cgit v1.2.3
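
A note on the readahead behavior referenced in InitPartitionedIndexBlock()
above: the comment documents two modes, implicit auto readahead (kicks in
after 2 sequential IOs when ReadOptions.readahead_size == 0) and explicit
user-requested readahead (active from the very first IO). Below is a
minimal, self-contained C++ sketch of the implicit mode. It is not
RocksDB's BlockPrefetcher; the class name, thresholds, and doubling policy
are illustrative assumptions modeled only on the trigger condition stated
in the comment.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Illustrative stand-in for the implicit auto-readahead heuristic; the
// constants and growth factor are assumptions, not RocksDB's tuning.
class AutoReadaheadSketch {
 public:
  // Returns how many bytes to prefetch for a read of `len` bytes at `offset`.
  uint64_t OnRead(uint64_t offset, uint64_t len) {
    if (has_prev_ && offset == expected_next_offset_) {
      ++num_sequential_reads_;
    } else {
      num_sequential_reads_ = 1;         // pattern broken: start counting over
      readahead_size_ = kInitReadahead;  // and shrink the window again
    }
    has_prev_ = true;
    expected_next_offset_ = offset + len;
    if (num_sequential_reads_ <= 2) {  // "enabled after 2 sequential IOs"
      return 0;                        // not sequential enough yet
    }
    uint64_t cur = readahead_size_;
    // Grow the window geometrically, capped at an assumed maximum.
    readahead_size_ = std::min(readahead_size_ * 2, kMaxReadahead);
    return cur;
  }

 private:
  static constexpr uint64_t kInitReadahead = 8 * 1024;   // assumed 8 KiB
  static constexpr uint64_t kMaxReadahead = 256 * 1024;  // assumed 256 KiB
  bool has_prev_ = false;
  uint64_t expected_next_offset_ = 0;
  uint64_t num_sequential_reads_ = 0;
  uint64_t readahead_size_ = kInitReadahead;
};

int main() {
  AutoReadaheadSketch ra;
  // Three sequential 4 KiB reads: only the third one triggers readahead.
  for (uint64_t off = 0; off < 3 * 4096; off += 4096) {
    std::cout << "read@" << off << " -> prefetch " << ra.OnRead(off, 4096)
              << " bytes\n";
  }
  return 0;
}

Under these assumptions the first two sequential reads prefetch nothing,
the third prefetches the initial window, and each further sequential read
doubles the window up to the cap; any non-sequential read resets the
detector, mirroring how the patch only prefetches for scan-like access.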