diff options
Diffstat (limited to 'src/rocksdb/table/block_based/block_prefetcher.cc')
-rw-r--r-- | src/rocksdb/table/block_based/block_prefetcher.cc | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/src/rocksdb/table/block_based/block_prefetcher.cc b/src/rocksdb/table/block_based/block_prefetcher.cc new file mode 100644 index 000000000..83ec2cb06 --- /dev/null +++ b/src/rocksdb/table/block_based/block_prefetcher.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "table/block_based/block_prefetcher.h" + +#include "rocksdb/file_system.h" +#include "table/block_based/block_based_table_reader.h" + +namespace ROCKSDB_NAMESPACE { +void BlockPrefetcher::PrefetchIfNeeded( + const BlockBasedTable::Rep* rep, const BlockHandle& handle, + const size_t readahead_size, bool is_for_compaction, + const bool no_sequential_checking, + const Env::IOPriority rate_limiter_priority) { + // num_file_reads is used by FilePrefetchBuffer only when + // implicit_auto_readahead is set. + if (is_for_compaction) { + rep->CreateFilePrefetchBufferIfNotExists( + compaction_readahead_size_, compaction_readahead_size_, + &prefetch_buffer_, /*implicit_auto_readahead=*/false, + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0); + return; + } + + // Explicit user requested readahead. + if (readahead_size > 0) { + rep->CreateFilePrefetchBufferIfNotExists( + readahead_size, readahead_size, &prefetch_buffer_, + /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, + /*num_file_reads_for_auto_readahead=*/0); + return; + } + + // Implicit readahead. + + // If max_auto_readahead_size is set to be 0 by user, no data will be + // prefetched. + size_t max_auto_readahead_size = rep->table_options.max_auto_readahead_size; + if (max_auto_readahead_size == 0 || initial_auto_readahead_size_ == 0) { + return; + } + + if (initial_auto_readahead_size_ > max_auto_readahead_size) { + initial_auto_readahead_size_ = max_auto_readahead_size; + } + + // In case of no_sequential_checking, it will skip the num_file_reads_ and + // will always creates the FilePrefetchBuffer. + if (no_sequential_checking) { + rep->CreateFilePrefetchBufferIfNotExists( + initial_auto_readahead_size_, max_auto_readahead_size, + &prefetch_buffer_, /*implicit_auto_readahead=*/true, + /*num_file_reads=*/0, + rep->table_options.num_file_reads_for_auto_readahead); + return; + } + + size_t len = BlockBasedTable::BlockSizeWithTrailer(handle); + size_t offset = handle.offset(); + + // If FS supports prefetching (readahead_limit_ will be non zero in that case) + // and current block exists in prefetch buffer then return. + if (offset + len <= readahead_limit_) { + UpdateReadPattern(offset, len); + return; + } + + if (!IsBlockSequential(offset)) { + UpdateReadPattern(offset, len); + ResetValues(rep->table_options.initial_auto_readahead_size); + return; + } + UpdateReadPattern(offset, len); + + // Implicit auto readahead, which will be enabled if the number of reads + // reached `table_options.num_file_reads_for_auto_readahead` (default: 2) and + // scans are sequential. + num_file_reads_++; + if (num_file_reads_ <= rep->table_options.num_file_reads_for_auto_readahead) { + return; + } + + if (rep->file->use_direct_io()) { + rep->CreateFilePrefetchBufferIfNotExists( + initial_auto_readahead_size_, max_auto_readahead_size, + &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, + rep->table_options.num_file_reads_for_auto_readahead); + return; + } + + if (readahead_size_ > max_auto_readahead_size) { + readahead_size_ = max_auto_readahead_size; + } + + // If prefetch is not supported, fall back to use internal prefetch buffer. + // Discarding other return status of Prefetch calls intentionally, as + // we can fallback to reading from disk if Prefetch fails. + Status s = rep->file->Prefetch( + handle.offset(), + BlockBasedTable::BlockSizeWithTrailer(handle) + readahead_size_, + rate_limiter_priority); + if (s.IsNotSupported()) { + rep->CreateFilePrefetchBufferIfNotExists( + initial_auto_readahead_size_, max_auto_readahead_size, + &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, + rep->table_options.num_file_reads_for_auto_readahead); + return; + } + + readahead_limit_ = offset + len + readahead_size_; + // Keep exponentially increasing readahead size until + // max_auto_readahead_size. + readahead_size_ = std::min(max_auto_readahead_size, readahead_size_ * 2); +} +} // namespace ROCKSDB_NAMESPACE |